Index: doc/theses/aaron/comp_II/.gitignore
===================================================================
--- doc/theses/aaron/comp_II/.gitignore	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,14 +1,0 @@
-# generated by latex
-*.aux
-*.bbl
-*.blg
-*.brf
-*.dvi
-*.idx
-*.ilg
-*.ind
-*.log
-*.out
-*.pdf
-*.ps
-*.toc
Index: doc/theses/aaron/comp_II/Makefile
===================================================================
--- doc/theses/aaron/comp_II/Makefile	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,78 +1,0 @@
-## Search path and command wrappers for the TeX tool chain.
-
-# Colon-separated directory list used for both TeX inputs and BibTeX databases.
-TeXLIB = .:../LaTeXmacros:../LaTeXmacros/listings:../LaTeXmacros/enumitem:../bibliography/:
-# Each wrapper passes the search path to its tool through the command's environment.
-LaTeX  = TEXINPUTS=${TeXLIB} latex -halt-on-error
-BibTeX = BIBINPUTS=${TeXLIB} bibtex
-
-## Inputs of the document, listed by base name; the suffix is appended to every entry.
-
-# LaTeX sources of the document.
-SOURCES = $(addsuffix .tex, \
-comp_II \
-)
-
-# Figures included as .tex files (currently none).
-FIGURES = $(addsuffix .tex, \
-)
-
-# Pictures included as .pstex files (currently none).
-PICTURES = $(addsuffix .pstex, \
-)
-
-# Program listings included as .tex files (currently none).
-PROGRAMS = $(addsuffix .tex, \
-)
-
-# Graphs included as .tex files (currently none).
-GRAPHS = $(addsuffix .tex, \
-)
-
-## Define the documents that need to be made.
-
-DOCUMENT = comp_II.pdf
-
-# Directives #
-
-# "all" and "clean" name actions, not files.  Declaring them phony keeps a stray
-# file called "all" or "clean" from making these targets look up to date.
-.PHONY : all clean
-
-# Build the final document.
-all : ${DOCUMENT}
-
-# Remove the LaTeX/BibTeX/makeindex by-products, the files named in the
-# FIGURES/PICTURES/PROGRAMS/GRAPHS lists, and the PostScript and PDF output.
-clean :
-	rm -f *.bbl *.aux *.dvi *.idx *.ilg *.ind *.brf *.out *.log *.toc *.blg *.pstex_t *.cf \
-		${FIGURES} ${PICTURES} ${PROGRAMS} ${GRAPHS} ${basename ${DOCUMENT}}.ps ${DOCUMENT}
-
-# File Dependencies #
-
-# PostScript -> PDF.  The output file is named explicitly so the recipe writes
-# exactly its target instead of relying on ps2pdf's default output name.
-${DOCUMENT} : ${basename ${DOCUMENT}}.ps
-	ps2pdf $< $@
-
-# DVI -> PostScript.
-${basename ${DOCUMENT}}.ps : ${basename ${DOCUMENT}}.dvi
-	dvips $< -o $@
-
-# Build the DVI file with repeated latex / bibtex / makeindex passes.
-# The Makefile itself is listed as a prerequisite, so editing it forces a rebuild.
-# Notes on the recipe below:
-#   * the tab-indented '#' lines are part of the recipe: they are handed to the shell
-#     as shell comments (and echoed by make); they are not make comments;
-#   * the leading '-' on the BibTeX lines tells make to carry on if bibtex fails;
-#   * makeindex reads ${basename $@}.idx, which is presumably written by the preceding
-#     latex runs -- TODO confirm that the document enables index generation.
-${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex \
-		../LaTeXmacros/common.tex ../LaTeXmacros/indexstyle ../bibliography/cfa.bib
-	# Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
-	if [ ! -r ${basename $@}.ind ] ; then touch ${basename $@}.ind ; fi
-	# Must have *.aux file containing citations for bibtex
-	if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename $@}.tex ; fi
-	-${BibTeX} ${basename $@}
-	# Some citations reference others so run steps again to resolve these citations
-	${LaTeX} ${basename $@}.tex
-	-${BibTeX} ${basename $@}
-	# Make index from *.aux entries and input index at end of document
-	makeindex -s ../LaTeXmacros/indexstyle ${basename $@}.idx
-	${LaTeX} ${basename $@}.tex
-	# Run again to get index title into table of contents
-	${LaTeX} ${basename $@}.tex
-
-# "predefined" is a command name: its recipe writes predefined.cf, never a file
-# called "predefined".  Declaring it phony keeps a stray file of that name from
-# suppressing the recipe.
-.PHONY : predefined
-
-# Filter the document source through predefined.sed into predefined.cf.
-predefined :
-	sed -f predefined.sed ${basename ${DOCUMENT}}.tex > ${basename $@}.cf
-
-## Define the default recipes.
-
-# All three rules convert an xfig drawing with fig2dev.  The shell creates the
-# redirected output file before fig2dev runs, so a failed conversion would leave
-# an empty or truncated target that make then treats as up to date.  Each recipe
-# therefore removes its output and reports failure when fig2dev fails.
-
-# .fig -> LaTeX (eepic) source.
-%.tex : %.fig
-	fig2dev -L eepic $< > $@ || { rm -f $@ ; exit 1 ; }
-
-# .fig -> PostScript.
-%.ps : %.fig
-	fig2dev -L ps $< > $@ || { rm -f $@ ; exit 1 ; }
-
-# .fig -> pstex.  This recipe writes two files: the target itself and the
-# companion $@_t, which is generated with "-p $@" naming the first file.
-%.pstex : %.fig
-	fig2dev -L pstex $< > $@ || { rm -f $@ ; exit 1 ; }
-	fig2dev -L pstex_t -p $@ $< > $@_t || { rm -f $@ $@_t ; exit 1 ; }
-
-# Local Variables: #
-# compile-command: "make" #
-# End: #
Index: doc/theses/aaron/comp_II/comp_II.tex
===================================================================
--- doc/theses/aaron/comp_II/comp_II.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,636 +1,0 @@
-% inline code ©...© (copyright symbol) emacs: C-q M-)
-% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
-% blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
-% green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
-% LaTeX escape §...§ (section symbol) emacs: C-q M-'
-% keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
-% math escape $...$ (dollar symbol)
-
-\documentclass[twoside,11pt]{article}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-% Latex packages used in the document (copied from CFA user manual).
-\usepackage[T1]{fontenc}                                % allow Latin1 (extended ASCII) characters
-\usepackage{textcomp}
-\usepackage[latin1]{inputenc}
-\usepackage{fullpage,times,comment}
-\usepackage{epic,eepic}
-\usepackage{upquote}									% switch curled `'" to straight
-\usepackage{calc}
-\usepackage{xspace}
-\usepackage{graphicx}
-\usepackage{varioref}									% extended references
-\usepackage{listings}									% format program code
-\usepackage[flushmargin]{footmisc}						% support label/reference in footnote
-\usepackage{latexsym}                                   % \Box glyph
-\usepackage{mathptmx}                                   % better math font with "times"
-\usepackage[usenames]{color}
-\usepackage[pagewise]{lineno}
-\renewcommand{\linenumberfont}{\scriptsize\sffamily}
-\input{common}                                          % bespoke macros used in the document
-\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,breaklinks=true]{hyperref}
-\usepackage{breakurl}
-\renewcommand{\UrlFont}{\small\sf}
-
-\setlength{\topmargin}{-0.45in}							% move running title into header
-\setlength{\headsep}{0.25in}
-
-\usepackage{caption}
-\usepackage{subcaption}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\newsavebox{\LstBox}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\title{
-\Huge \vspace*{1in} Efficient Type Resolution in \CFA \\
-\huge \vspace*{0.25in} PhD Comprehensive II Research Proposal
-\vspace*{1in}
-}
-
-\author{
-\huge Aaron Moss \\
-\Large \vspace*{0.1in} \texttt{a3moss@uwaterloo.ca} \\
-\Large Cheriton School of Computer Science \\
-\Large University of Waterloo
-}
-
-\date{
-\today
-}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\newcommand{\bigO}[1]{O\!\left( #1 \right)}
-
-\begin{document}
-\pagestyle{headings}
-% changed after setting pagestyle
-\renewcommand{\sectionmark}[1]{\markboth{\thesection\quad #1}{\thesection\quad #1}}
-\renewcommand{\subsectionmark}[1]{\markboth{\thesubsection\quad #1}{\thesubsection\quad #1}}
-\pagenumbering{roman}
-\linenumbers                                            % comment out to turn off line numbering
-
-\maketitle
-\thispagestyle{empty}
-
-\clearpage
-\thispagestyle{plain}
-\pdfbookmark[1]{Contents}{section}
-\tableofcontents
-
-\clearpage
-\thispagestyle{plain}
-\pagenumbering{arabic}
-
-\section{Introduction}
-
-\CFA\footnote{Pronounced ``C-for-all'', and written \CFA or \CFL.} is an evolutionary modernization of the C programming language currently being designed and built at the University of Waterloo by a team led by Peter Buhr. 
-\CFA both fixes existing design problems and adds multiple new features to C, including name overloading, user-defined operators, parametric-polymorphic routines, and type constructors and destructors, among others. 
-The new features make \CFA more powerful and expressive than C, but impose a compile-time cost, particularly in the expression resolver, which must evaluate the typing rules of a significantly more complex type-system.
-
-The primary goal of this research project is to develop a sufficiently performant expression resolution algorithm, experimentally validate its performance, and integrate it into CFA, the \CFA reference compiler.
-Secondary goals of this project include the development of various new language features for \CFA: parametric-polymorphic (``generic'') types have already been designed and implemented, and reference types and user-defined conversions are under design consideration. 
-An experimental performance-testing architecture for resolution algorithms is under development to determine the relative performance of different expression resolution algorithms, as well as the compile-time cost of adding various new features to the \CFA type-system. 
-More broadly, this research should provide valuable data for implementers of compilers for other programming languages with similarly powerful static type-systems.
-
-\section{\CFA}
-
-To make the scope of the proposed expression resolution problem more explicit, it is necessary to define the features of both C and \CFA (both current and proposed) that affect this algorithm. 
-In some cases the interactions of multiple features make expression resolution a significantly more complex problem than any individual feature would; in other cases a feature that does not by itself add any complexity to expression resolution triggers previously rare edge cases more frequently.
-
-It is important to note that \CFA is not an object-oriented language.
-\CFA does have a system of (possibly implicit) type conversions derived from C's type conversions; while these conversions may be thought of as something like an inheritance hierarchy, the underlying semantics are significantly different and such an analogy is loose at best. 
-Particularly, \CFA has no concept of ``subclass'', and thus no need to integrate an inheritance-based form of polymorphism with its parametric and overloading-based polymorphism. 
-The graph structure of the \CFA type conversions is also markedly different than an inheritance graph; it has neither a top nor a bottom type, and does not satisfy the lattice properties typical of inheritance graphs.
-
-\subsection{Polymorphic Functions}
-The most significant feature \CFA adds is parametric-polymorphic functions. 
-Such functions are written using a ©forall© clause (which gives the language its name):
-\begin{lstlisting}
-®forall(otype T)®
-T identity(T x) {
-    return x;
-}
-
-int forty_two = identity(42); // T is bound to int, forty_two == 42
-\end{lstlisting}
-The ©identity© function above can be applied to any complete object type (or ``©otype©''). 
-The type variable ©T© is transformed into a set of additional implicit parameters to ©identity©, which encode sufficient information about ©T© to create and return a variable of that type. 
-The current \CFA implementation passes the size and alignment of the type represented by an ©otype© parameter, as well as an assignment operator, constructor, copy constructor and destructor. 
-Here, the runtime cost of polymorphism is spread over each polymorphic call, due to passing more arguments to polymorphic functions; preliminary experiments have shown this overhead to be similar to \CC virtual function calls. 
-Determining if packaging all polymorphic arguments to a function into a virtual function table would reduce the runtime overhead of polymorphic calls is an open research question. 
-
-Since bare polymorphic types do not provide a great range of available operations, \CFA provides a \emph{type assertion} mechanism to provide further information about a type:
-\begin{lstlisting}
-forall(otype T ®| { T twice(T); }®)
-T four_times(T x) {
-    return twice( twice(x) );
-}
-
-double twice(double d) { return d * 2.0; } // (1)
-
-double magic = four_times(10.5); // T is bound to double, uses (1) to satisfy type assertion
-\end{lstlisting}
-These type assertions may be either variable or function declarations that depend on a polymorphic type variable. 
-©four_times© can only be called with an argument for which there exists a function named ©twice© that can take that argument and return another value of the same type; a pointer to the appropriate ©twice© function is passed as an additional implicit parameter to the call of ©four_times©.
-
-Monomorphic specializations of polymorphic functions can themselves be used to satisfy type assertions. 
-For instance, ©twice© could have been defined using the \CFA syntax for operator overloading as:
-\begin{lstlisting}
-forall(otype S | { ®S ?+?(S, S);® })
-S twice(S x) { return x + x; }  // (2)
-\end{lstlisting} 
-This version of ©twice© works for any type ©S© that has an addition operator defined for it, and it could have been used to satisfy the type assertion on ©four_times©. 
-The compiler accomplishes this by creating a wrapper function calling ©twice // (2)© with ©S© bound to ©double©, then providing this wrapper function to ©four_times©\footnote{©twice // (2)© could also have had a type parameter named ©T©; \CFA specifies renaming of the type parameters, which would avoid the name conflict with the type variable ©T© of ©four_times©.}. 
-
-Finding appropriate functions to satisfy type assertions is essentially a recursive case of expression resolution, as it takes a name (that of the type assertion) and attempts to match it to a suitable declaration \emph{in the current scope}. 
-If a polymorphic function can be used to satisfy one of its own type assertions, this recursion may not terminate, as it is possible that the function is examined as a candidate for its own type assertion an unbounded number of times. 
-To avoid infinite loops, the current CFA compiler imposes a fixed limit on the possible depth of recursion, similar to that employed by most \CC compilers for template expansion; this restriction means that there are some semantically well-typed expressions that cannot be resolved by CFA. 
-One area of potential improvement this project proposes to investigate is the possibility of using the compiler's knowledge of the current set of declarations to more precisely determine when further type assertion satisfaction recursion does not produce a well-typed expression.
-
-\subsubsection{Traits}
-\CFA provides \emph{traits} as a means to name a group of type assertions, as in the example below:
-\begin{lstlisting}
-®trait has_magnitude(otype T)® {
-    bool ?<?(T, T);        // comparison operator for T
-    T -?(T);               // negation operator for T
-    void ?{}(T*, zero_t);  // constructor from 0 literal
-};
-
-forall(otype M | has_magnitude(M))
-M abs( M m ) {
-    M zero = 0;  // uses zero_t constructor from trait
-    return m < zero ? -m : m;
-}
-
-forall(otype M | has_magnitude(M))
-M max_magnitude( M a, M b ) {
-    return abs(a) < abs(b) ? b : a; 
-}
-\end{lstlisting}
-
-Semantically, traits are simply named lists of type assertions, but they may be used for many of the same purposes that interfaces in Java or abstract base classes in \CC are used for.
-Unlike Java interfaces or \CC base classes, \CFA types do not explicitly state any inheritance relationship to traits they satisfy; this can be considered a form of structural inheritance, similar to implementation of an interface in Go, as opposed to the nominal inheritance model of Java and \CC. 
-Nominal inheritance can be simulated with traits using marker variables or functions:
-\begin{lstlisting}
-trait nominal(otype T) {
-    ®T is_nominal;®
-};
-
-int is_nominal;  // int now satisfies the nominal trait
-{
-    char is_nominal; // char satisfies the nominal trait
-}
-// char no longer satisfies the nominal trait here  
-\end{lstlisting}
-
-Traits, however, are significantly more powerful than nominal-inheritance interfaces; firstly, due to the scoping rules of the declarations that satisfy a trait's type assertions, a type may not satisfy a trait everywhere that the type is declared, as with ©char© and the ©nominal© trait above. 
-Secondly, traits may be used to declare a relationship among multiple types, a property that may be difficult or impossible to represent in nominal-inheritance type systems:
-\begin{lstlisting}
-trait pointer_like(®otype Ptr, otype El®) {
-    lvalue El *?(Ptr); // Ptr can be dereferenced into a modifiable value of type El
-}
-
-struct list {
-    int value;
-    list *next;  // may omit "struct" on type names
-};
-
-typedef list *list_iterator;
-
-lvalue int *?( list_iterator it ) {
-    return it->value;
-}
-\end{lstlisting}
-
-In the example above, ©(list_iterator, int)© satisfies ©pointer_like© by the user-defined dereference function, and ©(list_iterator, list)© also satisfies ©pointer_like© by the built-in dereference operator for pointers. 
-Given a declaration ©list_iterator it©, ©*it© can be either an ©int© or a ©list©, with the meaning disambiguated by context (\eg ©int x = *it;© interprets ©*it© as an ©int©, while ©(*it).value = 42;© interprets ©*it© as a ©list©).
-While a nominal-inheritance system with associated types could model one of those two relationships by making ©El© an associated type of ©Ptr© in the ©pointer_like© implementation, few such systems could model both relationships simultaneously.
-
-The flexibility of \CFA's implicit trait-satisfaction mechanism provides programmers with a great deal of power, but also blocks some optimization approaches for expression resolution. 
-The ability of types to begin to or cease to satisfy traits when declarations go into or out of scope makes caching of trait satisfaction judgements difficult, and the ability of traits to take multiple type parameters can lead to a combinatorial explosion of work in any attempt to pre-compute trait satisfaction relationships. 
-On the other hand, the addition of a nominal inheritance mechanism to \CFA's type system or replacement of \CFA's trait satisfaction system with a more object-oriented inheritance model and investigation of possible expression resolution optimizations for such a system may be an interesting avenue of further research.
-
-\subsection{Name Overloading}
-In C, no more than one variable or function in the same scope may share the same name\footnote{Technically, C has multiple separated namespaces, one holding ©struct©, ©union©, and ©enum© tags, one holding labels, one holding typedef names, variable, function, and enumerator identifiers, and one for each ©struct© or ©union© type holding the field names.}, and variable or function declarations in inner scopes with the same name as a declaration in an outer scope hide the outer declaration. 
-This restriction makes finding the proper declaration to match to a variable expression or function application a simple matter of symbol-table lookup, which can be easily and efficiently implemented. 
-\CFA, on the other hand, allows overloading of variable and function names, so long as the overloaded declarations do not have the same type, avoiding the multiplication of variable and function names for different types common in the C standard library, as in the following example:
-\begin{lstlisting}
-#include <limits.h>
-#include <float.h>
-
-int max(int a, int b) { return a < b ? b : a; }  // (1)
-double max(double a, double b) { return a < b ? b : a; }  // (2)
-
-int max = INT_MAX;     // (3)
-double max = DBL_MAX;  // (4)
-
-max(7, -max);   // uses (1) and (3), by matching int type of the constant 7 
-max(max, 3.14); // uses (2) and (4), by matching double type of the constant 3.14
-
-max(max, -max);  // ERROR: ambiguous
-int m = max(max, -max); // uses (1) once and (3) twice, by matching return type
-\end{lstlisting}
-
-The presence of name overloading in \CFA means that simple table lookup is insufficient to match identifiers to declarations, and a type matching algorithm must be part of expression resolution.
-
-\subsection{Implicit Conversions}
-In addition to the multiple interpretations of an expression produced by name overloading, \CFA must support all of the implicit conversions present in C for backward compatibility, producing further candidate interpretations for expressions. 
-C does not have an inheritance hierarchy of types, but the C standard's rules for the ``usual arithmetic conversions'' define which of the built-in types are implicitly convertible to which other types, and the relative cost of any pair of such conversions from a single source type. 
-\CFA adds to the usual arithmetic conversions rules defining the cost of binding a polymorphic type variable in a function call; such bindings are cheaper than any \emph{unsafe} (narrowing) conversion, \eg ©int© to ©char©, but more expensive than any \emph{safe} (widening) conversion, \eg ©int© to ©double©. 
-
-The expression resolution problem, then, is to find the unique minimal-cost interpretation of each expression in the program, where all identifiers must be matched to a declaration, and implicit conversions or polymorphic bindings of the result of an expression may increase the cost of the expression. 
-Note that which subexpression interpretation is minimal-cost may require contextual information to disambiguate. 
-For instance, in the example in the previous subsection, ©max(max, -max)© cannot be unambiguously resolved, but ©int m = max(max, -max)© has a single minimal-cost resolution. 
-While the interpretation ©int m = (int)max((double)max, -(double)max)© is also a valid interpretation, it is not minimal-cost due to the unsafe cast from the ©double© result of ©max© to ©int© (the two ©double© casts function as type ascriptions selecting ©double max© rather than casts from ©int max© to ©double©, and as such are zero-cost).
-
-\subsubsection{User-generated Implicit Conversions}
-One possible additional feature to \CFA included in this research proposal is \emph{user-generated implicit conversions}. 
-Such a conversion system should be simple for programmers to utilize, and fit naturally with the existing design of implicit conversions in C; ideally it would also be sufficiently powerful to encode C's usual arithmetic conversions itself, so that \CFA only has one set of rules for conversions. 
-
-Ditchfield~\cite{Ditchfield:conversions} laid out a framework for using polymorphic-conversion-constructor functions to create a directed acyclic graph (DAG) of conversions. 
-A monomorphic variant of these functions can be used to mark a conversion arc in the DAG as only usable as the final step in a conversion. 
-With these two types of conversion arcs, separate DAGs can be created for the safe and the unsafe conversions, and conversion cost can be represented as the length of the shortest path through the DAG from one type to another. 
-\begin{figure}[h]
-\centering
-\includegraphics{conversion_dag}
-\caption{A portion of the implicit conversion DAG for built-in types.}\label{fig:conv_dag}
-\end{figure}
-As can be seen in Figure~\ref{fig:conv_dag}, there are either safe or unsafe paths between each of the arithmetic types listed; the ``final'' arcs are important both to avoid creating cycles in the signed-unsigned conversions, and to disambiguate potential diamond conversions (\eg, if the ©int© to ©unsigned int© conversion were not marked final there would be two length-two paths from ©int© to ©unsigned long©, making it impossible to choose between them; however, since the ©unsigned int© to ©unsigned long© arc cannot be traversed after the final ©int© to ©unsigned int© arc, there is a single unambiguous conversion path from ©int© to ©unsigned long©).
-
-Open research questions on this topic include:
-\begin{itemize}
-\item Can a conversion graph be generated that represents each allowable conversion in C with a unique minimal-length path such that the path lengths accurately represent the relative costs of the conversions?
-\item Can such a graph representation be usefully augmented to include user-defined types as well as built-in types?
-\item Can the graph be efficiently represented and used in the expression resolver?
-\end{itemize}
-
-\subsection{Constructors and Destructors}
-Rob Shluntz, a current member of the \CFA research team, has added constructors and destructors to \CFA. 
-Each type has an overridable default-generated zero-argument constructor, copy constructor, assignment operator, and destructor.
-For ©struct© types these functions each call their equivalents on each field of the ©struct©. 
-This feature affects expression resolution because an ©otype© type variable ©T© implicitly adds four type assertions, one for each of these four functions, so assertion resolution is pervasive in \CFA polymorphic functions, even those without any explicit type assertions. 
-The following example shows the implicitly-generated code in green:
-\begin{lstlisting}
-struct kv {
-    int key;
-    char *value;
-};
-
-¢void ?{}(kv *this) {  // default constructor
-    ?{}(&(this->key));  // call recursively on members
-    ?{}(&(this->value));
-}
-void ?{}(kv *this, kv that) {  // copy constructor
-    ?{}(&(this->key), that.key);
-    ?{}(&(this->value), that.value);
-}
-kv ?=?(kv *this, kv that) {  // assignment operator
-    ?=?(&(this->key), that.key);
-    ?=?(&(this->value), that.value);
-    return *this;
-}
-void ^?{}(kv *this) {  // destructor
-    ^?{}(&(this->key));
-    ^?{}(&(this->value));
-}¢
-
-forall(otype T ¢| { void ?{}(T*); void ?{}(T*, T); T ?=?(T*, T); void ^?{}(T*); }¢)
-void foo(T);
-\end{lstlisting}
-
-\subsection{Generic Types}
-I have already added a generic type capability to \CFA, designed to efficiently and naturally integrate with \CFA's existing polymorphic functions. 
-A generic type can be declared by placing a ©forall© specifier on a ©struct© or ©union© declaration, and instantiated using a parenthesized list of types after the type name:
-\begin{lstlisting}
-forall(otype R, otype S) struct pair {
-    R first;
-    S second;
-};
-
-forall(otype T)
-T value( pair(const char*, T) *p ) { return p->second; }
-
-pair(const char*, int) p = { "magic", 42 };
-int magic = value( &p );
-\end{lstlisting}
-For \emph{concrete} generic types, that is, those where none of the type parameters depend on polymorphic type variables (like ©pair(const char*, int)© above), the struct is essentially template expanded to a new struct type; for \emph{polymorphic} generic types (such as ©pair(const char*, T)© above), member access is handled by a runtime calculation of the field offset, based on the size and alignment information of the polymorphic parameter type. 
-The default-generated constructors, destructor and assignment operator for a generic type are polymorphic functions with the same list of type parameters as the generic type definition.
-
-Aside from giving users the ability to create more parameterized types than just the built-in pointer, array and function types, the combination of generic types with polymorphic functions and implicit conversions makes the edge case where the resolver may enter an infinite loop much more common, as in the following code example: 
-\begin{lstlisting}
-forall(otype T) struct box { T x; };
-
-void f(void*); // (1)
-
-forall(otype S)
-void f(box(S)* b) { // (2)
-	f(®(void*)0®);
-}
-\end{lstlisting}
-
-The loop in the resolver happens as follows:
-\begin{itemize} 
-\item Since there is an implicit conversion from ©void*© to any pointer type, the highlighted expression can be interpreted as either a ©void*©, matching ©f // (1)©, or a ©box(S)*© for some type ©S©, matching ©f // (2)©.
-\item To determine the cost of the ©box(S)© interpretation, a type must be found for ©S© that satisfies the ©otype© implicit type assertions (assignment operator, default and copy constructors, and destructor); one option is ©box(S2)© for some type ©S2©.
-\item The assignment operator, default and copy constructors, and destructor of ©box(T)© are also polymorphic functions, each of which requires the type parameter ©T© to have an assignment operator, default and copy constructors, and destructor. When choosing an interpretation for ©S2©, one option is ©box(S3)©, for some type ©S3©.
-\item The previous step repeats until stopped, with four times as much work performed at each step.
-\end{itemize}
-This problem can occur in any resolution context where a polymorphic function that can satisfy its own type assertions is required for a possible interpretation of an expression with no constraints on its type, and is thus not limited to combinations of generic types with ©void*© conversions.
-However, constructors for generic types often satisfy their own assertions and a polymorphic conversion such as the ©void*© conversion to a polymorphic variable is a common way to create an expression with no constraints on its type. 
-As discussed above, the \CFA expression resolver must handle this possible infinite recursion somehow, and it occurs fairly naturally in code like the above that uses generic types. 
-
-\subsection{Tuple Types}
-\CFA adds \emph{tuple types} to C, a syntactic facility for referring to lists of values anonymously or with a single identifier. 
-An identifier may name a tuple, and a function may return one. 
-Particularly relevantly for resolution, a tuple may be implicitly \emph{destructured} into a list of values, as in the call to ©swap©:
-\begin{lstlisting}
-[char, char] x = [ '!', '?' ];  // (1)
-int x = 42;  // (2)
-
-forall(otype T) [T, T] swap( T a, T b ) { return [b, a]; }  // (3)
-
-x = swap( x ); // destructure [char, char] x into two elements of parameter list
-// cannot use int x for parameter, not enough arguments to swap
-
-void swap( int, char, char ); // (4)
-
-swap( x, x ); // resolved as (4) on (2) and (1)
-// (3) on (2) and (2) is close, but the polymorphic binding makes it not minimal-cost
-\end{lstlisting}
-Tuple destructuring means that the mapping from the position of a subexpression in the argument list to the position of a parameter in the function declaration is not straightforward, as some arguments may be expandable to different numbers of parameters, like ©x© above. 
-In the second example, the second ©x© argument can be resolved starting at the second or third parameter of ©swap©, depending which interpretation of ©x© was chosen for the first argument.
-
-\subsection{Reference Types}
-I have been designing \emph{reference types} for \CFA, in collaboration with the rest of the \CFA research team. 
-Given some type ©T©, a ©T&© (``reference to ©T©'') is essentially an automatically dereferenced pointer; with these semantics most of the C standard's discussions of lvalues can be expressed in terms of references instead, with the benefit of being able to express the difference between the reference and non-reference version of a type in user code. 
-References preserve C's existing qualifier-dropping lvalue-to-rvalue conversion (\eg a ©const volatile int&© can be implicitly converted to a bare ©int©).
-The reference proposal also adds a rvalue-to-lvalue conversion to \CFA, implemented by storing the value in a new compiler-generated temporary and passing a reference to the temporary. 
-These two conversions can chain, producing a qualifier-dropping conversion for references, for instance converting a reference to a ©const int© into a reference to a non-©const int© by copying the originally referred-to value into a fresh temporary and taking a reference to this temporary, as in:
-\begin{lstlisting} 
-const int magic = 42;
-
-void print_inc( int& x ) { printf("%d\n", ++x); }
-
-print_inc( magic ); // legal; implicitly generated code in green below:
-
-¢int tmp = magic;¢ // to safely strip const-qualifier
-¢print_inc( tmp );¢ // tmp is incremented, magic is unchanged
-\end{lstlisting}
-These reference conversions may also chain with the other implicit type-conversions. 
-The main implication of the reference conversions for expression resolution is the multiplication of available implicit conversions, though given the restricted context reference conversions may be able to be treated efficiently as a special case of implicit conversions.
-
-\subsection{Special Literal Types}
-Another proposal currently under consideration for the \CFA type-system is assigning special types to the literal values ©0© and ©1©. 
-Implicit conversions from these types allow ©0© and ©1© to be considered as values of many different types, depending on context, allowing expression desugarings like ©if ( x ) {}© $\Rightarrow$ ©if ( x != 0 ) {}© to be implemented efficiently and precisely. 
-This approach is a generalization of C's existing behaviour of treating ©0© as either an integer zero or a null pointer constant, and treating either of those values as boolean false. 
-The main implication for expression resolution is that the frequently encountered expressions ©0© and ©1© may have a large number of valid interpretations.
-
-\subsection{Deleted Function Declarations}
-One final proposal for \CFA with an impact on the expression resolver is \emph{deleted function declarations}; in \CCeleven, a function declaration can be deleted as below:
-\begin{lstlisting}
-int somefn(char) = delete;
-\end{lstlisting}
-This feature is typically used in \CCeleven to make a type non-copyable by deleting its copy constructor and assignment operator\footnote{In previous versions of \CC a type could be made non-copyable by declaring a private copy constructor and assignment operator, but not defining either. This idiom is well-known, but depends on some rather subtle and \CC-specific rules about private members and implicitly-generated functions; the deleted-function form is both clearer and less verbose.}, or forbidding some interpretations of a polymorphic function by specifically deleting the forbidden overloads\footnote{Specific polymorphic function overloads can also be forbidden in previous \CC versions through use of template metaprogramming techniques, though this advanced usage is beyond the skills of many programmers. A similar effect can be produced on an ad-hoc basis at the appropriate call sites through use of casts to determine the function type. In both cases, the deleted-function form is clearer and more concise.}. 
-To add a similar feature to \CFA involves including the deleted function declarations in expression resolution along with the normal declarations, but producing a compiler error if the deleted function is the best resolution. 
-How conflicts should be handled between resolution of an expression to both a deleted and a non-deleted function is a small but open research question.
-
-\section{Expression Resolution}
-\subsection{Analysis}
-The expression resolution problem is determining an optimal match between some combination of argument interpretations and the parameter list of some overloaded instance of a function; the argument interpretations are produced by recursive invocations of expression resolution, where the base case is zero-argument functions (which are, for purposes of this discussion, semantically equivalent to named variables or constant literal expressions). 
-Assuming that the matching between a function's parameter list and a combination of argument interpretations can be done in $\bigO{p^k}$ time, where $p$ is the number of parameters and $k$ is some positive number, if there are $\bigO{i}$ valid interpretations for each subexpression, there will be $\bigO{i}$ candidate functions and $\bigO{i^p}$ possible argument combinations for each expression, so for a single recursive call expression resolution takes $\bigO{i^{p+1} \cdot p^k}$ time if it must compare all combinations, or $\bigO{i(p+1) \cdot p^k}$ time if argument-parameter matches can be chosen independently of each other. 
-Given these bounds, resolution of a single top-level expression tree of depth $d$ takes $\bigO{i^{p+1} \cdot p^{k \cdot d}}$ time under full-combination matching, or $\bigO{i(p+1) \cdot p^{k \cdot d}}$ time for independent-parameter matching\footnote{A call tree has leaves at depth $\bigO{d}$, and each internal node has $\bigO{p}$ fan-out, producing $\bigO{p^d}$ total recursive calls.}.
-
-Expression resolution is somewhat unavoidably exponential in $d$, the depth of the expression tree, and if arguments cannot be matched to parameters independently of each other, expression resolution is also exponential in $p$. 
-However, both $d$ and $p$ are fixed by the programmer, and generally bounded by reasonably small constants. 
-$k$, on the other hand, is mostly dependent on the representation of types in the system and the efficiency of type assertion checking; if a candidate argument combination can be compared to a function parameter list in linear time in the length of the list (\ie $k = 1$), then the $p^{k \cdot d}$ factor is linear in the input size of the source code for the expression, otherwise the resolution algorithm exhibits super-linear runtime scaling on code containing more-deeply nested expressions.
-The number of valid interpretations of any subexpression, $i$, is bounded by the number of types in the system, which is possibly infinite, though practical resolution algorithms for \CFA must be able to place some finite bound on $i$, possibly at the expense of type-system completeness. 
-
-\subsection{Expression Costs}
-The expression resolution problem involves minimization of a cost function; loosely defined, this cost function is the number of implicit conversions in the top-level expression interpretation. 
-With more specificity, the \emph{cost} of a particular expression interpretation is a lexicographically-ordered tuple, where each element of the tuple corresponds to a particular kind of conversion. 
-In \CFA today, cost is a three-tuple including the number of unsafe conversions, the number of polymorphic parameter bindings, and the number of safe conversions. 
-These counts include conversions used in subexpression interpretations, as well as those necessary to satisfy the type assertions of any polymorphic functions included in the interpretation. 
-
-\begin{lstlisting}
-void f(char, long);  // $f_1$ - cost (2, 0, 1)
-forall(otype T) void f(T, long); // $f_2$ - cost (0, 1, 1)
-void f(long, long); // $f_{3a}$ - cost (0, 0, 2)
-void f(int, float); // $f_{3b}$ - cost (0, 0, 2)
-void f(int, long);  // $f_4$ - cost (0, 0, 1)
-
-f(7, 11);
-\end{lstlisting}
-
-In the example above, the expression resolves to $f_4$. 
-$f_1$ has an unsafe conversion (from ©int© to ©char©), and is thus the highest cost, followed by $f_2$, which has a polymorphic binding (from ©int© to ©T©). 
-None of $f_{3a}$, $f_{3b}$, or $f_4$ matches exactly with the type of the call expression (©void (*)(int, int)©), each involving safe conversions, but in this case $f_4$ is cheaper than $f_{3a}$, because it converts fewer arguments, and is also cheaper than $f_{3b}$, because ©long© is a closer match for ©int© than ©float© is. 
-If the declaration of $f_4$ were missing, the expression would be ambiguous, because the two single-step ©int©-to-©long© conversions in $f_{3a}$ cost the same as the one double-step ©int©-to-©float© conversion in $f_{3b}$.
-
-In the course of this project I may modify the cost tuple,\footnote{I have considered adding an element to distinguish between cast expressions used as conversions and those used as type ascriptions, and another element to differentiate interpretations based on closer qualifier matches. The existing costing of polymorphic functions could also be made more precise than a bare count of parameter bindings.} but the essential nature of the cost calculation should remain the same.
-
-\subsection{Objectives}
-The research goal of this project is to develop a performant expression resolver for \CFA; this analysis suggests three primary areas of investigation to accomplish that end. 
-The first area of investigation is efficient argument-parameter matching; Bilson~\cite{Bilson03} mentions significant optimization opportunities available in the current literature to improve on the existing \CFA compiler.
-%TODO: look up and lit review 
-The second area of investigation is minimizing dependencies between argument-parameter matches; the current \CFA compiler attempts to match entire argument combinations against functions at once, potentially attempting to match the same argument against the same parameter multiple times. 
-Whether the feature set of \CFA admits an expression resolution algorithm where arguments can be matched to parameters independently of other arguments in the same function application is an area of open research; polymorphic type parameters produce enough cross-argument dependencies that the problem is not trivial. 
-If cross-argument resolution dependencies cannot be completely eliminated, effective caching strategies to reduce duplicated work between equivalent argument-parameter matches in different combinations may mitigate the asymptotic deficits of the whole-combination matching approach. 
-The final area of investigation is heuristics and algorithmic approaches to reduce the number of argument interpretations considered in the common case; if argument-parameter matches cannot be made independent, even small reductions in $i$ should yield significant reductions in the $i^{p+1}$ resolver runtime factor. 
-
-The discussion below presents a number of largely orthogonal axes for expression resolution algorithm design to be investigated, noting prior work where applicable. 
-Though some of the proposed improvements to the expression resolution algorithm are based on heuristics rather than asymptotically superior algorithms, it should be noted that programmers often employ idioms and other programming patterns to reduce the mental burden of producing correct code, and if these patterns can be identified and exploited by the compiler then the significant reduction in expression resolution time for common, idiomatic expressions should result in lower total compilation time even for code including difficult-to-resolve expressions that push the expression resolver to its theoretical worst case.
-
-\subsection{Argument-Parameter Matching}
-The first axis for consideration is the argument-parameter matching direction --- whether the type matching for a candidate function to a set of candidate arguments is directed by the argument types or the parameter types. 
-For programming languages without implicit conversions, argument-parameter matching is essentially the entirety of the expression resolution problem, and is generally referred to as ``overload resolution'' in the literature.
-All expression-resolution algorithms form a DAG of interpretations, some explicitly, some implicitly; in this DAG, arcs point from function-call interpretations to argument interpretations, as in Figure~\ref{fig:res_dag}:
-\begin{figure}[h]
-\centering
-\begin{subfigure}[h]{2in}
-\begin{lstlisting}
-int *p;  // $p_i$
-char *p; // $p_c$ 
-
-double *f(int*, int*); // $f_d$
-char *f(char*, int*); // $f_c$
-
-f( f( p, p ), p );
-\end{lstlisting}
-\end{subfigure}~\begin{subfigure}[h]{2in}
-\includegraphics{resolution_dag}
-\end{subfigure}
-\caption{Resolution DAG for a simple expression. Functions that do not have a valid argument matching are covered with an \textsf{X}.}\label{fig:res_dag}
-\end{figure}
-
-Note that some interpretations may be part of more than one super-interpretation, as with the second $p_i$ in the bottom row, while some valid subexpression interpretations, like $f_d$ in the middle row, are not used in any interpretation of their superexpression.
-
-\subsubsection{Argument-directed (Bottom-up)}
-Baker's algorithm for expression resolution~\cite{Baker82} pre-computes argument candidates, from the leaves of the expression tree up.
-For each candidate function, Baker attempts to match argument types to parameter types in sequence, failing if any parameter cannot be matched.
-
-Bilson~\cite{Bilson03} similarly pre-computes argument candidates in the original \CFA compiler, but then explicitly enumerates all possible argument combinations for a multi-parameter function; these argument combinations are matched to the parameter types of the candidate function as a unit rather than individual arguments.
-This approach is less efficient than Baker's approach, as the same argument may be compared to the same parameter many times, but allows a more straightforward handling of polymorphic type-binding and multiple return-types.
-It is possible the efficiency losses here relative to Baker could be significantly reduced by keeping a memoized cache of argument-parameter type comparisons and reading previously-seen argument-parameter matches from this cache rather than recomputing them.
-
-\subsubsection{Parameter-directed (Top-down)}
-Unlike Baker and Bilson, Cormack's algorithm~\cite{Cormack81} requests argument candidates that match the type of each parameter of each candidate function, from the top-level expression down; memoization of these requests is presented as an optimization.
-As presented, this algorithm requires the result of the expression to have a known type; however, an algorithm based on Cormack's could reasonably request a candidate set of any return type, though such a set may be quite large.
-
-\subsubsection{Hybrid}
-This proposal includes the investigation of hybrid top-down/bottom-up argument-parameter matching.
-A reasonable hybrid approach might take a top-down approach when the expression to be matched has a fixed type, and a bottom-up approach in untyped contexts.
-This approach may involve switching from one matching direction to the other at different levels of the expression tree. 
-For instance, in:
-\begin{lstlisting}
-forall(otype T)
-int f(T x);  // (1)
-
-void* f(char y);  // (2)
-
-int x = f( f( '!' ) );
-\end{lstlisting}
-the outer call to ©f© must have a return type that is (implicitly convertible to) ©int©, so a top-down approach is used to select \textit{(1)} as the proper interpretation of ©f©. \textit{(1)}'s parameter ©x©, however, is an unbound type variable, and can thus take a value of any complete type, providing no guidance for the choice of candidate for the inner call to ©f©. The leaf expression ©'!'©, however, determines a zero-cost interpretation of the inner ©f© as \textit{(2)}, providing a minimal-cost expression resolution where ©T© is bound to ©void*©.
-
-Deciding when to switch between bottom-up and top-down resolution to minimize wasted work in a hybrid algorithm is a necessarily heuristic process, and finding good heuristics for which subexpressions to switch matching strategies on is an open question.
-One reasonable approach might be to set a threshold $t$ for the number of candidate functions, and to use top-down resolution for any subexpression with fewer than $t$ candidate functions, to minimize the number of unmatchable argument interpretations computed, but to use bottom-up resolution for any subexpression with at least $t$ candidate functions, to reduce duplication in argument interpretation computation between the different candidate functions. 
-
-Ganzinger and Ripken~\cite{Ganzinger80} propose an approach (later refined by Pennello~\etal~\cite{Pennello80}) that uses a top-down filtering pass followed by a bottom-up filtering pass to reduce the number of candidate interpretations; they prove that for the Ada programming language a small number of such iterations is sufficient to converge to a solution for the expression resolution problem. 
-Persch~\etal~\cite{PW:overload} developed a similar two-pass approach where the bottom-up pass is followed by the top-down pass. 
-These algorithms differ from the hybrid approach under investigation in that they take multiple passes over the expression tree to yield a solution, and that they also apply both filtering heuristics to all expression nodes; \CFA's polymorphic functions and implicit conversions make the approach of filtering out invalid types taken by all of these algorithms infeasible.
-
-\subsubsection{Common Subexpression Caching}
-With any of these argument-parameter approaches, it may be a useful optimization to cache the resolution results for common subexpressions; in Figure~\ref{fig:res_dag} this optimization would result in the list of interpretations $[p_c, p_i]$ for ©p© only being calculated once, and re-used for each of the three instances of ©p©.
-
-\subsection{Implicit Conversion Application}
-With the exception of Bilson, the authors mentioned above do not account for implicit conversions in their algorithms\footnote{Baker does briefly comment on an approach for handling implicit conversions, but does not provide an implementable algorithm.}; all assume that there is at most one valid interpretation of a given expression for each distinct type. 
-Integrating implicit conversion handling into the presented argument-parameter matching algorithms thus provides some choice of implementation approach.
-
-Inference of polymorphic type variables can be considered a form of implicit conversion application, where monomorphic types are implicitly converted to instances of some polymorphic type\footnote{This ``conversion'' may not be implemented in any explicit way at runtime, but does need to be handled by the expression resolver as an inexact match between argument and parameter types.}. 
-This form of implicit conversion is particularly common in functional languages; Haskell's type classes~\cite{typeclass} are a particularly well-studied variant of this inference. 
-However, type classes arguably do not allow name overloading, as (at least in the Haskell implementation) identifiers belonging to type classes may not be overloaded in any other context than an implementation of that type class; this provides a single (possibly polymorphic) interpretation of any identifier, simplifying the expression resolution problem relative to \CFA. 
-\CC~\cite{ANSI98:C++} includes both name overloading and implicit conversions in its expression resolution specification, though unlike \CFA it does complete type-checking on a generated monomorphization of template functions, where \CFA simply checks a list of type constraints. 
-The upcoming Concepts standard~\cite{C++concepts} defines a system of type constraints similar in principle to \CFA's.
-Cormack and Wright~\cite{Cormack90} present an algorithm that integrates overload resolution with a polymorphic type inference approach very similar to \CFA's.
-However, their algorithm does not account for implicit conversions other than polymorphic type binding and their discussion of their overload resolution algorithm is not sufficiently detailed to classify it with the other argument-parameter matching approaches\footnote{Their overload resolution algorithm is possibly a variant of Ganzinger and Ripken~\cite{Ganzinger80} or Pennello~\etal~\cite{Pennello80}, modified to allow for polymorphic type binding.}.
-
-\subsubsection{On Parameters}
-Bilson does account for implicit conversions in his algorithm, but it is unclear if the approach is optimal. 
-His algorithm integrates checking for valid implicit conversions into the argument-parameter-matching step, essentially trading more expensive matching for a smaller number of argument interpretations. 
-This approach may result in the same subexpression being checked for a type match with the same type multiple times, though again memoization may mitigate this cost; however, this approach does not generate implicit conversions that are not useful to match the containing function.
-
-\subsubsection{On Arguments}
-Another approach is to generate a set of possible implicit conversions for each set of interpretations of a given argument. 
-This approach has the benefit of detecting ambiguous interpretations of arguments at the level of the argument rather than its containing call, never finds more than one interpretation of the argument with a given type, and re-uses calculation of implicit conversions between function candidates. 
-On the other hand, this approach may unnecessarily generate argument interpretations that never match any parameter, wasting work. 
-Furthermore, in the presence of tuple types, this approach may lead to a combinatorial explosion of argument interpretations considered, unless the tuple can be considered as a sequence of elements rather than a unified whole. 
-
-\subsection{Candidate Set Generation}
-All the algorithms discussed to this point generate the complete set of candidate argument interpretations before attempting to match the containing function-call expression. 
-However, given that the top-level expression interpretation that is ultimately chosen is the minimal-cost valid interpretation, any consideration of non-minimal-cost interpretations is wasted work.
-Under the assumption that programmers generally write function calls with relatively low-cost interpretations, a possible work-saving heuristic is to generate only the lowest-cost argument interpretations first, attempt to find a valid top-level interpretation using them, and only if that fails generate the next higher-cost argument interpretations.
-
-\subsubsection{Eager}
-Within the eager approach taken by the existing top-down and bottom-up algorithms, there are still variants to explore. 
-Cormack and Baker do not account for implicit conversions, and thus do not account for the possibility of multiple valid interpretations with distinct costs; Bilson, on the other hand, sorts the list of interpretations to aid in finding minimal-cost interpretations. 
-Sorting the lists of argument or function call interpretations by cost at some point during resolution may provide useful opportunities to short-circuit expression evaluation when a minimal-cost interpretation is found, though it is unclear if this short-circuiting behaviour justifies the cost of the sort.
-
-\subsubsection{Lazy}
-In the presence of implicit conversions, many argument interpretations may match a given parameter by application of an appropriate implicit conversion. 
-However, if programmers actually use relatively few implicit conversions, then the ``on arguments'' approach to implicit conversions generates a large number of high-cost interpretations that may never be used. 
-Even if the ``on parameters'' approach to implicit conversions is used, eager generation of interpretations spends extra time attempting possibly expensive polymorphic or conversion-based matches in cases where an exact monomorphic interpretation exists. 
-
-The essence of the lazy approach to candidate set generation is to wrap the matching algorithm into the element generator of a lazy list, generating only as many elements at a time as are needed to ensure the next-smallest-cost interpretation has been generated. 
-Assuming argument interpretations are provided to the parameter matching algorithm in sorted order, a sorted list of function call interpretations can be produced by generating combinations of arguments sorted by total cost\footnote{I have already developed a lazy $n$-way combination generation algorithm to perform this task.}, then generating function call interpretations in the order suggested by this list. 
-The function call interpretation chosen may have costs of its own, for instance polymorphic type binding, so in some cases a number of argument combinations (any combination whose marginal cost does not exceed the cost of the function call interpretation itself) may need to be considered to determine the next-smallest-cost function call interpretation.
-Ideally, this candidate generation approach leads to very few unused candidates being generated (in the expected case where the programmer has, in fact, provided a validly-typable program), but it is an open question whether or not the overheads of lazy generation exceed the benefit produced from considering fewer interpretations.
-
-\subsubsection{Stepwise Lazy}
-As a compromise between the trade-offs of the eager and lazy approaches, I also propose to investigate a ``stepwise lazy'' approach, where all the interpretations for some ``step'' are eagerly generated, then the interpretations in the later steps are only generated on demand. 
-Under this approach the \CFA resolver could, for instance, try expression interpretations in the following order:
-\begin{enumerate}
-\item Interpretations with no polymorphic type binding or implicit conversions.
-\item Interpretations containing no polymorphic type binding and at least one safe implicit conversion.
-\item Interpretations containing polymorphic type binding, but only safe implicit conversions.
-\item Interpretations containing at least one unsafe implicit conversion.
-\end{enumerate} 
-If a valid expression interpretation is found in one step, it is guaranteed to be lower-cost than any interpretation in a later step (by the structure of \CFA interpretation costs), so no further steps need be considered.
-This approach may save significant amounts of work, especially given that the first steps avoid potentially expensive handling of implicit conversions and type assertion satisfaction entirely, and cover a large proportion of common monomorphic code.
-
-%\subsection{Parameter-Directed}
-%\textbf{TODO: Richard's algorithm isn't Baker (Cormack?), disentangle from this section \ldots}. 
-%The expression resolution algorithm used by the existing iteration of CFA is based on Baker's\cite{Baker82} algorithm for overload resolution in Ada. 
-%The essential idea of this algorithm is to first find the possible interpretations of the most deeply nested subexpressions, then to use these interpretations to recursively generate valid interpretations of their superexpressions. 
-%To simplify matters, the only expressions considered in this discussion of the algorithm are function application and literal expressions; other expression types can generally be considered to be variants of one of these for the purposes of the resolver, \eg variables are essentially zero-argument functions. 
-%If we consider expressions as graph nodes with arcs connecting them to their subexpressions, these expressions form a DAG, generated by the algorithm from the bottom up.
-%Literal expressions are represented by leaf nodes, annotated with the type of the expression, while a function application will have a reference to the function declaration chosen, as well as arcs to the interpretation nodes for its argument expressions; functions are annotated with their return type (or types, in the case of multiple return values).
-%
-%\textbf{TODO: Figure}
-%
-%Baker's algorithm was designed to account for name overloading; Richard Bilson\cite{Bilson03} extended this algorithm to also handle polymorphic functions, implicit conversions and multiple return types when designing the original \CFA compiler. 
-%The core of the algorithm is a function which Baker refers to as $gen\_calls$. 
-%$gen\_calls$ takes as arguments the name of a function $f$ and a list containing the set of possible subexpression interpretations $S_j$ for each argument of the function and returns a set of possible interpretations of calling that function on those arguments. 
-%The subexpression interpretations are generally either singleton sets generated by the single valid interpretation of a literal expression, or the results of a previous call to $gen\_calls$. 
-%If there are no valid interpretations of an expression, the set returned by $gen\_calls$ will be empty, at which point resolution can cease, since each subexpression must have at least one valid interpretation to produce an interpretation of the whole expression. 
-%On the other hand, if for some type $T$ there is more than one valid interpretation of an expression with type $T$, all interpretations of that expression with type $T$ can be collapsed into a single \emph{ambiguous expression} of type $T$, since the only way to disambiguate expressions is by their return types. 
-%If a subexpression interpretation is ambiguous, then any expression interpretation containing it will also be ambiguous. 
-%In the variant of this algorithm including implicit conversions, the interpretation of an expression as type $T$ is ambiguous only if there is more than one \emph{minimal-cost} interpretation of the expression as type $T$, as cheaper expressions are always chosen in preference to more expensive ones.
-%
-%Given this description of the behaviour of $gen\_calls$, its implementation is quite straightforward: for each function declaration $f_i$ matching the name of the function, consider each of the parameter types $p_j$ of $f_i$, attempting to match the type of an element of $S_j$ to $p_j$ (this may include checking of implicit conversions).
-%If no such element can be found, there is no valid interpretation of the expression using $f_i$, while if more than one such (minimal-cost) element is found then an ambiguous interpretation with the result type of $f_i$ is produced. 
-%In the \CFA variant, which includes polymorphic functions, it is possible that a single polymorphic function definition $f_i$ can produce multiple valid interpretations by different choices of type variable bindings; these interpretations are unambiguous so long as the return type of $f_i$ is different for each type binding. 
-%If all the parameters $p_j$ of $f_i$ can be uniquely matched to a candidate interpretation, then a valid interpretation based on $f_i$ and those $p_j$ is produced. 
-%$gen\_calls$ collects the produced interpretations for each $f_i$ and returns them; a top level expression is invalid if this list is empty, ambiguous if there is more than one (minimal-cost) result, or if this single result is ambiguous, and valid otherwise.
-%
-%In this implementation, resolution of a single top-level expression takes time $O(\ldots)$, where \ldots. \textbf{TODO:} \textit{Look at 2.3.1 in Richard's thesis when working out complexity; I think he does get the Baker algorithm wrong on combinations though, maybe\ldots}
-%
-%\textbf{TODO: Basic Lit Review} \textit{Look at 2.4 in Richard's thesis for any possible more-recent citations of Baker\ldots} \textit{Look back at Baker's related work for other papers that look similar to what you're doing, then check their citations as well\ldots} \textit{Look at Richard's citations in 2.3.2 w.r.t. type data structures\ldots}
-%\textit{CormackWright90 seems to describe a solution for the same problem, mostly focused on how to find the implicit parameters}
-
-\section{Proposal}
-Baker~\cite{Baker82} discussed various expression resolution algorithms that can handle name overloading, but left experimental comparison of those algorithms to future work; Bilson~\cite{Bilson03} described one extension of Baker's algorithm to handle implicit conversions, but did not fully explore the space of algorithmic approaches to handle both overloaded names and implicit conversions. 
-This project is intended to experimentally test a number of expression resolution algorithms that are powerful enough to handle the \CFA type-system, including both name overloading and implicit conversions. 
-This comparison closes Baker's open research question, as well as potentially improving Bilson's \CFA compiler.
-
-Rather than testing all of these algorithms in-place in the \CFA compiler, a resolver prototype is being developed that acts on a simplified input language encapsulating the essential details of the \CFA type-system\footnote{Note this simplified input language is not a usable programming language.}. 
-Multiple variants of this resolver prototype will be implemented, each encapsulating a different expression resolution variant, sharing as much code as feasible. 
-These variants will be instrumented to test runtime performance, and run on a variety of input files; the input files may be generated programmatically or from existing code in \CFA or similar languages.
-These experimental results should make it possible to determine the algorithm likely to be most performant in practical use, and replace \CFA's existing expression resolver. 
-
-The experimental results will also provide some empirical sense of the compile-time cost of various language features by comparing the results of the most performant resolver variant that supports a feature with the most performant resolver variant that does not support that feature, a useful capability to guide language design. 
-As an example, there are currently multiple open proposals for how implicit conversions should interact with polymorphic type binding in \CFA, each with distinct levels of expressive power; if the resolver prototype is modified to support each proposal, the optimal algorithm for each proposal can be compared, providing an empirical demonstration of the trade-off between expressive power and compiler runtime. 
-
-This proposed project should provide valuable data on how to implement a performant compiler for programming languages such as \CFA with powerful static type-systems, specifically targeting the feature interaction between name overloading and implicit conversions. 
-This work is not limited in applicability to \CFA, but may also be useful for supporting efficient compilation of the upcoming Concepts standard~\cite{C++concepts} for \CC template constraints, for instance. 
-
-\appendix
-\section{Completion Timeline}
-The following is a preliminary estimate of the time necessary to complete the major components of this research project:
-\begin{center}
-\begin{tabular}{ | r @{--} l | p{4in} | }
-\hline       May 2015 & April 2016   & Project familiarization and generic types design and implementation. \\
-\hline       May 2016 & April 2017   & Design and implement resolver prototype and run performance experiments. \\
-\hline       May 2017 & August 2017  & Integrate new language features and best-performing resolver prototype into \CFA. \\
-\hline September 2017 & January 2018 & Thesis writing and defense. \\
-\hline
-\end{tabular}
-\end{center}
-
-\addcontentsline{toc}{section}{\refname}
-\bibliographystyle{plain}
-\bibliography{cfa}
-
-%\addcontentsline{toc}{section}{\indexname} % add index name to table of contents
-%\begin{theindex}
-%Italic page numbers give the location of the main entry for the referenced term.
-%Plain page numbers denote uses of the indexed term.
-%Entries for grammar non-terminals are italicized.
-%A typewriter font is used for grammar terminals and program identifiers.
-%\indexspace
-%\input{comp_II.ind}
-%\end{theindex}
-
-\end{document}
Index: doc/theses/aaron_moss/comp_II/.gitignore
===================================================================
--- doc/theses/aaron_moss/comp_II/.gitignore	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/aaron_moss/comp_II/.gitignore	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,14 @@
+# generated by latex
+*.aux
+*.bbl
+*.blg
+*.brf
+*.dvi
+*.idx
+*.ilg
+*.ind
+*.log
+*.out
+*.pdf
+*.ps
+*.toc
Index: doc/theses/aaron_moss/comp_II/Makefile
===================================================================
--- doc/theses/aaron_moss/comp_II/Makefile	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/aaron_moss/comp_II/Makefile	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,78 @@
+## Configuration: TeXLIB is the search path exported as TEXINPUTS for latex and as BIBINPUTS for bibtex.
+
+TeXLIB = .:../LaTeXmacros:../LaTeXmacros/listings:../LaTeXmacros/enumitem:../bibliography/:
+LaTeX  = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error
+BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex
+
+## Text source files: each list holds base names that addsuffix turns into file names (empty lists expand to nothing).
+
+SOURCES = ${addsuffix .tex, \
+comp_II \
+}
+
+FIGURES = ${addsuffix .tex, \
+}
+
+PICTURES = ${addsuffix .pstex, \
+}
+
+PROGRAMS = ${addsuffix .tex, \
+}
+
+GRAPHS = ${addsuffix .tex, \
+}
+
+## Final document to build; the intermediate .ps/.dvi/.tex names are derived from it with basename.
+
+DOCUMENT = comp_II.pdf
+
+# Directives: all, clean and predefined name actions rather than files, so they are declared phony #
+.PHONY : all clean predefined
+all : ${DOCUMENT}
+
+clean :
+	rm -f *.bbl *.aux *.dvi *.idx *.ilg *.ind *.brf *.out *.log *.toc *.blg *.pstex_t *.cf \
+		${FIGURES} ${PICTURES} ${PROGRAMS} ${GRAPHS} ${basename ${DOCUMENT}}.ps ${DOCUMENT}
+
+# File Dependencies: .tex -> .dvi (latex, bibtex, makeindex) -> .ps (dvips) -> .pdf (ps2pdf) #
+
+${DOCUMENT} : ${basename ${DOCUMENT}}.ps
+	ps2pdf $<
+
+${basename ${DOCUMENT}}.ps : ${basename ${DOCUMENT}}.dvi
+	dvips $< -o $@
+
+${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex \
+		../LaTeXmacros/common.tex ../LaTeXmacros/indexstyle ../bibliography/cfa.bib
+	# Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
+	if [ ! -r ${basename $@}.ind ] ; then touch ${basename $@}.ind ; fi
+	# Must have *.aux file containing citations for bibtex
+	if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename $@}.tex ; fi
+	-${BibTeX} ${basename $@}
+	# Some citations reference others so run steps again to resolve these citations
+	${LaTeX} ${basename $@}.tex
+	-${BibTeX} ${basename $@}
+	# Make index from *.aux entries and input index at end of document
+	makeindex -s ../LaTeXmacros/indexstyle ${basename $@}.idx
+	${LaTeX} ${basename $@}.tex
+	# Run again to get index title into table of contents
+	${LaTeX} ${basename $@}.tex
+
+predefined :
+	sed -f predefined.sed ${basename ${DOCUMENT}}.tex > ${basename $@}.cf
+
+## Default recipes: pattern rules that convert a *.fig file with fig2dev (the .pstex rule also writes the companion $@_t file).
+
+%.tex : %.fig
+	fig2dev -L eepic $< > $@
+
+%.ps : %.fig
+	fig2dev -L ps $< > $@
+
+%.pstex : %.fig
+	fig2dev -L pstex $< > $@
+	fig2dev -L pstex_t -p $@ $< > $@_t
+
+# Local Variables: #
+# compile-command: "make" #
+# End: #
Index: doc/theses/aaron_moss/comp_II/comp_II.tex
===================================================================
--- doc/theses/aaron_moss/comp_II/comp_II.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/aaron_moss/comp_II/comp_II.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,636 @@
+% inline code ©...© (copyright symbol) emacs: C-q M-)
+% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
+% blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
+% green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
+% LaTex escape §...§ (section symbol) emacs: C-q M-'
+% keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
+% math escape $...$ (dollar symbol)
+
+\documentclass[twoside,11pt]{article}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% Latex packages used in the document (copied from CFA user manual).
+\usepackage[T1]{fontenc}                                % allow Latin1 (extended ASCII) characters
+\usepackage{textcomp}
+\usepackage[latin1]{inputenc}
+\usepackage{fullpage,times,comment}
+\usepackage{epic,eepic}
+\usepackage{upquote}									% switch curled `'" to straight
+\usepackage{calc}
+\usepackage{xspace}
+\usepackage{graphicx}
+\usepackage{varioref}									% extended references
+\usepackage{listings}									% format program code
+\usepackage[flushmargin]{footmisc}						% support label/reference in footnote
+\usepackage{latexsym}                                   % \Box glyph
+\usepackage{mathptmx}                                   % better math font with "times"
+\usepackage[usenames]{color}
+\usepackage[pagewise]{lineno}
+\renewcommand{\linenumberfont}{\scriptsize\sffamily}
+\input{common}                                          % bespoke macros used in the document
+\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
+\usepackage{breakurl}
+\renewcommand{\UrlFont}{\small\sf}
+
+\setlength{\topmargin}{-0.45in}							% move running title into header
+\setlength{\headsep}{0.25in}
+
+\usepackage{caption}
+\usepackage{subcaption}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\newsavebox{\LstBox}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\title{
+\Huge \vspace*{1in} Efficient Type Resolution in \CFA \\
+\huge \vspace*{0.25in} PhD Comprehensive II Research Proposal
+\vspace*{1in}
+}
+
+\author{
+\huge Aaron Moss \\
+\Large \vspace*{0.1in} \texttt{a3moss@uwaterloo.ca} \\
+\Large Cheriton School of Computer Science \\
+\Large University of Waterloo
+}
+
+\date{
+\today
+}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\newcommand{\bigO}[1]{O\!\left( #1 \right)}
+
+\begin{document}
+\pagestyle{headings}
+% changed after setting pagestyle
+\renewcommand{\sectionmark}[1]{\markboth{\thesection\quad #1}{\thesection\quad #1}}
+\renewcommand{\subsectionmark}[1]{\markboth{\thesubsection\quad #1}{\thesubsection\quad #1}}
+\pagenumbering{roman}
+\linenumbers                                            % comment out to turn off line numbering
+
+\maketitle
+\thispagestyle{empty}
+
+\clearpage
+\thispagestyle{plain}
+\pdfbookmark[1]{Contents}{section}
+\tableofcontents
+
+\clearpage
+\thispagestyle{plain}
+\pagenumbering{arabic}
+
+\section{Introduction}
+
+\CFA\footnote{Pronounced ``C-for-all'', and written \CFA or \CFL.} is an evolutionary modernization of the C programming language currently being designed and built at the University of Waterloo by a team led by Peter Buhr. 
+\CFA both fixes existing design problems and adds multiple new features to C, including name overloading, user-defined operators, parametric-polymorphic routines, and type constructors and destructors, among others. 
+The new features make \CFA more powerful and expressive than C, but impose a compile-time cost, particularly in the expression resolver, which must evaluate the typing rules of a significantly more complex type-system.
+
+The primary goal of this research project is to develop a sufficiently performant expression resolution algorithm, experimentally validate its performance, and integrate it into CFA, the \CFA reference compiler.
+Secondary goals of this project include the development of various new language features for \CFA: parametric-polymorphic (``generic'') types have already been designed and implemented, and reference types and user-defined conversions are under design consideration. 
+An experimental performance-testing architecture for resolution algorithms is under development to determine the relative performance of different expression resolution algorithms, as well as the compile-time cost of adding various new features to the \CFA type-system. 
+More broadly, this research should provide valuable data for implementers of compilers for other programming languages with similarly powerful static type-systems.
+
+\section{\CFA}
+
+To make the scope of the proposed expression resolution problem more explicit, it is necessary to define the features of both C and \CFA (both current and proposed) that affect this algorithm. 
+In some cases the interactions of multiple features make expression resolution a significantly more complex problem than any individual feature would; in other cases a feature that does not by itself add any complexity to expression resolution triggers previously rare edge cases more frequently.
+
+It is important to note that \CFA is not an object-oriented language.
+\CFA does have a system of (possibly implicit) type conversions derived from C's type conversions; while these conversions may be thought of as something like an inheritance hierarchy, the underlying semantics are significantly different and such an analogy is loose at best. 
+Particularly, \CFA has no concept of ``subclass'', and thus no need to integrate an inheritance-based form of polymorphism with its parametric and overloading-based polymorphism. 
+The graph structure of the \CFA type conversions is also markedly different than an inheritance graph; it has neither a top nor a bottom type, and does not satisfy the lattice properties typical of inheritance graphs.
+
+\subsection{Polymorphic Functions}
+The most significant feature \CFA adds is parametric-polymorphic functions. 
+Such functions are written using a ©forall© clause (which gives the language its name):
+\begin{lstlisting}
+®forall(otype T)®
+T identity(T x) {
+    return x;
+}
+
+int forty_two = identity(42); // T is bound to int, forty_two == 42
+\end{lstlisting}
+The ©identity© function above can be applied to any complete object type (or ``©otype©''). 
+The type variable ©T© is transformed into a set of additional implicit parameters to ©identity©, which encode sufficient information about ©T© to create and return a variable of that type. 
+The current \CFA implementation passes the size and alignment of the type represented by an ©otype© parameter, as well as an assignment operator, constructor, copy constructor and destructor. 
+Here, the runtime cost of polymorphism is spread over each polymorphic call, due to passing more arguments to polymorphic functions; preliminary experiments have shown this overhead to be similar to \CC virtual function calls. 
+Determining if packaging all polymorphic arguments to a function into a virtual function table would reduce the runtime overhead of polymorphic calls is an open research question. 
+
+Since bare polymorphic types do not provide a great range of available operations, \CFA provides a \emph{type assertion} mechanism to provide further information about a type:
+\begin{lstlisting}
+forall(otype T ®| { T twice(T); }®)
+T four_times(T x) {
+    return twice( twice(x) );
+}
+
+double twice(double d) { return d * 2.0; } // (1)
+
+double magic = four_times(10.5); // T is bound to double, uses (1) to satisfy type assertion
+\end{lstlisting}
+These type assertions may be either variable or function declarations that depend on a polymorphic type variable. 
+©four_times© can only be called with an argument for which there exists a function named ©twice© that can take that argument and return another value of the same type; a pointer to the appropriate ©twice© function is passed as an additional implicit parameter to the call of ©four_times©.
+
+Monomorphic specializations of polymorphic functions can themselves be used to satisfy type assertions. 
+For instance, ©twice© could have been defined using the \CFA syntax for operator overloading as:
+\begin{lstlisting}
+forall(otype S | { ®S ?+?(S, S);® })
+S twice(S x) { return x + x; }  // (2)
+\end{lstlisting} 
+This version of ©twice© works for any type ©S© that has an addition operator defined for it, and it could have been used to satisfy the type assertion on ©four_times©. 
+The compiler accomplishes this by creating a wrapper function calling ©twice // (2)© with ©S© bound to ©double©, then providing this wrapper function to ©four_times©\footnote{©twice // (2)© could also have had a type parameter named ©T©; \CFA specifies renaming of the type parameters, which would avoid the name conflict with the type variable ©T© of ©four_times©.}. 
+
+Finding appropriate functions to satisfy type assertions is essentially a recursive case of expression resolution, as it takes a name (that of the type assertion) and attempts to match it to a suitable declaration \emph{in the current scope}. 
+If a polymorphic function can be used to satisfy one of its own type assertions, this recursion may not terminate, as it is possible that the function is examined as a candidate for its own type assertion an unbounded number of times. 
+To avoid infinite loops, the current CFA compiler imposes a fixed limit on the possible depth of recursion, similar to that employed by most \CC compilers for template expansion; this restriction means that there are some semantically well-typed expressions that cannot be resolved by CFA. 
+One area of potential improvement this project proposes to investigate is the possibility of using the compiler's knowledge of the current set of declarations to more precisely determine when further type assertion satisfaction recursion does not produce a well-typed expression.
+
+\subsubsection{Traits}
+\CFA provides \emph{traits} as a means to name a group of type assertions, as in the example below:
+\begin{lstlisting}
+®trait has_magnitude(otype T)® {
+    bool ?<?(T, T);        // comparison operator for T
+    T -?(T);               // negation operator for T
+    void ?{}(T*, zero_t);  // constructor from 0 literal
+};
+
+forall(otype M | has_magnitude(M))
+M abs( M m ) {
+    M zero = 0;  // uses zero_t constructor from trait
+    return m < zero ? -m : m;
+}
+
+forall(otype M | has_magnitude(M))
+M max_magnitude( M a, M b ) {
+    return abs(a) < abs(b) ? b : a; 
+}
+\end{lstlisting}
+
+Semantically, traits are simply named lists of type assertions, but they may be used for many of the same purposes that interfaces in Java or abstract base classes in \CC are used for.
+Unlike Java interfaces or \CC base classes, \CFA types do not explicitly state any inheritance relationship to traits they satisfy; this can be considered a form of structural inheritance, similar to implementation of an interface in Go, as opposed to the nominal inheritance model of Java and \CC. 
+Nominal inheritance can be simulated with traits using marker variables or functions:
+\begin{lstlisting}
+trait nominal(otype T) {
+    ®T is_nominal;®
+};
+
+int is_nominal;  // int now satisfies the nominal trait
+{
+    char is_nominal; // char satisfies the nominal trait
+}
+// char no longer satisfies the nominal trait here  
+\end{lstlisting}
+
+Traits, however, are significantly more powerful than nominal-inheritance interfaces; firstly, due to the scoping rules of the declarations that satisfy a trait's type assertions, a type may not satisfy a trait everywhere that the type is declared, as with ©char© and the ©nominal© trait above. 
+Secondly, traits may be used to declare a relationship among multiple types, a property that may be difficult or impossible to represent in nominal-inheritance type systems:
+\begin{lstlisting}
+trait pointer_like(®otype Ptr, otype El®) {
+    lvalue El *?(Ptr); // Ptr can be dereferenced into a modifiable value of type El
+}
+
+struct list {
+    int value;
+    list *next;  // may omit "struct" on type names
+};
+
+typedef list *list_iterator;
+
+lvalue int *?( list_iterator it ) {
+    return it->value;
+}
+\end{lstlisting}
+
+In the example above, ©(list_iterator, int)© satisfies ©pointer_like© by the user-defined dereference function, and ©(list_iterator, list)© also satisfies ©pointer_like© by the built-in dereference operator for pointers. 
+Given a declaration ©list_iterator it©, ©*it© can be either an ©int© or a ©list©, with the meaning disambiguated by context (\eg ©int x = *it;© interprets ©*it© as an ©int©, while ©(*it).value = 42;© interprets ©*it© as a ©list©).
+While a nominal-inheritance system with associated types could model one of those two relationships by making ©El© an associated type of ©Ptr© in the ©pointer_like© implementation, few such systems could model both relationships simultaneously.
+
+The flexibility of \CFA's implicit trait-satisfaction mechanism provides programmers with a great deal of power, but also blocks some optimization approaches for expression resolution. 
+The ability of types to begin to or cease to satisfy traits when declarations go into or out of scope makes caching of trait satisfaction judgements difficult, and the ability of traits to take multiple type parameters can lead to a combinatorial explosion of work in any attempt to pre-compute trait satisfaction relationships. 
+On the other hand, the addition of a nominal inheritance mechanism to \CFA's type system or replacement of \CFA's trait satisfaction system with a more object-oriented inheritance model and investigation of possible expression resolution optimizations for such a system may be an interesting avenue of further research.
+
+\subsection{Name Overloading}
+In C, no more than one variable or function in the same scope may share the same name\footnote{Technically, C has multiple separated namespaces, one holding ©struct©, ©union©, and ©enum© tags, one holding labels, one holding typedef names, variable, function, and enumerator identifiers, and one for each ©struct© or ©union© type holding the field names.}, and variable or function declarations in inner scopes with the same name as a declaration in an outer scope hide the outer declaration. 
+This restriction makes finding the proper declaration to match to a variable expression or function application a simple matter of symbol-table lookup, which can be easily and efficiently implemented. 
+\CFA, on the other hand, allows overloading of variable and function names, so long as the overloaded declarations do not have the same type, avoiding the multiplication of variable and function names for different types common in the C standard library, as in the following example:
+\begin{lstlisting}
+#include <limits.h>
+
+int max(int a, int b) { return a < b ? b : a; }  // (1)
+double max(double a, double b) { return a < b ? b : a; }  // (2)
+
+int max = INT_MAX;     // (3)
+double max = DBL_MAX;  // (4)
+
+max(7, -max);   // uses (1) and (3), by matching int type of the constant 7 
+max(max, 3.14); // uses (2) and (4), by matching double type of the constant 3.14
+
+max(max, -max);  // ERROR: ambiguous
+int m = max(max, -max); // uses (1) once and (3) twice, by matching return type
+\end{lstlisting}
+
+The presence of name overloading in \CFA means that simple table lookup is insufficient to match identifiers to declarations, and a type matching algorithm must be part of expression resolution.
+
+\subsection{Implicit Conversions}
+In addition to the multiple interpretations of an expression produced by name overloading, \CFA must support all of the implicit conversions present in C for backward compatibility, producing further candidate interpretations for expressions. 
+C does not have an inheritance hierarchy of types, but the C standard's rules for the ``usual arithmetic conversions'' define which of the built-in types are implicitly convertible to which other types, and the relative cost of any pair of such conversions from a single source type. 
+\CFA adds to the usual arithmetic conversions rules defining the cost of binding a polymorphic type variable in a function call; such bindings are cheaper than any \emph{unsafe} (narrowing) conversion, \eg ©int© to ©char©, but more expensive than any \emph{safe} (widening) conversion, \eg ©int© to ©double©. 
+
+The expression resolution problem, then, is to find the unique minimal-cost interpretation of each expression in the program, where all identifiers must be matched to a declaration, and implicit conversions or polymorphic bindings of the result of an expression may increase the cost of the expression. 
+Note that which subexpression interpretation is minimal-cost may require contextual information to disambiguate. 
+For instance, in the example in the previous subsection, ©max(max, -max)© cannot be unambiguously resolved, but ©int m = max(max, -max)© has a single minimal-cost resolution. 
+While the interpretation ©int m = (int)max((double)max, -(double)max)© is also a valid interpretation, it is not minimal-cost due to the unsafe cast from the ©double© result of ©max© to ©int© (the two ©double© casts function as type ascriptions selecting ©double max© rather than casts from ©int max© to ©double©, and as such are zero-cost).
+
+\subsubsection{User-generated Implicit Conversions}
+One possible additional feature to \CFA included in this research proposal is \emph{user-generated implicit conversions}. 
+Such a conversion system should be simple for programmers to utilize, and fit naturally with the existing design of implicit conversions in C; ideally it would also be sufficiently powerful to encode C's usual arithmetic conversions itself, so that \CFA only has one set of rules for conversions. 
+
+Ditchfield~\cite{Ditchfield:conversions} laid out a framework for using polymorphic-conversion-constructor functions to create a directed acyclic graph (DAG) of conversions. 
+A monomorphic variant of these functions can be used to mark a conversion arc in the DAG as only usable as the final step in a conversion. 
+With these two types of conversion arcs, separate DAGs can be created for the safe and the unsafe conversions, and conversion cost can be represented as the length of the shortest path through the DAG from one type to another. 
+\begin{figure}[h]
+\centering
+\includegraphics{conversion_dag}
+\caption{A portion of the implicit conversion DAG for built-in types.}\label{fig:conv_dag}
+\end{figure}
+As can be seen in Figure~\ref{fig:conv_dag}, there are either safe or unsafe paths between each of the arithmetic types listed; the ``final'' arcs are important both to avoid creating cycles in the signed-unsigned conversions, and to disambiguate potential diamond conversions (\eg, if the ©int© to ©unsigned int© conversion was not marked final there would be two length-two paths from ©int© to ©unsigned long©, making it impossible to choose which one; however, since the ©unsigned int© to ©unsigned long© arc can not be traversed after the final ©int© to ©unsigned int© arc, there is a single unambiguous conversion path from ©int© to ©unsigned long©).
+
+Open research questions on this topic include:
+\begin{itemize}
+\item Can a conversion graph be generated that represents each allowable conversion in C with a unique minimal-length path such that the path lengths accurately represent the relative costs of the conversions?
+\item Can such a graph representation be usefully augmented to include user-defined types as well as built-in types?
+\item Can the graph be efficiently represented and used in the expression resolver?
+\end{itemize}
+
+\subsection{Constructors and Destructors}
+Rob Shluntz, a current member of the \CFA research team, has added constructors and destructors to \CFA. 
+Each type has an overridable default-generated zero-argument constructor, copy constructor, assignment operator, and destructor.
+For ©struct© types these functions each call their equivalents on each field of the ©struct©. 
+This feature affects expression resolution because an ©otype© type variable ©T© implicitly adds four type assertions, one for each of these four functions, so assertion resolution is pervasive in \CFA polymorphic functions, even those without any explicit type assertions. 
+The following example shows the implicitly-generated code in green:
+\begin{lstlisting}
+struct kv {
+    int key;
+    char *value;
+};
+
+¢void ?{}(kv *this) {  // default constructor
+    ?{}(&(this->key));  // call recursively on members
+    ?{}(&(this->value));
+}
+void ?{}(kv *this, kv that) {  // copy constructor
+    ?{}(&(this->key), that.key);
+    ?{}(&(this->value), that.value);
+}
+kv ?=?(kv *this, kv that) {  // assignment operator
+    ?=?(&(this->key), that.key);
+    ?=?(&(this->value), that.value);
+    return *this;
+}
+void ^?{}(kv *this) {  // destructor
+    ^?{}(&(this->key));
+    ^?{}(&(this->value));
+}¢
+
+forall(otype T ¢| { void ?{}(T*); void ?{}(T*, T); T ?=?(T*, T); void ^?{}(T*); }¢)
+void foo(T);
+\end{lstlisting}
+
+\subsection{Generic Types}
+I have already added a generic type capability to \CFA, designed to efficiently and naturally integrate with \CFA's existing polymorphic functions. 
+A generic type can be declared by placing a ©forall© specifier on a ©struct© or ©union© declaration, and instantiated using a parenthesized list of types after the type name:
+\begin{lstlisting}
+forall(otype R, otype S) struct pair {
+    R first;
+    S second;
+};
+
+forall(otype T)
+T value( pair(const char*, T) *p ) { return p->second; }
+
+pair(const char*, int) p = { "magic", 42 };
+int magic = value( &p );
+\end{lstlisting}
+For \emph{concrete} generic types, that is, those where none of the type parameters depend on polymorphic type variables (like ©pair(const char*, int)© above), the struct is essentially template expanded to a new struct type; for \emph{polymorphic} generic types (such as ©pair(const char*, T)© above), member access is handled by a runtime calculation of the field offset, based on the size and alignment information of the polymorphic parameter type. 
+The default-generated constructors, destructor and assignment operator for a generic type are polymorphic functions with the same list of type parameters as the generic type definition.
+
+Aside from giving users the ability to create more parameterized types than just the built-in pointer, array and function types, the combination of generic types with polymorphic functions and implicit conversions makes the edge case where the resolver may enter an infinite loop much more common, as in the following code example: 
+\begin{lstlisting}
+forall(otype T) struct box { T x; };
+
+void f(void*); // (1)
+
+forall(otype S)
+void f(box(S)* b) { // (2)
+	f(®(void*)0®);
+}
+\end{lstlisting}
+
+The loop in the resolver happens as follows:
+\begin{itemize} 
+\item Since there is an implicit conversion from ©void*© to any pointer type, the highlighted expression can be interpreted as either a ©void*©, matching ©f // (1)©, or a ©box(S)*© for some type ©S©, matching ©f // (2)©.
+\item To determine the cost of the ©box(S)© interpretation, a type must be found for ©S© that satisfies the ©otype© implicit type assertions (assignment operator, default and copy constructors, and destructor); one option is ©box(S2)© for some type ©S2©.
+\item The assignment operator, default and copy constructors, and destructor of ©box(T)© are also polymorphic functions, each of which requires the type parameter ©T© to have an assignment operator, default and copy constructors, and destructor. When choosing an interpretation for ©S2©, one option is ©box(S3)©, for some type ©S3©.
+\item The previous step repeats until stopped, with four times as much work performed at each step.
+\end{itemize}
+This problem can occur in any resolution context where a polymorphic function that can satisfy its own type assertions is required for a possible interpretation of an expression with no constraints on its type, and is thus not limited to combinations of generic types with ©void*© conversions.
+However, constructors for generic types often satisfy their own assertions and a polymorphic conversion such as the ©void*© conversion to a polymorphic variable is a common way to create an expression with no constraints on its type. 
+As discussed above, the \CFA expression resolver must handle this possible infinite recursion somehow, and it occurs fairly naturally in code like the above that uses generic types. 
+
+\subsection{Tuple Types}
+\CFA adds \emph{tuple types} to C, a syntactic facility for referring to lists of values anonymously or with a single identifier. 
+An identifier may name a tuple, and a function may return one. 
+Particularly relevantly for resolution, a tuple may be implicitly \emph{destructured} into a list of values, as in the call to ©swap©:
+\begin{lstlisting}
+[char, char] x = [ '!', '?' ];  // (1)
+int x = 42;  // (2)
+
+forall(otype T) [T, T] swap( T a, T b ) { return [b, a]; }  // (3)
+
+x = swap( x ); // destructure [char, char] x into two elements of parameter list
+// cannot use int x for parameter, not enough arguments to swap
+
+void swap( int, char, char ); // (4)
+
+swap( x, x ); // resolved as (4) on (2) and (1)
+// (3) on (2) and (2) is close, but the polymorphic binding makes it not minimal-cost
+\end{lstlisting}
+Tuple destructuring means that the mapping from the position of a subexpression in the argument list to the position of a parameter in the function declaration is not straightforward, as some arguments may be expandable to different numbers of parameters, like ©x© above. 
+In the second example, the second ©x© argument can be resolved starting at the second or third parameter of ©swap©, depending on which interpretation of ©x© was chosen for the first argument.
+
+\subsection{Reference Types}
+I have been designing \emph{reference types} for \CFA, in collaboration with the rest of the \CFA research team. 
+Given some type ©T©, a ©T&© (``reference to ©T©'') is essentially an automatically dereferenced pointer; with these semantics most of the C standard's discussions of lvalues can be expressed in terms of references instead, with the benefit of being able to express the difference between the reference and non-reference version of a type in user code. 
+References preserve C's existing qualifier-dropping lvalue-to-rvalue conversion (\eg a ©const volatile int&© can be implicitly converted to a bare ©int©).
+The reference proposal also adds an rvalue-to-lvalue conversion to \CFA, implemented by storing the value in a new compiler-generated temporary and passing a reference to the temporary. 
+These two conversions can chain, producing a qualifier-dropping conversion for references, for instance converting a reference to a ©const int© into a reference to a non-©const int© by copying the originally referred-to value into a fresh temporary and taking a reference to this temporary, as in:
+\begin{lstlisting} 
+const int magic = 42;
+
+void inc_print( int& x ) { printf("%d\n", ++x); }
+
+inc_print( magic ); // legal; implicitly generated code in green below:
+
+¢int tmp = magic;¢ // to safely strip const-qualifier
+¢inc_print( tmp );¢ // tmp is incremented, magic is unchanged
+\end{lstlisting}
+These reference conversions may also chain with the other implicit type-conversions. 
+The main implication of the reference conversions for expression resolution is the multiplication of available implicit conversions, though given the restricted context reference conversions may be able to be treated efficiently as a special case of implicit conversions.
+
+\subsection{Special Literal Types}
+Another proposal currently under consideration for the \CFA type-system is assigning special types to the literal values ©0© and ©1©. 
+Implicit conversions from these types allow ©0© and ©1© to be considered as values of many different types, depending on context, allowing expression desugarings like ©if ( x ) {}© $\Rightarrow$ ©if ( x != 0 ) {}© to be implemented efficiently and precisely. 
+This approach is a generalization of C's existing behaviour of treating ©0© as either an integer zero or a null pointer constant, and treating either of those values as boolean false. 
+The main implication for expression resolution is that the frequently encountered expressions ©0© and ©1© may have a large number of valid interpretations.
+
+\subsection{Deleted Function Declarations}
+One final proposal for \CFA with an impact on the expression resolver is \emph{deleted function declarations}; in \CCeleven, a function declaration can be deleted as below:
+\begin{lstlisting}
+int somefn(char) = delete;
+\end{lstlisting}
+This feature is typically used in \CCeleven to make a type non-copyable by deleting its copy constructor and assignment operator\footnote{In previous versions of \CC a type could be made non-copyable by declaring a private copy constructor and assignment operator, but not defining either. This idiom is well-known, but depends on some rather subtle and \CC-specific rules about private members and implicitly-generated functions; the deleted-function form is both clearer and less verbose.}, or forbidding some interpretations of a polymorphic function by specifically deleting the forbidden overloads\footnote{Specific polymorphic function overloads can also be forbidden in previous \CC versions through use of template metaprogramming techniques, though this advanced usage is beyond the skills of many programmers. A similar effect can be produced on an ad-hoc basis at the appropriate call sites through use of casts to determine the function type. In both cases, the deleted-function form is clearer and more concise.}. 
+To add a similar feature to \CFA involves including the deleted function declarations in expression resolution along with the normal declarations, but producing a compiler error if the deleted function is the best resolution. 
+How conflicts should be handled between resolution of an expression to both a deleted and a non-deleted function is a small but open research question.
+
+\section{Expression Resolution}
+\subsection{Analysis}
+The expression resolution problem is determining an optimal match between some combination of argument interpretations and the parameter list of some overloaded instance of a function; the argument interpretations are produced by recursive invocations of expression resolution, where the base case is zero-argument functions (which are, for purposes of this discussion, semantically equivalent to named variables or constant literal expressions). 
+Assuming that the matching between a function's parameter list and a combination of argument interpretations can be done in $\bigO{p^k}$ time, where $p$ is the number of parameters and $k$ is some positive number, if there are $\bigO{i}$ valid interpretations for each subexpression, there will be $\bigO{i}$ candidate functions and $\bigO{i^p}$ possible argument combinations for each expression, so for a single recursive call expression resolution takes $\bigO{i^{p+1} \cdot p^k}$ time if it must compare all combinations, or $\bigO{i(p+1) \cdot p^k}$ time if argument-parameter matches can be chosen independently of each other. 
+Given these bounds, resolution of a single top-level expression tree of depth $d$ takes $\bigO{i^{p+1} \cdot p^{k \cdot d}}$ time under full-combination matching, or $\bigO{i(p+1) \cdot p^{k \cdot d}}$ time for independent-parameter matching\footnote{A call tree has leaves at depth $\bigO{d}$, and each internal node has $\bigO{p}$ fan-out, producing $\bigO{p^d}$ total recursive calls.}.
+
+Expression resolution is somewhat unavoidably exponential in $d$, the depth of the expression tree, and if arguments cannot be matched to parameters independently of each other, expression resolution is also exponential in $p$. 
+However, both $d$ and $p$ are fixed by the programmer, and generally bounded by reasonably small constants. 
+$k$, on the other hand, is mostly dependent on the representation of types in the system and the efficiency of type assertion checking; if a candidate argument combination can be compared to a function parameter list in linear time in the length of the list (\ie $k = 1$), then the $p^{k \cdot d}$ factor is linear in the input size of the source code for the expression, otherwise the resolution algorithm exhibits super-linear runtime scaling on code containing more-deeply nested expressions.
+The number of valid interpretations of any subexpression, $i$, is bounded by the number of types in the system, which is possibly infinite, though practical resolution algorithms for \CFA must be able to place some finite bound on $i$, possibly at the expense of type-system completeness. 
+
+\subsection{Expression Costs}
+The expression resolution problem involves minimization of a cost function; loosely defined, this cost function is the number of implicit conversions in the top-level expression interpretation. 
+With more specificity, the \emph{cost} of a particular expression interpretation is a lexicographically-ordered tuple, where each element of the tuple corresponds to a particular kind of conversion. 
+In \CFA today, cost is a three-tuple including the number of unsafe conversions, the number of polymorphic parameter bindings, and the number of safe conversions. 
+These counts include conversions used in subexpression interpretations, as well as those necessary to satisfy the type assertions of any polymorphic functions included in the interpretation. 
+
+\begin{lstlisting}
+void f(char, long);  // $f_1$ - cost (2, 0, 1)
+forall(otype T) void f(T, long); // $f_2$ - cost (0, 1, 1)
+void f(long, long); // $f_{3a}$ - cost (0, 0, 2)
+void f(int, float); // $f_{3b}$ - cost (0, 0, 2)
+void f(int, long);  // $f_4$ - cost (0, 0, 1)
+
+f(7, 11);
+\end{lstlisting}
+
+In the example above, the expression resolves to $f_4$. 
+$f_1$ has an unsafe conversion (from ©int© to ©char©), and is thus the highest cost, followed by $f_2$, which has a polymorphic binding (from ©int© to ©T©). 
+None of $f_{3a}$, $f_{3b}$, or $f_4$ matches exactly with the type of the call expression (©void (*)(int, int)©), each involving safe conversions, but in this case $f_4$ is cheaper than $f_{3a}$, because it converts fewer arguments, and is also cheaper than $f_{3b}$, because ©long© is a closer match for ©int© than ©float© is. 
+If the declaration of $f_4$ was missing, the expression would be ambiguous, because the two single-step ©int©-to-©long© conversions in $f_{3a}$ cost the same as the one double-step ©int©-to-©float© conversion in $f_{3b}$.
+
+In the course of this project I may modify the cost tuple,\footnote{I have considered adding an element to distinguish between cast expressions used as conversions and those used as type ascriptions, and another element to differentiate interpretations based on closer qualifier matches. The existing costing of polymorphic functions could also be made more precise than a bare count of parameter bindings.} but the essential nature of the cost calculation should remain the same.
+
+\subsection{Objectives}
+The research goal of this project is to develop a performant expression resolver for \CFA; this analysis suggests three primary areas of investigation to accomplish that end. 
+The first area of investigation is efficient argument-parameter matching; Bilson~\cite{Bilson03} mentions significant optimization opportunities available in the current literature to improve on the existing CFA compiler.
+%TODO: look up and lit review 
+The second area of investigation is minimizing dependencies between argument-parameter matches; the current CFA compiler attempts to match entire argument combinations against functions at once, potentially attempting to match the same argument against the same parameter multiple times. 
+Whether the feature set of \CFA admits an expression resolution algorithm where arguments can be matched to parameters independently of other arguments in the same function application is an area of open research; polymorphic type parameters produce enough cross-argument dependencies that the problem is not trivial. 
+If cross-argument resolution dependencies cannot be completely eliminated, effective caching strategies to reduce duplicated work between equivalent argument-parameter matches in different combinations may mitigate the asymptotic deficits of the whole-combination matching approach. 
+The final area of investigation is heuristics and algorithmic approaches to reduce the number of argument interpretations considered in the common case; if argument-parameter matches cannot be made independent, even small reductions in $i$ should yield significant reductions in the $i^{p+1}$ resolver runtime factor. 
+
+The discussion below presents a number of largely orthogonal axes for expression resolution algorithm design to be investigated, noting prior work where applicable. 
+Though some of the proposed improvements to the expression resolution algorithm are based on heuristics rather than asymptotically superior algorithms, it should be noted that programmers often employ idioms and other programming patterns to reduce the mental burden of producing correct code, and if these patterns can be identified and exploited by the compiler then the significant reduction in expression resolution time for common, idiomatic expressions should result in lower total compilation time even for code including difficult-to-resolve expressions that push the expression resolver to its theoretical worst case.
+
+\subsection{Argument-Parameter Matching}
+The first axis for consideration is the argument-parameter matching direction --- whether the type matching for a candidate function to a set of candidate arguments is directed by the argument types or the parameter types. 
+For programming languages without implicit conversions, argument-parameter matching is essentially the entirety of the expression resolution problem, and is generally referred to as ``overload resolution'' in the literature.
+All expression-resolution algorithms form a DAG of interpretations, some explicitly, some implicitly; in this DAG, arcs point from function-call interpretations to argument interpretations, as in Figure~\ref{fig:res_dag}:
+\begin{figure}[h]
+\centering
+\begin{subfigure}[h]{2in}
+\begin{lstlisting}
+int *p;  // $p_i$
+char *p; // $p_c$ 
+
+double *f(int*, int*); // $f_d$
+char *f(char*, int*); // $f_c$
+
+f( f( p, p ), p );
+\end{lstlisting}
+\end{subfigure}~\begin{subfigure}[h]{2in}
+\includegraphics{resolution_dag}
+\end{subfigure}
+\caption{Resolution DAG for a simple expression. Functions that do not have a valid argument matching are covered with an \textsf{X}.}\label{fig:res_dag}
+\end{figure}
+
+Note that some interpretations may be part of more than one super-interpretation, as with the second $p_i$ in the bottom row, while some valid subexpression interpretations, like $f_d$ in the middle row, are not used in any interpretation of their superexpression.
+
+\subsubsection{Argument-directed (Bottom-up)}
+Baker's algorithm for expression resolution~\cite{Baker82} pre-computes argument candidates, from the leaves of the expression tree up.
+For each candidate function, Baker attempts to match argument types to parameter types in sequence, failing if any parameter cannot be matched.
+
+Bilson~\cite{Bilson03} similarly pre-computes argument candidates in the original \CFA compiler, but then explicitly enumerates all possible argument combinations for a multi-parameter function; these argument combinations are matched to the parameter types of the candidate function as a unit rather than individual arguments.
+This approach is less efficient than Baker's approach, as the same argument may be compared to the same parameter many times, but allows a more straightforward handling of polymorphic type-binding and multiple return-types.
+It is possible the efficiency losses here relative to Baker could be significantly reduced by keeping a memoized cache of argument-parameter type comparisons and reading previously-seen argument-parameter matches from this cache rather than recomputing them.
+
+\subsubsection{Parameter-directed (Top-down)}
+Unlike Baker and Bilson, Cormack's algorithm~\cite{Cormack81} requests argument candidates that match the type of each parameter of each candidate function, from the top-level expression down; memoization of these requests is presented as an optimization.
+As presented, this algorithm requires the result of the expression to have a known type; an algorithm based on Cormack's could reasonably request a candidate set of any return type, though such a set may be quite large.
+
+\subsubsection{Hybrid}
+This proposal includes the investigation of hybrid top-down/bottom-up argument-parameter matching.
+A reasonable hybrid approach might take a top-down approach when the expression to be matched has a fixed type, and a bottom-up approach in untyped contexts.
+This approach may involve switching from one matching direction to the other at different levels of the expression tree. 
+For instance, in:
+\begin{lstlisting}
+forall(otype T)
+int f(T x);  // (1)
+
+void* f(char y);  // (2)
+
+int x = f( f( '!' ) );
+\end{lstlisting}
+the outer call to ©f© must have a return type that is (implicitly convertible to) ©int©, so a top-down approach is used to select \textit{(1)} as the proper interpretation of ©f©. \textit{(1)}'s parameter ©x©, however, is an unbound type variable, and can thus take a value of any complete type, providing no guidance for the choice of candidate for the inner call to ©f©. The leaf expression ©'!'©, however, determines a zero-cost interpretation of the inner ©f© as \textit{(2)}, providing a minimal-cost expression resolution where ©T© is bound to ©void*©.
+
+Deciding when to switch between bottom-up and top-down resolution to minimize wasted work in a hybrid algorithm is a necessarily heuristic process, and finding good heuristics for which subexpressions to switch matching strategies on is an open question.
+One reasonable approach might be to set a threshold $t$ for the number of candidate functions, and to use top-down resolution for any subexpression with fewer than $t$ candidate functions, to minimize the number of unmatchable argument interpretations computed, but to use bottom-up resolution for any subexpression with at least $t$ candidate functions, to reduce duplication in argument interpretation computation between the different candidate functions. 
+
+Ganzinger and Ripken~\cite{Ganzinger80} propose an approach (later refined by Pennello~\etal~\cite{Pennello80}) that uses a top-down filtering pass followed by a bottom-up filtering pass to reduce the number of candidate interpretations; they prove that for the Ada programming language a small number of such iterations is sufficient to converge to a solution for the expression resolution problem. 
+Persch~\etal~\cite{PW:overload} developed a similar two-pass approach where the bottom-up pass is followed by the top-down pass. 
+These algorithms differ from the hybrid approach under investigation in that they take multiple passes over the expression tree to yield a solution, and that they also apply both filtering heuristics to all expression nodes; \CFA's polymorphic functions and implicit conversions make the approach of filtering out invalid types taken by all of these algorithms infeasible.
+
+\subsubsection{Common Subexpression Caching}
+With any of these argument-parameter approaches, it may be a useful optimization to cache the resolution results for common subexpressions; in Figure~\ref{fig:res_dag} this optimization would result in the list of interpretations $[p_c, p_i]$ for ©p© only being calculated once, and re-used for each of the three instances of ©p©.
+
+\subsection{Implicit Conversion Application}
+With the exception of Bilson, the authors mentioned above do not account for implicit conversions in their algorithms\footnote{Baker does briefly comment on an approach for handling implicit conversions, but does not provide an implementable algorithm.}; all assume that there is at most one valid interpretation of a given expression for each distinct type. 
+Integrating implicit conversion handling into the presented argument-parameter matching algorithms thus provides some choice of implementation approach.
+
+Inference of polymorphic type variables can be considered a form of implicit conversion application, where monomorphic types are implicitly converted to instances of some polymorphic type\footnote{This ``conversion'' may not be implemented in any explicit way at runtime, but does need to be handled by the expression resolver as an inexact match between argument and parameter types.}. 
+This form of implicit conversion is particularly common in functional languages; Haskell's type classes~\cite{typeclass} are a particularly well-studied variant of this inference. 
+However, type classes arguably do not allow name overloading, as (at least in the Haskell implementation) identifiers belonging to type classes may not be overloaded in any other context than an implementation of that type class; this provides a single (possibly polymorphic) interpretation of any identifier, simplifying the expression resolution problem relative to \CFA. 
+\CC~\cite{ANSI98:C++} includes both name overloading and implicit conversions in its expression resolution specification, though unlike \CFA it does complete type-checking on a generated monomorphization of template functions, where \CFA simply checks a list of type constraints. 
+The upcoming Concepts standard~\cite{C++concepts} defines a system of type constraints similar in principle to \CFA's.
+Cormack and Wright~\cite{Cormack90} present an algorithm that integrates overload resolution with a polymorphic type inference approach very similar to \CFA's.
+However, their algorithm does not account for implicit conversions other than polymorphic type binding and their discussion of their overload resolution algorithm is not sufficiently detailed to classify it with the other argument-parameter matching approaches\footnote{Their overload resolution algorithm is possibly a variant of Ganzinger and Ripken~\cite{Ganzinger80} or Pennello~\etal~\cite{Pennello80}, modified to allow for polymorphic type binding.}.
+
+\subsubsection{On Parameters}
+Bilson does account for implicit conversions in his algorithm, but it is unclear if the approach is optimal. 
+His algorithm integrates checking for valid implicit conversions into the argument-parameter-matching step, essentially trading more expensive matching for a smaller number of argument interpretations. 
+This approach may result in the same subexpression being checked for a type match with the same type multiple times, though again memoization may mitigate this cost; however, this approach does not generate implicit conversions that are not useful to match the containing function.
+
+\subsubsection{On Arguments}
+Another approach is to generate a set of possible implicit conversions for each set of interpretations of a given argument. 
+This approach has the benefit of detecting ambiguous interpretations of arguments at the level of the argument rather than its containing call, never finds more than one interpretation of the argument with a given type, and re-uses calculation of implicit conversions between function candidates. 
+On the other hand, this approach may unnecessarily generate argument interpretations that never match any parameter, wasting work. 
+Furthermore, in the presence of tuple types, this approach may lead to a combinatorial explosion of argument interpretations considered, unless the tuple can be considered as a sequence of elements rather than a unified whole. 
+
+\subsection{Candidate Set Generation}
+All the algorithms discussed to this point generate the complete set of candidate argument interpretations before attempting to match the containing function-call expression. 
+However, given that the top-level expression interpretation that is ultimately chosen is the minimal-cost valid interpretation, any consideration of non-minimal-cost interpretations is wasted work.
+Under the assumption that programmers generally write function calls with relatively low-cost interpretations, a possible work-saving heuristic is to generate only the lowest-cost argument interpretations first, attempt to find a valid top-level interpretation using them, and only if that fails generate the next higher-cost argument interpretations.
+
+\subsubsection{Eager}
+Within the eager approach taken by the existing top-down and bottom-up algorithms, there are still variants to explore. 
+Cormack and Baker do not account for implicit conversions, and thus do not account for the possibility of multiple valid interpretations with distinct costs; Bilson, on the other hand, sorts the list of interpretations to aid in finding minimal-cost interpretations. 
+Sorting the lists of argument or function call interpretations by cost at some point during resolution may provide useful opportunities to short-circuit expression evaluation when a minimal-cost interpretation is found, though it is unclear if this short-circuiting behaviour justifies the cost of the sort.
+
+\subsubsection{Lazy}
+In the presence of implicit conversions, many argument interpretations may match a given parameter by application of an appropriate implicit conversion. 
+However, if programmers actually use relatively few implicit conversions, then the ``on arguments'' approach to implicit conversions generates a large number of high-cost interpretations that may never be used. 
+Even if the ``on parameters'' approach to implicit conversions is used, eager generation of interpretations spends extra time attempting possibly expensive polymorphic or conversion-based matches in cases where an exact monomorphic interpretation exists. 
+
+The essence of the lazy approach to candidate set generation is to wrap the matching algorithm into the element generator of a lazy list, generating only as many elements at a time as are needed to ensure the next-smallest-cost interpretation has been generated. 
+Assuming argument interpretations are provided to the parameter matching algorithm in sorted order, a sorted list of function call interpretations can be produced by generating combinations of arguments sorted by total cost\footnote{I have already developed a lazy $n$-way combination generation algorithm to perform this task.}, then generating function call interpretations in the order suggested by this list. 
+The function call interpretation chosen may have costs of its own, for instance polymorphic type binding, so in some cases a number of argument combinations (any combination whose marginal cost does not exceed the cost of the function call interpretation itself) may need to be considered to determine the next-smallest-cost function call interpretation.
+Ideally, this candidate generation approach leads to very few unused candidates being generated (in the expected case where the programmer has, in fact, provided a validly-typable program), but it is an open question whether or not the overheads of lazy generation exceed the benefit produced from considering fewer interpretations.
+
+\subsubsection{Stepwise Lazy}
+As a compromise between the trade-offs of the eager and lazy approaches, I also propose to investigate a ``stepwise lazy'' approach, where all the interpretations for some ``step'' are eagerly generated, then the interpretations in the later steps are only generated on demand. 
+Under this approach the \CFA resolver could, for instance, try expression interpretations in the following order:
+\begin{enumerate}
+\item Interpretations with no polymorphic type binding or implicit conversions.
+\item Interpretations containing no polymorphic type binding and at least one safe implicit conversion.
+\item Interpretations containing polymorphic type binding, but only safe implicit conversions.
+\item Interpretations containing at least one unsafe implicit conversion.
+\end{enumerate} 
+If a valid expression interpretation is found in one step, it is guaranteed to be lower-cost than any interpretation in a later step (by the structure of \CFA interpretation costs), so no further steps need be considered.
+This approach may save significant amounts of work, especially given that the first steps avoid potentially expensive handling of implicit conversions and type assertion satisfaction entirely, and cover a large proportion of common monomorphic code.
+
+%\subsection{Parameter-Directed}
+%\textbf{TODO: Richard's algorithm isn't Baker (Cormack?), disentangle from this section \ldots}. 
+%The expression resolution algorithm used by the existing iteration of CFA is based on Baker's\cite{Baker82} algorithm for overload resolution in Ada. 
+%The essential idea of this algorithm is to first find the possible interpretations of the most deeply nested subexpressions, then to use these interpretations to recursively generate valid interpretations of their superexpressions. 
+%To simplify matters, the only expressions considered in this discussion of the algorithm are function application and literal expressions; other expression types can generally be considered to be variants of one of these for the purposes of the resolver, \eg variables are essentially zero-argument functions. 
+%If we consider expressions as graph nodes with arcs connecting them to their subexpressions, these expressions form a DAG, generated by the algorithm from the bottom up.
+%Literal expressions are represented by leaf nodes, annotated with the type of the expression, while a function application will have a reference to the function declaration chosen, as well as arcs to the interpretation nodes for its argument expressions; functions are annotated with their return type (or types, in the case of multiple return values).
+%
+%\textbf{TODO: Figure}
+%
+%Baker's algorithm was designed to account for name overloading; Richard Bilson\cite{Bilson03} extended this algorithm to also handle polymorphic functions, implicit conversions and multiple return types when designing the original \CFA compiler. 
+%The core of the algorithm is a function which Baker refers to as $gen\_calls$. 
+%$gen\_calls$ takes as arguments the name of a function $f$ and a list containing the set of possible subexpression interpretations $S_j$ for each argument of the function and returns a set of possible interpretations of calling that function on those arguments. 
+%The subexpression interpretations are generally either singleton sets generated by the single valid interpretation of a literal expression, or the results of a previous call to $gen\_calls$. 
+%If there are no valid interpretations of an expression, the set returned by $gen\_calls$ will be empty, at which point resolution can cease, since each subexpression must have at least one valid interpretation to produce an interpretation of the whole expression. 
+%On the other hand, if for some type $T$ there is more than one valid interpretation of an expression with type $T$, all interpretations of that expression with type $T$ can be collapsed into a single \emph{ambiguous expression} of type $T$, since the only way to disambiguate expressions is by their return types. 
+%If a subexpression interpretation is ambiguous, than any expression interpretation containing it will also be ambiguous. 
+%In the variant of this algorithm including implicit conversions, the interpretation of an expression as type $T$ is ambiguous only if there is more than one \emph{minimal-cost} interpretation of the expression as type $T$, as cheaper expressions are always chosen in preference to more expensive ones.
+%
+%Given this description of the behaviour of $gen\_calls$, its implementation is quite straightforward: for each function declaration $f_i$ matching the name of the function, consider each of the parameter types $p_j$ of $f_i$, attempting to match the type of an element of $S_j$ to $p_j$ (this may include checking of implicit conversions).
+%If no such element can be found, there is no valid interpretation of the expression using $f_i$, while if more than one such (minimal-cost) element is found than an ambiguous interpretation with the result type of $f_i$ is produced. 
+%In the \CFA variant, which includes polymorphic functions, it is possible that a single polymorphic function definition $f_i$ can produce multiple valid interpretations by different choices of type variable bindings; these interpretations are unambiguous so long as the return type of $f_i$ is different for each type binding. 
+%If all the parameters $p_j$ of $f_i$ can be uniquely matched to a candidate interpretation, then a valid interpretation based on $f_i$ and those $p_j$ is produced. 
+%$gen\_calls$ collects the produced interpretations for each $f_i$ and returns them; a top level expression is invalid if this list is empty, ambiguous if there is more than one (minimal-cost) result, or if this single result is ambiguous, and valid otherwise.
+%
+%In this implementation, resolution of a single top-level expression takes time $O(\ldots)$, where \ldots. \textbf{TODO:} \textit{Look at 2.3.1 in Richard's thesis when working out complexity; I think he does get the Baker algorithm wrong on combinations though, maybe\ldots}
+%
+%\textbf{TODO: Basic Lit Review} \textit{Look at 2.4 in Richard's thesis for any possible more-recent citations of Baker\ldots} \textit{Look back at Baker's related work for other papers that look similar to what you're doing, then check their citations as well\ldots} \textit{Look at Richard's citations in 2.3.2 w.r.t. type data structures\ldots}
+%\textit{CormackWright90 seems to describe a solution for the same problem, mostly focused on how to find the implicit parameters}
+
+\section{Proposal}
+Baker~\cite{Baker82} discussed various expression resolution algorithms that can handle name overloading, but left experimental comparison of those algorithms to future work; Bilson~\cite{Bilson03} described one extension of Baker's algorithm to handle implicit conversions, but did not fully explore the space of algorithmic approaches to handle both overloaded names and implicit conversions. 
+This project is intended to experimentally test a number of expression resolution algorithms that are powerful enough to handle the \CFA type-system, including both name overloading and implicit conversions. 
+This comparison closes Baker's open research question, as well as potentially improving Bilson's \CFA compiler.
+
+Rather than testing all of these algorithms in-place in the \CFA compiler, a resolver prototype is being developed that acts on a simplified input language encapsulating the essential details of the \CFA type-system\footnote{Note this simplified input language is not a usable programming language.}. 
+Multiple variants of this resolver prototype will be implemented, each encapsulating a different expression resolution variant, sharing as much code as feasible. 
+These variants will be instrumented to test runtime performance, and run on a variety of input files; the input files may be generated programmatically or from existing code in \CFA or similar languages.
+These experimental results should make it possible to determine the algorithm likely to be most performant in practical use, and replace CFA's existing expression resolver. 
+
+The experimental results will also provide some empirical sense of the compile-time cost of various language features by comparing the results of the most performant resolver variant that supports a feature with the most performant resolver variant that does not support that feature, a useful capability to guide language design. 
+As an example, there are currently multiple open proposals for how implicit conversions should interact with polymorphic type binding in \CFA, each with distinct levels of expressive power; if the resolver prototype is modified to support each proposal, the optimal algorithm for each proposal can be compared, providing an empirical demonstration of the trade-off between expressive power and compiler runtime. 
+
+This proposed project should provide valuable data on how to implement a performant compiler for programming languages such as \CFA with powerful static type-systems, specifically targeting the feature interaction between name overloading and implicit conversions. 
+This work is not limited in applicability to \CFA, but may also be useful for supporting efficient compilation of the upcoming Concepts standard~\cite{C++concepts} for \CC template constraints, for instance. 
+
+\appendix
+\section{Completion Timeline}
+The following is a preliminary estimate of the time necessary to complete the major components of this research project:
+\begin{center}
+\begin{tabular}{ | r @{--} l | p{4in} | }
+\hline       May 2015 & April 2016   & Project familiarization and generic types design and implementation. \\
+\hline       May 2016 & April 2017   & Design and implement resolver prototype and run performance experiments. \\
+\hline       May 2017 & August 2017  & Integrate new language features and best-performing resolver prototype into CFA. \\
+\hline September 2017 & January 2018 & Thesis writing and defense. \\
+\hline
+\end{tabular}
+\end{center}
+
+\addcontentsline{toc}{section}{\refname}
+\bibliographystyle{plain}
+\bibliography{cfa}
+
+%\addcontentsline{toc}{section}{\indexname} % add index name to table of contents
+%\begin{theindex}
+%Italic page numbers give the location of the main entry for the referenced term.
+%Plain page numbers denote uses of the indexed term.
+%Entries for grammar non-terminals are italicized.
+%A typewriter font is used for grammar terminals and program identifiers.
+%\indexspace
+%\input{comp_II.ind}
+%\end{theindex}
+
+\end{document}
Index: doc/theses/rob/.gitignore
===================================================================
--- doc/theses/rob/.gitignore	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,17 +1,0 @@
-# generated by latex
-*.aux
-*.bbl
-*.blg
-*.brf
-*.dvi
-*.idx
-*.ilg
-*.ind
-*.log
-*.out
-*.pdf
-*.ps
-*.toc
-*.lof
-*.lot
-*.synctex.gz
Index: doc/theses/rob/Makefile
===================================================================
--- doc/theses/rob/Makefile	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,18 +1,0 @@
-TeXLIB = .:../LaTeXmacros:../bibliography/:
-LaTeX  = TEXINPUTS=${TeXLIB} && export TEXINPUTS && pdflatex -halt-on-error
-BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex
-.PHONY : all clean spotless  # these name actions, not files: run them even if a file called all/clean/spotless exists
-all : thesis.pdf
-# thesis.pdf: ${LaTeX}/${BibTeX} export TeXLIB as TEXINPUTS/BIBINPUTS, then run pdflatex, bibtex, pdflatex twice (so citations and cross-references settle); pdf2ps also leaves thesis.ps behind as a side effect.
-thesis.pdf : Makefile ../LaTeXmacros/common.tex cfa-format.tex thesis.tex intro.tex ctordtor.tex tuples.tex variadic.tex conclusions.tex
-	${LaTeX} thesis
-	${BibTeX} thesis
-	${LaTeX} thesis
-	${LaTeX} thesis
-	pdf2ps thesis.pdf thesis.ps
-# clean: remove the LaTeX/BibTeX intermediate files only; thesis.pdf and thesis.ps are kept.
-clean :
-	rm -f *.aux *.bbl *.blg *.lof *.log *.lot *.out *.toc
-# spotless: run clean, then also remove the generated thesis.pdf and thesis.ps.
-spotless : clean
-	rm -f thesis.pdf thesis.ps
Index: doc/theses/rob/cfa-format.tex
===================================================================
--- doc/theses/rob/cfa-format.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,227 +1,0 @@
-% \usepackage{xcolor}
-% \usepackage{listings}
-% \usepackage{booktabs}
-% \usepackage{array}
-% \newcolumntype{?}{!{\vrule width 1pt}} % thick vertical line
-
-
-% like Mac Classic or iPlastic
-% \definecolor{basicCol}{HTML}{000000}
-% \definecolor{commentCol}{HTML}{0066FF}
-% \definecolor{stringCol}{HTML}{036A07}
-% \definecolor{keywordCol}{HTML}{0000FF}
-% \definecolor{identifierCol}{HTML}{318495}
-
-% like Visual Studio 2010
-% \definecolor{basicCol}{HTML}{000000}
-% \definecolor{commentCol}{HTML}{006400}
-% \definecolor{stringCol}{HTML}{A31515}
-% \definecolor{keywordCol}{HTML}{0000FF}
-% \definecolor{identifierCol}{HTML}{000000}
-
-\definecolor{basicCol}{HTML}{000000}      % base listing text (used by basicstyle)
-\definecolor{commentCol}{HTML}{000000}    % comments (used by commentstyle)
-\definecolor{stringCol}{HTML}{000000}     % string literals (used by stringstyle)
-\definecolor{keywordCol}{HTML}{000000}    % keywords (used by keywordstyle)
-\definecolor{identifierCol}{HTML}{000000} % identifiers (used by identifierstyle); all five are 000000, i.e. black
-
-% listings language "sml" (selected by the smlcode environment); definition from https://gist.github.com/nikolajquorning/92bbbeef32e1dd80105c9bf2daceb89a
-\lstdefinelanguage{sml} {
-  morekeywords= {
-    EQUAL, GREATER, LESS, NONE, SOME, abstraction, abstype, and, andalso, array, as, before, bool, case, char, datatype, do, else, end, eqtype, exception, exn, false, fn, fun, functor, handle, if, in, include, infix, infixr, int, let, list, local, nil, nonfix, not, o, of, op, open, option, orelse, overload, print, raise, real, rec, ref, sharing, sig, signature, string, struct, structure, substring, then, true, type, unit, val, vector, where, while, with, withtype, word
-  },
-  morestring=[b]",          % "..." strings; [b] = a backslash escapes the delimiter
-  morecomment=[s]{(*}{*)},  % (* ... *) delimited comments
-}
-
-\lstdefinelanguage{D}{
-  % Keyword class 1: language keywords
-  morekeywords=[1]{
-    abstract, alias, align, auto, body, break, cast, catch, class, const,
-    continue, debug, delegate, delete, deprecated, do, else, enum, export,
-    false, final, finally, for, foreach, foreach_reverse, function, goto, if,
-    immutable, import, in, inout, interface, invariant, is, lazy, macro, mixin,
-    module, new, nothrow, null, out, override, package, pragma, private,
-    protected, public, pure, ref, return, shared, static, struct, super,
-    switch, synchronized, template, this, throw, true, try, typedef, typeid,
-    typeof, union, unittest, volatile, while, with
-  },
-  % Keyword class 2: special identifiers, common functions
-  morekeywords=[2]{enforce},
-  % Keyword class 3: underscore-prefixed special identifiers
-  morekeywords=[3]{
-    __DATE__, __EOF__, __FILE__, __LINE__, __TIMESTAMP__, __TIME__, __VENDOR__,
-    __VERSION__, __ctfe, __gshared, __monitor, __thread, __vptr, _argptr,
-    _arguments, _ctor, _dtor
-  },
-  % Keyword class 4: basic types (each name listed once)
-  morekeywords=[4]{
-     byte, ubyte, short, ushort, int, uint, long, ulong, cent, ucent, void,
-     bool, bit, float, double, real,
-     char, wchar, dchar, string, wstring, dstring, ireal, ifloat, idouble,
-     creal, cfloat, cdouble, size_t, ptrdiff_t, sizediff_t, equals_t, hash_t
-  },
-  % Strings: "...", '...' and `...`, each with backslash escapes ([b])
-  morestring=[b]{"},
-  morestring=[b]{'},
-  morestring=[b]{`},
-  % Comments: // to end of line ([l]); /* */ delimited ([s]); /+ +/ nesting ([n]);
-  % the /** */ and /++ +/ forms are typeset in blue
-  comment=[l]{//},
-  morecomment=[s]{/*}{*/},
-  morecomment=[s][\color{blue}]{/**}{*/},
-  morecomment=[n]{/+}{+/},
-  morecomment=[n][\color{blue}]{/++}{+/},
-  % Options: keywords are matched case-sensitively
-  sensitive=true
-}
-
-\lstdefinelanguage{rust}{
-  % Keyword class 1: the only keyword class defined for this language
-  morekeywords=[1]{
-    abstract, alignof, as, become, box,
-    break, const, continue, crate, do,
-    else, enum, extern, false, final,
-    fn, for, if, impl, in,
-    let, loop, macro, match, mod,
-    move, mut, offsetof, override, priv,
-    proc, pub, pure, ref, return,
-    Self, self, sizeof, static, struct,
-    super, trait, true,  type, typeof,
-    unsafe, unsized, use, virtual, where,
-    while, yield
-  },
-  % Strings: "..." with backslash escapes ([b])
-  morestring=[b]{"},
-  % Comments: // to end of line ([l]) and /* */ delimited ([s])
-  comment=[l]{//},
-  morecomment=[s]{/*}{*/},
-  % Options: keywords are matched case-sensitively
-  sensitive=true
-}
-
-\newcommand{\KWC}{K-W C\xspace} % prints "K-W C"; \xspace decides whether a following space is needed
-
-\renewcommand{\ttdefault}{pcr} % typewriter font family set to pcr (Courier)
-
-\newcommand{\basicstylesmall}{\scriptsize\ttfamily\color{basicCol}} % \scriptsize typewriter text in basicCol
-
-\lstdefinestyle{defaultStyle}{
-  escapeinside={@@}, % escape to LaTeX between a pair of @; NOTE(review): escapechar is also set further down -- confirm which one is in effect
-  basicstyle=\footnotesize\ttfamily\color{basicCol}, % footnote-size typewriter text
-  keywordstyle=\bfseries\color{keywordCol}, % keywords in bold
-  commentstyle=\itshape\color{commentCol}, % comments in italic
-  identifierstyle=\color{identifierCol},
-  stringstyle=\color{stringCol},
-  mathescape=true, % $...$ inside a listing is typeset as math
-  columns=fixed, % fixed-width character columns
-  aboveskip=4pt,                                  % spacing above/below code block
-  belowskip=3pt,
-  keepspaces=true, % keep spaces as typed instead of dropping them to fix column alignment
-  frame=lines, % horizontal rule above and below each listing
-  literate=, % empty list: no literate replacements
-  showlines=true,                                 % show blank lines at end of code
-  showspaces=false, % spaces are not drawn as visible-space glyphs
-  showstringspaces=false, % likewise inside string literals
-  escapechar=\$, % NOTE(review): $ is also the mathescape delimiter enabled above -- confirm this combination is intended
-  xleftmargin=\parindentlnth,                     % indent code to paragraph indentation
-  moredelim=[is][\color{red}\bfseries]{**R**}{**R**},    % red highlighting
-  % moredelim=* detects keywords, comments, strings, and other delimiters and applies their formatting
-  % moredelim=** allows cumulative application
-}
-\lstset{
-  language = CFA, % global default language; NOTE(review): CFA is not defined in this file -- presumably comes from the shared LaTeX macros; confirm
-  style=defaultStyle
-}
-\lstMakeShortInline[basewidth=0.5em,breaklines=true,breakatwhitespace,basicstyle=\normalsize\ttfamily\color{basicCol}]@  % @...@ becomes shorthand for \lstinline, typeset at \normalsize
-
-\lstnewenvironment{cfacode}[1][]{
-  \lstset{
-    language = CFA,      % listing environment for CFA code
-    style=defaultStyle,  % shared look from defaultStyle
-    #1                   % optional argument: extra listings keys, applied after the two above
-    % belowcaptionskip=1\baselineskip,
-    % breaklines=true,
-    % frame=L,
-  }
-}{}
-
-\lstnewenvironment{cppcode}[1][]{
-  \lstset{
-    language = c++,      % listing environment for C++ code
-    style=defaultStyle,  % shared look from defaultStyle
-    #1                   % optional argument: extra listings keys, applied after the two above
-  }
-}{}
-
-\lstnewenvironment{javacode}[1][]{
-  \lstset{
-    language = java,     % listing environment for Java code
-    style=defaultStyle,  % shared look from defaultStyle
-    #1                   % optional argument: extra listings keys, applied after the two above
-  }
-}{}
-
-\lstnewenvironment{scalacode}[1][]{
-  \lstset{
-    language = scala,    % listing environment for Scala code
-    style=defaultStyle,  % shared look from defaultStyle
-    #1                   % optional argument: extra listings keys, applied after the two above
-  }
-}{}
-
-\lstnewenvironment{smlcode}[1][]{
-  \lstset{
-    language = sml,      % listing environment using the sml language defined in this file
-    style=defaultStyle,  % shared look from defaultStyle
-    #1                   % optional argument: extra listings keys, applied after the two above
-  }
-}{}
-
-\lstnewenvironment{dcode}[1][]{
-  \lstset{
-    language = D,        % listing environment using the D language defined in this file
-    style=defaultStyle,  % shared look from defaultStyle
-    #1                   % optional argument: extra listings keys, applied after the two above
-  }
-}{}
-
-\lstnewenvironment{rustcode}[1][]{
-  \lstset{
-    language = rust,     % listing environment using the rust language defined in this file
-    style=defaultStyle,  % shared look from defaultStyle
-    #1                   % optional argument: extra listings keys, applied after the two above
-  }
-}{}
-
-\newcommand{\zero}{\lstinline{zero_t}\xspace} % inline code "zero_t"
-\newcommand{\one}{\lstinline{one_t}\xspace}   % inline code "one_t"
-\newcommand{\ateq}{\lstinline{\@=}\xspace}    % NOTE(review): presumably prints "@=" with \@ escaping the @ short-inline character -- confirm
-
-\newenvironment{newtext}{\color{red}}{\ignorespacesafterend} % sets its contents in red; spaces after \end{newtext} are ignored
-
-% \lstset{ %
-%   backgroundcolor=\color{white},
-%   basicstyle=\footnotesize,
-%   breakatwhitespace=false,
-%   breaklines=true,
-%   captionpos=b,
-%   commentstyle=\color{mygreen},
-%   escapeinside={\%*}{*)},
-%   extendedchars=true,
-%   frame=single,
-%   keywordstyle=\color{blue},
-%   language=Prolog,
-%   numbers=left,
-%   numbersep=5pt,
-%   numberstyle=\tiny\color{mygray},
-%   rulecolor=\color{black},
-%   showspaces=false,
-%   showstringspaces=false,
-%   showtabs=false,
-%   stepnumber=2,
-%   stringstyle=\color{mymauve},
-%   tabsize=2,
-%   title=\lstname,
-%   morekeywords={not,\},\{,preconditions,effects },
-%   deletekeywords={time}
-% }
Index: doc/theses/rob/conclusions.tex
===================================================================
--- doc/theses/rob/conclusions.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,285 +1,0 @@
-%======================================================================
-\chapter{Conclusions}
-%======================================================================
-
-Adding resource management and tuples to \CFA has been a challenging design, engineering, and implementation exercise.
-On the surface, the work may appear as a rehash of similar mechanisms in \CC.
-However, every added feature differs from its \CC counterpart, often offering extended functionality and better integration with C and its programmers, and always supporting separate compilation.
-All of these new features are being used extensively by the \CFA development-team to build the \CFA runtime system.
-In particular, the concurrency system is built on top of RAII, library functions @new@ and @delete@ are used to manage dynamically allocated objects, and tuples are used to provide uniform interfaces to C library routines such as @div@ and @remquo@.
-
-\section{Constructors and Destructors}
-\CFA supports the RAII idiom using constructors and destructors.
-There are many engineering challenges in introducing constructors and destructors, partially since \CFA is not an object-oriented language.
-By making use of managed types, \CFA programmers are afforded an extra layer of safety and ease of use in comparison to C programmers.
-While constructors and destructors provide a sensible default behaviour, \CFA allows experienced programmers to declare unmanaged objects to take control of object management for performance reasons.
-Constructors and destructors as named functions fit the \CFA polymorphism model perfectly, allowing polymorphic code to use managed types seamlessly.
-
-\section{Tuples}
-\CFA can express functions with multiple return values in a way that is simple, concise, and safe.
-The addition of multiple-return-value functions naturally requires a way to use multiple return values, which begets tuple types.
-Tuples provide two useful notions of assignment: multiple assignment, allowing simple, yet expressive assignment between multiple variables, and mass assignment, allowing a lossless assignment of a single value across multiple variables.
-Tuples have a flexible structure that allows the \CFA type-system to decide how to restructure tuples, making it syntactically simple to pass tuples between functions.
-Tuple types can be combined with polymorphism and tuple conversions can apply during assertion inference to produce a cohesive feel.
-
-\section{Variadic Functions}
-Type-safe variadic functions, with a similar feel to variadic templates, are added to \CFA.
-The new variadic functions can express complicated recursive algorithms.
-Unlike variadic templates, it is possible to write @new@ as a library routine and to separately compile @ttype@ polymorphic functions.
-Variadic functions are statically type checked and provide a user experience that is consistent with that of tuples and polymorphic functions.
-
-\section{Future Work}
-\subsection{Constructors and Destructors}
-Both \CC and Rust support move semantics, which expand the user's control of memory management by providing the ability to transfer ownership of large data, rather than forcing potentially expensive copy semantics.
-\CFA currently does not support move semantics, partially due to the complexity of the model.
-The design space is currently being explored with the goal of finding an alternative to move semantics that provides necessary performance benefits, while reducing the amount of repetition required to create a new type, along with the cognitive burden placed on the user.
-
-% One technique being evaluated is whether named return-values can be used to eliminate unnecessary temporaries \cite{Buhr94a}.
-% For example,
-% \begin{cfacode}
-% struct A { ... };
-% [A x] f(A x);
-% [A y] g(A y);
-% [A z] h(A z);
-
-% struct A a1, a2;
-% a2 = h(g(f(a1)));
-% \end{cfacode}
-% Here, since both @f@'s argument and return value have the same name and type, the compiler can infer that @f@ returns its argument.
-% With this knowledge, the compiler can reuse the storage for the argument to @f@ as the argument to @g@.  % TODO: cite Till thesis?
-
-Exception handling is among the features expected to be added to \CFA in the near future.
-For exception handling to properly interact with the rest of the language, it must ensure all RAII guarantees continue to be met.
-That is, when an exception is raised, it must properly unwind the stack by calling the destructors for any objects that live between the raise and the handler.
-This can be accomplished either by augmenting the translator to properly emit code that executes the destructors, or by switching destructors to hook into the GCC @cleanup@ attribute \cite[6.32.1]{GCCExtensions}.
-
-The @cleanup@ attribute, which is attached to a variable declaration, takes a function name as an argument and schedules that routine to be executed when the variable goes out of scope.
-\begin{cfacode}
-struct S { int x; };
-void __dtor_S(struct S *);
-{
-  __attribute__((cleanup(__dtor_S))) struct S s;
-} // calls __dtor_S(&s)
-\end{cfacode}
-This mechanism is known and understood by GCC, so that the destructor is properly called in any situation where a variable goes out of scope, including function returns, branches, and built-in GCC exception handling mechanisms using libunwind.
-
-A caveat of this approach is that the @cleanup@ attribute only permits a function that consumes a single argument of type @T *@ for a variable of type @T@.
-This restriction means that any destructor that consumes multiple arguments (\eg, because it is polymorphic) or any destructor that is a function pointer (\eg, because it is an assertion parameter) must be called through a local thunk.
-For example,
-\begin{cfacode}
-forall(otype T)
-struct Box {
-  T x;
-};
-forall(otype T) void ^?{}(Box(T) * x); // has implicit parameters
-
-forall(otype T)
-void f(T x) {
-  T y = x;  // destructor is a function-pointer parameter
-  Box(T) z = { x }; // destructor has multiple parameters
-}
-\end{cfacode}
-currently generates the following
-\begin{cfacode}
-void _dtor_BoxT(  // consumes more than 1 parameter due to assertions
-  void (*_adapter_PTT)(void (*)(), void *, void *),
-  void (*_adapter_T_PTT)(void (*)(), void *, void *, void *),
-  long unsigned int _sizeof_T,
-  long unsigned int _alignof_T,
-  void *(*_assign_T_PTT)(void *, void *),
-  void (*_ctor_PT)(void *),
-  void (*_ctor_PTT)(void *, void *),
-  void (*_dtor_PT)(void *),
-  void *x
-);
-
-void f(
-  void (*_adapter_PTT)(void (*)(), void *, void *),
-  void (*_adapter_T_PTT)(void (*)(), void *, void *, void *),
-  long unsigned int _sizeof_T,
-  long unsigned int _alignof_T,
-  void *(*_assign_TT)(void *, void *),
-  void (*_ctor_T)(void *),
-  void (*_ctor_TT)(void *, void *),
-  void (*_dtor_T)(void *),
-  void *x
-){
-  void *y = __builtin_alloca(_sizeof_T);
-  // constructor call elided
-
-  // generic layout computation elided
-  long unsigned int _sizeof_BoxT = ...;
-  void *z = __builtin_alloca(_sizeof_BoxT);
-  // constructor call elided
-
-  _dtor_BoxT(  // ^?{}(&z); -- _dtor_BoxT has > 1 arguments
-    _adapter_PTT,
-    _adapter_T_PTT,
-    _sizeof_T,
-    _alignof_T,
-    _assign_TT,
-    _ctor_T,
-    _ctor_TT,
-    _dtor_T,
-    z
-  );
-  _dtor_T(y);  // ^?{}(&y); -- _dtor_T is a function pointer
-}
-\end{cfacode}
-Further to this point, every distinct array type will require a thunk for its destructor, where array destructor code is currently inlined, since array destructors hard code the length of the array.
-
-For function call temporaries, new scopes have to be added for destructor ordering to remain consistent.
-In particular, the translator currently destroys argument and return value temporary objects as soon as the statement they were created for ends.
-In order for this behaviour to be maintained, new scopes have to be added around every statement that contains a function call.
-Since a nested expression can raise an exception, care must be taken when destroying temporary objects.
-One way to achieve this is to split statements at every function call, to provide the correct scoping to destroy objects as necessary.
-For example,
-\begin{cfacode}
-struct S { ... };
-void ?{}(S *, S);
-void ^?{}(S *);
-
-S f();
-S g(S);
-
-g(f());
-\end{cfacode}
-would generate
-\begin{cfacode}
-struct S { ... };
-void _ctor_S(struct S *, struct S);
-void _dtor_S(struct S *);
-
-{
-  __attribute__((cleanup(_dtor_S))) struct S _tmp1 = f();
-  __attribute__((cleanup(_dtor_S))) struct S _tmp2 =
-    (_ctor_S(&_tmp2, _tmp1), _tmp2);
-  __attribute__((cleanup(_dtor_S))) struct S _tmp3 = g(_tmp2);
-} // destroy _tmp3, _tmp2, _tmp1
-\end{cfacode}
-Note that destructors must be registered after the temporary is fully initialized, since it is possible for initialization expressions to raise exceptions, and a destructor should never be called on an uninitialized object.
-This requires a slightly strange looking initializer for constructor calls, where a comma expression is used to produce the value of the object being initialized, after the constructor call, conceptually bitwise copying the initialized data into itself.
-Since this copy is wholly unnecessary, it is easily optimized away.
-
-A second approach is to attach an accompanying boolean to every temporary that records whether the object contains valid data, and thus whether the value should be destructed.
-\begin{cfacode}
-struct S { ... };
-void _ctor_S(struct S *, struct S);
-void _dtor_S(struct S *);
-
-struct _tmp_bundle_S {
-  bool valid;
-  struct S value;
-};
-
-void _dtor_tmpS(struct _tmp_bundle_S * ret) {
-  if (ret->valid) {
-    _dtor_S(&ret->value);
-  }
-}
-
-{
-  __attribute__((cleanup(_dtor_tmpS))) struct _tmp_bundle_S _tmp1 = { 0 };
-  __attribute__((cleanup(_dtor_tmpS))) struct _tmp_bundle_S _tmp2 = { 0 };
-  __attribute__((cleanup(_dtor_tmpS))) struct _tmp_bundle_S _tmp3 = { 0 };
-  _tmp3.value = g(
-    (_ctor_S(
-      &_tmp2.value,
-      (_tmp1.value = f(), _tmp1.valid = 1, _tmp1.value)
-    ), _tmp2.valid = 1, _tmp2.value)
-  ), _tmp3.valid = 1, _tmp3.value;
-} // destroy _tmp3, _tmp2, _tmp1
-\end{cfacode}
-In particular, the boolean is set immediately after argument construction and immediately after return value copy.
-The boolean is checked as a part of the @cleanup@ routine, forwarding to the object's destructor if the object is valid.
-One such type and @cleanup@ routine needs to be generated for every type used in a function parameter or return value.
-
-The former approach generates much simpler code; however, splitting expressions requires care to ensure that expression evaluation order does not change.
-Expression ordering has to be performed by a full compiler, so it is possible that the latter approach would be more suited to the \CFA prototype, whereas the former approach is clearly the better option in a full compiler.
-More investigation is needed to determine whether the translator's current design can easily handle proper expression ordering.
-
-As discussed in Section \ref{s:implicit_copy_construction}, return values are destructed with a different @this@ pointer than they are constructed with.
-This problem can be easily fixed once a full \CFA compiler is built, since it would have full control over the call/return mechanism.
-In particular, since the callee is aware of where it needs to place the return value, it can construct the return value directly, rather than bitwise copy the internal data.
-
-Currently, the special functions are always auto-generated, except for generic types where the type parameter does not have assertions for the corresponding operation.
-For example,
-\begin{cfacode}
-forall(dtype T | sized(T) | { void ?{}(T *); })
-struct S { T x; };
-\end{cfacode}
-only auto-generates the default constructor for @S@, since the member @x@ is missing the other 3 special functions.
-Once deleted functions have been added, function generation can make use of this information to disable generation of special functions when a member has a deleted function.
-For example,
-\begin{cfacode}
-struct A {};
-void ?{}(A *) = delete;
-struct S { A x; };  // does not generate void ?{}(S *);
-\end{cfacode}
-
-Unmanaged objects and their interactions with the managed \CFA environment are an open problem that deserves greater attention.
-In particular, the interactions between unmanaged objects and copy semantics are subtle and can easily lead to errors.
-It is possible that the compiler should mark some of these situations as errors by default, and possibly conditionally emit warnings for some situations.
-Another possibility is to construct, destruct, and assign unmanaged objects using the intrinsic and auto-generated functions.
-A more thorough examination of the design space for this problem is required.
-
-Currently, the \CFA translator does not support any warnings.
-Ideally, the translator should support optional warnings in the case where it can detect that an object has been constructed twice.
-For example, forwarding constructor calls are guaranteed to initialize the entire object, so redundant constructor calls can cause problems such as memory leaks, while looking innocuous to a novice user.
-\begin{cfacode}
-struct B { ... };
-struct A {
-  B x, y, z;
-};
-void ?{}(A * a, B x) {
-  // y, z implicitly default constructed
-  (&a->x){ ... }; // explicitly construct x
-} // constructs an entire A
-void ?{}(A * a) {
-  (&a->y){}; // initialize y
-  a{ (B){ ... } }; // forwarding constructor call
-                   // initializes entire object, including y
-}
-\end{cfacode}
-
-Finally, while constructors provide a mechanism for establishing invariants, there is currently no mechanism for maintaining invariants without resorting to opaque types.
-That is, structure fields can be accessed and modified by any block of code without restriction, so while it is possible to ensure that an object is initially set to a valid state, it is not possible to ensure that it remains in a consistent state throughout its lifetime.
-A popular technique for ensuring consistency in object-oriented programming languages is to provide access modifiers such as @private@, which provide compile-time checks that only privileged code accesses private data.
-This approach could be added to \CFA, but it requires an idiomatic way of specifying what code is privileged and what data is protected.
-One possibility is to tie access control into an eventual module system.
-
-\begin{sloppypar}
-The implementation of implicit subobject-construction is currently an all-or-nothing check.
-That is, if a subobject is conditionally constructed, \eg within an if-statement, no implicit constructors for that object are added.
-\begin{cfacode}
-struct A { ... };
-void ?{}(A * a) { ... }
-
-struct B {
-  A a;
-};
-void ?{}(B * b) {
-  if (...) {
-    (&b->a){};  // explicitly constructed
-  } // does not construct in else case
-}
-\end{cfacode}
-This behaviour is unsafe and breaks the guarantee that constructors fully initialize objects.
-This situation should be properly handled, either by examining all paths and inserting implicit constructor calls only in the paths missing construction, or by emitting an error or warning.
-\end{sloppypar}
-
-\subsection{Tuples}
-Named result values are planned, but not yet implemented.
-This feature ties nicely into named tuples, as seen in D and Swift.
-
-Currently, tuple flattening and structuring conversions are 0-cost conversions in the resolution algorithm.
-This makes tuples conceptually very simple to work with, but easily causes unnecessary ambiguity in situations where the type system should be able to differentiate between alternatives.
-Adding an appropriate cost function to tuple conversions will allow tuples to interact with the rest of the programming language more cohesively.
-
-\subsection{Variadic Functions}
-Use of @ttype@ functions currently relies heavily on recursion.
-\CC has opened variadic templates up so that recursion is not strictly necessary in some cases, and it would be interesting to see if any such cases can be applied to \CFA.
-
-\CC supports variadic templated data-types, making it possible to express arbitrary length tuples, arbitrary parameter function objects, and more with generic types.
-Currently, \CFA does not support @ttype@-parameter generic-types, though there does not appear to be a technical reason that it cannot.
-Notably, opening up support for this makes it possible to implement the exit form of scope guard (see Section \ref{s:ResMgmt}), making it possible to call arbitrary functions at scope exit in idiomatic \CFA.
Index: doc/theses/rob/ctordtor.tex
===================================================================
--- doc/theses/rob/ctordtor.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,1259 +1,0 @@
-%======================================================================
-\chapter{Constructors and Destructors}
-%======================================================================
-
-% TODO now: as an experiment, implement Andrei Alexandrescu's ScopeGuard http://www.drdobbs.com/cpp/generic-change-the-way-you-write-excepti/184403758?pgno=2
-% doesn't seem possible to do this without allowing ttype on generic structs?
-
-Since \CFA is a true systems language, it does not require a garbage collector.
-As well, \CFA is not an object-oriented programming language, \ie, structures cannot have methods.
-While structures can have function pointer members, this is different from methods, since methods have implicit access to structure members and methods cannot be reassigned.
-Nevertheless, one important goal is to reduce programming complexity and increase safety.
-To that end, \CFA provides support for implicit pre/post-execution of routines for objects, via constructors and destructors.
-
-This chapter details the design of constructors and destructors in \CFA, along with their current implementation in the translator.
-Generated code samples have been edited for clarity and brevity.
-
-\section{Design Criteria}
-\label{s:Design}
-In designing constructors and destructors for \CFA, the primary goals were ease of use and maintaining backwards compatibility.
-
-In C, when a variable is defined, its value is initially undefined unless it is explicitly initialized or allocated in the static area.
-\begin{cfacode}
-int main() {
-  int x;        // uninitialized
-  int y = 5;    // initialized to 5
-  x = y;        // assigned 5
-  static int z; // initialized to 0
-}
-\end{cfacode}
-In the example above, @x@ is defined and left uninitialized, while @y@ is defined and initialized to 5.
-Next, @x@ is assigned the value of @y@.
-In the last line, @z@ is implicitly initialized to 0 since it is marked @static@.
-The key difference between assignment and initialization is that assignment occurs on a live object (\ie, an object that contains data).
-It is important to note that this means @x@ could have been used uninitialized prior to being assigned, while @y@ could not be used uninitialized.
-Use of uninitialized variables yields undefined behaviour \cite[p.~558]{C11}, which is a common source of errors in C programs.
-
-Initialization of a declaration is strictly optional, permitting uninitialized variables to exist.
-Furthermore, declaration initialization is limited to expressions, so there is no way to insert arbitrary code before a variable is live, without delaying the declaration.
-Many C compilers give good warnings for uninitialized variables most of the time, but they cannot in all cases.
-\begin{cfacode}
-int f(int *);  // output parameter: never reads, only writes
-int g(int *);  // input parameter: never writes, only reads,
-               // so requires initialized variable
-
-int x, y;
-f(&x);  // okay - only writes to x
-g(&y);  // uses y uninitialized
-\end{cfacode}
-Other languages are able to give errors in the case of uninitialized variable use, but due to backwards compatibility concerns, this is not the case in \CFA.
-
-In C, constructors and destructors are often mimicked by providing routines that create and tear down objects, where the tear down function is typically only necessary if the type modifies the execution environment.
-\begin{cfacode}
-struct array_int {
-  int * x;
-};
-struct array_int create_array(int sz) {
-  return (struct array_int) { calloc(sizeof(int)*sz) };
-}
-void destroy_array(struct array_int * arr) {
-  free(arr->x);
-}
-\end{cfacode}
-This idiom does not provide any guarantees unless the structure is opaque, which then requires that all objects are heap allocated.
-\begin{cfacode}
-struct opaque_array_int;
-struct opaque_array_int * create_opaque_array(int sz);
-void destroy_opaque_array(opaque_array_int *);
-int opaque_get(opaque_array_int *);  // subscript
-
-opaque_array_int * x = create_opaque_array(10);
-int x2 = opaque_get(x, 2);
-\end{cfacode}
-This pattern is cumbersome to use since every access becomes a function call, requiring awkward syntax and a performance cost.
-While useful in some situations, this compromise is too restrictive.
-Furthermore, even with this idiom it is easy to make mistakes, such as forgetting to destroy an object or destroying it multiple times.
-
-A constructor provides a way of ensuring that the necessary aspects of object initialization are performed, from setting up invariants to providing compile- and run-time checks for appropriate initialization parameters.
-This goal is achieved through a \emph{guarantee} that a constructor is called \emph{implicitly} after every object is allocated from a type with associated constructors, as part of an object's \emph{definition}.
-Since a constructor is called on every object of a managed type, it is \emph{impossible} to forget to initialize such objects, as long as all constructors perform some sensible form of initialization.
-
-In \CFA, a constructor is a function with the name @?{}@.
-Like other operators in \CFA, the name represents the syntax used to call the constructor, \eg, @struct S = { ... };@.
-Every constructor must have a return type of @void@ and at least one parameter, the first of which is colloquially referred to as the \emph{this} parameter, as in many object-oriented programming-languages (however, a programmer can give it an arbitrary name).
-The @this@ parameter must have a pointer type, whose base type is the type of object that the function constructs.
-There is precedent for enforcing the first parameter to be the @this@ parameter in other operators, such as the assignment operator, where in both cases, the left-hand side of the equals is the first parameter.
-There is currently a proposal to add reference types to \CFA.
-Once this proposal has been implemented, the @this@ parameter will become a reference type with the same restrictions.
-
-Consider the definition of a simple type encapsulating a dynamic array of @int@s.
-
-\begin{cfacode}
-struct Array {
-  int * data;
-  int len;
-}
-\end{cfacode}
-
-In C, if the user creates an @Array@ object, the fields @data@ and @len@ are uninitialized, unless an explicit initializer list is present.
-It is the user's responsibility to remember to initialize both of the fields to sensible values, since there are no implicit checks for invalid values or reasonable defaults.
-In \CFA, the user can define a constructor to handle initialization of @Array@ objects.
-
-\begin{cfacode}
-void ?{}(Array * arr){
-  arr->len = 10;    // default size
-  arr->data = malloc(sizeof(int)*arr->len);
-  for (int i = 0; i < arr->len; ++i) {
-    arr->data[i] = 0;
-  }
-}
-Array x;  // allocates storage for Array and calls ?{}(&x)
-\end{cfacode}
-
-This constructor initializes @x@ so that its @len@ field has the value 10, and its @data@ field holds a pointer to a block of memory large enough to hold 10 @int@s, and sets the value of each element of the array to 0.
-This particular form of constructor is called the \emph{default constructor}, because it is called on an object defined without an initializer.
-In other words, a default constructor is a constructor that takes a single argument: the @this@ parameter.
-
-In \CFA, a destructor is a function much like a constructor, except that its name is \lstinline!^?{}! \footnote{Originally, the name @~?{}@ was chosen for destructors, to provide familiarity to \CC programmers. Unfortunately, this name causes parsing conflicts with the bitwise-not operator when used with operator syntax (see section \ref{sub:syntax}.)} and it takes only one argument.
-A destructor for the @Array@ type can be defined as:
-\begin{cfacode}
-void ^?{}(Array * arr) {
-  free(arr->data);
-}
-\end{cfacode}
-The destructor is automatically called at deallocation for all objects of type @Array@.
-Hence, the memory associated with an @Array@ is automatically freed when the object's lifetime ends.
-The exact guarantees made by \CFA with respect to the calling of destructors are discussed in section \ref{sub:implicit_dtor}.
-
-As discussed previously, the distinction between initialization and assignment is important.
-Consider the following example.
-\begin{cfacode}[numbers=left]
-Array x, y;
-Array z = x;  // initialization
-y = x;        // assignment
-\end{cfacode}
-By the previous definition of the default constructor for @Array@, @x@ and @y@ are initialized to valid arrays of length 10 after their respective definitions.
-On line 2, @z@ is initialized with the value of @x@, while on line 3, @y@ is assigned the value of @x@.
-The key distinction between initialization and assignment is that an object to be initialized does not hold any meaningful values, whereas an object to be assigned might.
-In particular, these cases cannot be handled the same way because in the former case @z@ has no array, while @y@ does.
-A \emph{copy constructor} is used to perform initialization using another object of the same type.
-
-\begin{cfacode}[emph={other}, emphstyle=\color{red}]
-void ?{}(Array * arr, Array other) {  // copy constructor
-  arr->len = other.len;               // initialization
-  arr->data = malloc(sizeof(int)*arr->len);
-  for (int i = 0; i < arr->len; ++i) {
-    arr->data[i] = other.data[i];     // copy from other object
-  }
-}
-Array ?=?(Array * arr, Array other) { // assignment
-  ^?{}(arr);                          // explicitly call destructor
-  ?{}(arr, other);                    // explicitly call constructor
-  return *arr;
-}
-\end{cfacode}
-The two functions above handle the cases of initialization and assignment.
-The first function is called a copy constructor, because it constructs its argument by copying the values from another object of the same type.
-The second function is the standard copy-assignment operator.
-\CFA does not currently have the concept of reference types, so the most appropriate type for the source object in copy constructors and assignment operators is a value type.
-Appropriate care is taken in the implementation to avoid recursive calls to the copy constructor.
-The four functions (default constructor, destructor, copy constructor, and assignment operator) are special in that they safely control the state of most objects.
-
-It is possible to define a constructor that takes any combination of parameters to provide additional initialization options.
-For example, a reasonable extension to the array type would be a constructor that allocates the array to a given initial capacity and initializes the elements of the array to a given @fill@ value.
-\begin{cfacode}
-void ?{}(Array * arr, int capacity, int fill) {
-  arr->len = capacity;
-  arr->data = malloc(sizeof(int)*arr->len);
-  for (int i = 0; i < arr->len; ++i) {
-    arr->data[i] = fill;
-  }
-}
-\end{cfacode}
-
-In \CFA, constructors are called implicitly in initialization contexts.
-\begin{cfacode}
-Array x, y = { 20, 0xdeadbeef }, z = y;
-\end{cfacode}
-Constructor calls look just like C initializers, which allows them to be inserted into legacy C code with minimal code changes, and also provides a very simple syntax that veteran C programmers are familiar with.
-One downside of reusing C initialization syntax is that it is not possible to determine whether an object is constructed just by looking at its declaration, since that requires knowledge of whether the type is managed at that point in the program.
-
-This example generates the following code
-\begin{cfacode}
-Array x;
-?{}(&x);                  // implicit default construct
-Array y;
-?{}(&y, 20, 0xdeadbeef);  // explicit fill construct
-Array z;
-?{}(&z, y);               // copy construct
-^?{}(&z);                 // implicit destruct
-^?{}(&y);                 // implicit destruct
-^?{}(&x);                 // implicit destruct
-\end{cfacode}
-Due to the way that constructor calls are interleaved, it is impossible for @y@ to be referenced before it is initialized, except in its own constructor.
-This loophole is minor and exists in \CC as well.
-Destructors are implicitly called in reverse declaration-order so that objects with dependencies are destructed before the objects they are dependent on.
-
-\subsection{Calling Syntax}
-\label{sub:syntax}
-There are several ways to construct an object in \CFA.
-As previously introduced, every variable is automatically constructed at its definition, which is the most natural way to construct an object.
-\begin{cfacode}
-struct A { ... };
-void ?{}(A *);
-void ?{}(A *, A);
-void ?{}(A *, int, int);
-
-A a1;             // default constructed
-A a2 = { 0, 0 };  // constructed with 2 ints
-A a3 = a1;        // copy constructed
-// implicitly destruct a3, a2, a1, in that order
-\end{cfacode}
-Since constructors and destructors are just functions, the second way is to call the function directly.
-\begin{cfacode}
-struct A { int a; };
-void ?{}(A *);
-void ?{}(A *, A);
-void ^?{}(A *);
-
-A x;               // implicitly default constructed: ?{}(&x)
-A * y = malloc();  // copy construct: ?{}(&y, malloc())
-
-^?{}(&x);   // explicit destroy x, in different order
-?{}(&x);    // explicit construct x, second construction
-^?{}(y);    // explicit destroy y
-?{}(y, x);  // explicit construct y from x, second construction
-
-// implicit ^?{}(&y);
-// implicit ^?{}(&x);
-\end{cfacode}
-Calling a constructor or destructor directly is a flexible feature that allows complete control over the management of storage.
-In particular, constructors double as a placement syntax.
-\begin{cfacode}
-struct A { ... };
-struct memory_pool { ... };
-void ?{}(memory_pool *, size_t);
-
-memory_pool pool = { 1024 };  // create an arena of size 1024
-
-A * a = allocate(&pool);      // allocate from memory pool
-?{}(a);                       // construct an A in place
-
-for (int i = 0; i < 10; i++) {
-  // reuse storage rather than reallocating
-  ^?{}(a);
-  ?{}(a);
-  // use a ...
-}
-^?{}(a);
-deallocate(&pool, a);         // return to memory pool
-\end{cfacode}
-Finally, constructors and destructors support \emph{operator syntax}.
-Like other operators in \CFA, the function name mirrors the use-case, in that the question marks are placeholders for the first $N$ arguments.
-This syntactic form is similar to the new initialization syntax in \CCeleven, except that it is used in expression contexts, rather than declaration contexts.
-\begin{cfacode}
-struct A { ... };
-struct B { A a; };
-
-A x, y, * z = &x;
-(&x){}          // default construct
-(&x){ y }       // copy construct
-(&x){ 1, 2, 3 } // construct with 3 arguments
-z{ y };         // copy construct x through a pointer
-^(&x){}         // destruct
-
-void ?{}(B * b) {
-  (&b->a){ 11, 17, 13 };  // construct a member
-}
-\end{cfacode}
-Constructor operator syntax has relatively high precedence, requiring parentheses around an address-of expression.
-Destructor operator syntax is actually a statement, and requires parentheses for symmetry with constructor syntax.
-
-One of these three syntactic forms should appeal to either C or \CC programmers using \CFA.
-
-\subsection{Constructor Expressions}
-In \CFA, it is possible to use a constructor as an expression.
-Like other operators, the function name @?{}@ matches its operator syntax.
-For example, @(&x){}@ calls the default constructor on the variable @x@, and produces @&x@ as a result.
-A key example for this capability is the use of constructor expressions to initialize the result of a call to @malloc@.
-\begin{cfacode}
-struct X { ... };
-void ?{}(X *, double);
-X * x = malloc(){ 1.5 };
-\end{cfacode}
-In this example, @malloc@ dynamically allocates storage and initializes it using a constructor, all before assigning it into the variable @x@.
-Intuitively, the expression-resolver determines that @malloc@ returns some type @T *@, as does the constructor expression since it returns the type of its argument.
-This type flows outwards to the declaration site where the expected type is known to be @X *@, thus the first argument to the constructor must be @X *@, narrowing the search space.
-
-If this extension is not present, constructing dynamically allocated objects is much more cumbersome, requiring separate initialization of the pointer and initialization of the pointed-to memory.
-\begin{cfacode}
-X * x = malloc();
-x{ 1.5 };
-\end{cfacode}
-Not only is this verbose, but it is also more error prone, since this form allows maintenance code to easily sneak in between the initialization of @x@ and the initialization of the memory that @x@ points to.
-This feature is implemented via a transformation producing the value of the first argument of the constructor, since constructors do not themselves have a return value.
-Since this transformation results in two instances of the subexpression, care is taken to allocate a temporary variable to hold the result of the subexpression in the case where the subexpression may contain side effects.
-The previous example generates the following code.
-\begin{cfacode}
-struct X *_tmp_ctor;
-struct X *x = ?{}(  // construct result of malloc
-  _tmp_ctor=malloc_T( // store result of malloc
-    sizeof(struct X),
-    _Alignof(struct X)
-  ),
-  1.5
-), _tmp_ctor; // produce constructed result of malloc
-\end{cfacode}
-It should be noted that this technique is not exclusive to @malloc@, and allows a user to write a custom allocator that can be idiomatically used in much the same way as a constructed @malloc@ call.
-
-While it is possible to use operator syntax with destructors, destructors invalidate their argument, thus operator syntax with destructors is a void-typed expression.
-
-\subsection{Function Generation}
-In \CFA, every type is defined to have the core set of four special functions described previously.
-Having these functions exist for every type greatly simplifies the semantics of the language, since most operations can simply be defined directly in terms of function calls.
-In addition to simplifying the definition of the language, it also simplifies the analysis that the translator must perform.
-If the translator can expect these functions to exist, then it can unconditionally attempt to resolve them.
-Moreover, the existence of a standard interface allows polymorphic code to interoperate with new types seamlessly.
-While automatic generation of assignment functions is present in previous versions of \CFA, the implementation has been largely rewritten to accommodate constructors and destructors.
-
-To mimic the behaviour of standard C, the default constructor and destructor for all of the basic types and for all pointer types are defined to do nothing, while the copy constructor and assignment operator perform a bitwise copy of the source parameter (as in \CC).
-This default is intended to maintain backwards compatibility and performance, by not imposing unexpected operations for a C programmer, as a zero-default behaviour would.
-However, it is possible for a user to define such constructors so that variables are safely zeroed by default, if desired.
-%%%%%%%%%%%%%%%%%%%%%%%%%% line width %%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{cfacode}
-void ?{}(int * i) { *i = 0; }
-forall(dtype T) void ?{}(T ** p) { *p = 0; }  // any pointer type
-void f() {
-  int x;    // initialized to 0
-  int * p;  // initialized to 0
-}
-\end{cfacode}
-%%%%%%%%%%%%%%%%%%%%%%%%%% line width %%%%%%%%%%%%%%%%%%%%%%%%%%
-
-There are several options for user-defined types: structures, unions, and enumerations.
-To aid in ease of use, the standard set of four functions is automatically generated for a user-defined type after its definition is completed.
-By auto-generating these functions, it is ensured that legacy C code continues to work correctly in every context where \CFA expects these functions to exist, since they are generated for every complete type.
-As well, these functions are always generated, since they may be needed by polymorphic functions.
-With that said, the generated functions are not called implicitly unless they are non-trivial, and are never exported, making it simple for the optimizer to strip them away when they are not used.
-
-The generated functions for enumerations are the simplest.
-Since enumerations in C are essentially just another integral type, the generated functions behave in the same way that the built-in functions for the basic types work.
-For example, given the enumeration
-\begin{cfacode}
-enum Colour {
-  R, G, B
-};
-\end{cfacode}
-The following functions are automatically generated.
-\begin{cfacode}
-void ?{}(enum Colour *_dst){
-  // default constructor does nothing
-}
-void ?{}(enum Colour *_dst, enum Colour _src){
-  *_dst=_src;  // bitwise copy
-}
-void ^?{}(enum Colour *_dst){
-  // destructor does nothing
-}
-enum Colour ?=?(enum Colour *_dst, enum Colour _src){
-  return *_dst=_src; // bitwise copy
-}
-\end{cfacode}
-In the future, \CFA will introduce strongly-typed enumerations, like those in \CC, wherein enumerations create a new type distinct from @int@ so that integral values require an explicit cast to be stored in an enumeration variable.
-The existing generated routines are sufficient to express this restriction, since they are currently set up to take in values of that enumeration type.
-Changes related to this feature only need to affect the expression resolution phase, where more strict rules will be applied to prevent implicit conversions from integral types to enumeration types, but should continue to permit conversions from enumeration types to @int@.
-In this way, it is still possible to add an @int@ to an enumeration, but the resulting value is an @int@, meaning it cannot be reassigned to an enumeration without a cast.
-
-For structures, the situation is more complicated.
-Given a structure @S@ with members @M$_0$@, @M$_1$@, ... @M$_{N-1}$@, each function @f@ in the standard set calls \lstinline{f(s->M$_i$, ...)} for each @$i$@.
-That is, a default constructor for @S@ default constructs the members of @S@, the copy constructor copy constructs them, and so on.
-For example, given the structure definition
-\begin{cfacode}
-struct A {
-  B b;
-  C c;
-}
-\end{cfacode}
-The following functions are implicitly generated.
-\begin{cfacode}
-void ?{}(A * this) {
-  ?{}(&this->b);  // default construct each field
-  ?{}(&this->c);
-}
-void ?{}(A * this, A other) {
-  ?{}(&this->b, other.b);  // copy construct each field
-  ?{}(&this->c, other.c);
-}
-A ?=?(A * this, A other) {
-  ?=?(&this->b, other.b);  // assign each field
-  ?=?(&this->c, other.c);
-}
-void ^?{}(A * this) {
-  ^?{}(&this->c);  // destruct each field
-  ^?{}(&this->b);
-}
-\end{cfacode}
-It is important to note that the destructors are called in reverse declaration order to prevent conflicts in the event there are dependencies among members.
-
-In addition to the standard set, a set of \emph{field constructors} is also generated for structures.
-The field constructors are constructors that consume a prefix of the structure's member-list.
-That is, $N$ constructors are built of the form @void ?{}(S *, T$_{\text{M}_0}$)@, @void ?{}(S *, T$_{\text{M}_0}$, T$_{\text{M}_1}$)@, ..., @void ?{}(S *, T$_{\text{M}_0}$, T$_{\text{M}_1}$, ..., T$_{\text{M}_{N-1}}$)@, where members are copy constructed if they have a corresponding positional argument and are default constructed otherwise.
-The addition of field constructors allows structures in \CFA to be used naturally in the same ways as used in C (\ie, to initialize any prefix of the structure), \eg, @A a0 = { b }, a1 = { b, c }@.
-Extending the previous example, the following constructors are implicitly generated for @A@.
-\begin{cfacode}
-void ?{}(A * this, B b) {
-  ?{}(&this->b, b);
-  ?{}(&this->c);
-}
-void ?{}(A * this, B b, C c) {
-  ?{}(&this->b, b);
-  ?{}(&this->c, c);
-}
-\end{cfacode}
-
-For unions, the default constructor and destructor do nothing, as it is not obvious which member, if any, should be constructed.
-For copy constructor and assignment operations, a bitwise @memcpy@ is applied.
-In standard C, a union can also be initialized using a value of the same type as its first member, and so a corresponding field constructor is generated to perform a bitwise @memcpy@ of the object.
-An alternative to this design is to always construct and destruct the first member of a union, to match with the C semantics of initializing the first member of the union.
-This approach ultimately feels subtle and unsafe.
-Another option is to, like \CC, disallow unions from containing members that are themselves managed types.
-This restriction is a reasonable approach from a safety standpoint, but is not very C-like.
-Since the primary purpose of a union is to provide low-level memory optimization, it is assumed that the user has a certain level of maturity.
-It is therefore the responsibility of the user to define the special functions explicitly if they are appropriate, since it is impossible to accurately predict the ways that a union is intended to be used at compile-time.
-
-For example, given the union
-\begin{cfacode}
-union X {
-  Y y;
-  Z z;
-};
-\end{cfacode}
-The following functions are automatically generated.
-\begin{cfacode}
-void ?{}(union X *_dst){  // default constructor
-}
-void ?{}(union X *_dst, union X _src){  // copy constructor
-  __builtin_memcpy(_dst, &_src, sizeof(union X ));
-}
-void ^?{}(union X *_dst){  // destructor
-}
-union X ?=?(union X *_dst, union X _src){  // assignment
-  __builtin_memcpy(_dst, &_src, sizeof(union X));
-  return _src;
-}
-void ?{}(union X *_dst, struct Y src){  // construct first field
-  __builtin_memcpy(_dst, &src, sizeof(struct Y));
-}
-\end{cfacode}
-
-% This feature works in the \CFA model, since constructors are simply special functions and can be called explicitly, unlike in \CC. % this sentence isn't really true => placement new
-In \CCeleven, unions may have managed members, with the caveat that if there are any members with a user-defined operation, then that operation is not implicitly defined, forcing the user to define the operation if necessary.
-This restriction could easily be added into \CFA once \emph{deleted} functions are added.
-
-\subsection{Using Constructors and Destructors}
-Implicitly generated constructor and destructor calls ignore the outermost type qualifiers, \eg @const@ and @volatile@, on a type by way of a cast on the first argument to the function.
-For example,
-\begin{cfacode}
-struct S { int i; };
-void ?{}(S *, int);
-void ?{}(S *, S);
-
-const S s = { 11 };
-volatile S s2 = s;
-\end{cfacode}
-Generates the following code
-\begin{cfacode}
-const struct S s;
-?{}((struct S *)&s, 11);
-volatile struct S s2;
-?{}((struct S *)&s2, s);
-\end{cfacode}
-Here, @&s@ and @&s2@ are cast to unqualified pointer types.
-This mechanism allows the same constructors and destructors to be used for qualified objects as for unqualified objects.
-This rule applies only to implicitly generated constructor calls.
-Hence, explicitly re-initializing qualified objects with a constructor requires an explicit cast.
-
-As discussed in Section \ref{sub:c_background}, compound literals create unnamed objects.
-This mechanism can continue to be used seamlessly in \CFA with managed types to create temporary objects.
-The object created by a compound literal is constructed using the provided brace-enclosed initializer-list, and is destructed at the end of the scope it is used in.
-For example,
-\begin{cfacode}
-struct A { int x; };
-void ?{}(A *, int, int);
-{
-  int x = (A){ 10, 20 }.x;
-}
-\end{cfacode}
-is equivalent to
-\begin{cfacode}
-struct A { int x, y; };
-void ?{}(A *, int, int);
-{
-  A _tmp;
-  ?{}(&_tmp, 10, 20);
-  int x = _tmp.x;
-  ^?{}(&_tmp);
-}
-\end{cfacode}
-
-Unlike \CC, \CFA provides an escape hatch that allows a user to decide at an object's definition whether it should be managed or not.
-An object initialized with \ateq is guaranteed to be initialized like a C object, and has no implicit destructor call.
-This feature provides all of the freedom that C programmers are used to having to optimize a program, while maintaining safety as a sensible default.
-\begin{cfacode}
-struct A { int * x; };
-// RAII
-void ?{}(A * a) { a->x = malloc(sizeof(int)); }
-void ^?{}(A * a) { free(a->x); }
-
-A a1;           // managed
-A a2 @= { 0 };  // unmanaged
-\end{cfacode}
-In this example, @a1@ is a managed object, and thus is default constructed and destructed at the start/end of @a1@'s lifetime, while @a2@ is an unmanaged object and is not implicitly constructed or destructed.
-Instead, @a2.x@ is initialized to @0@ as if it were a C object, because of the explicit initializer.
-
-In addition to freedom, \ateq provides a simple path for migrating legacy C code to \CFA, in that objects can be moved from C-style initialization to \CFA gradually and individually.
-It is worth noting that the use of unmanaged objects can be tricky to get right, since there is no guarantee that the proper invariants are established on an unmanaged object.
-It is recommended that most objects be managed by sensible constructors and destructors, except where absolutely necessary, such as memory-mapped devices, trigger devices, I/O controllers, etc.
-
-When a user declares any constructor or destructor, the corresponding intrinsic/generated function and all field constructors for that type are hidden, so that they are not found during expression resolution until the user-defined function goes out of scope.
-Furthermore, if the user declares any constructor, then the intrinsic/generated default constructor is also hidden, precluding default construction.
-These semantics closely mirror the rule for implicit declaration of constructors in \CC, wherein the default constructor is implicitly declared if there is no user-declared constructor \cite[p.~186]{ANSI98:C++}.
-\begin{cfacode}
-struct S { int x, y; };
-
-void f() {
-  S s0, s1 = { 0 }, s2 = { 0, 2 }, s3 = s2;  // okay
-  {
-    void ?{}(S * s, int i) { s->x = i*2; } // locally hide autogen ctors
-    S s4;  // error, no default constructor
-    S s5 = { 3 };  // okay, local constructor
-    S s6 = { 4, 5 };  // error, no field constructor
-    S s7 = s5; // okay
-  }
-  S s8, s9 = { 6 }, s10 = { 7, 8 }, s11 = s10;  // okay
-}
-\end{cfacode}
-In this example, the inner scope declares a constructor from @int@ to @S@, which hides the default constructor and field constructors until the end of the scope.
-
-When defining a constructor or destructor for a structure @S@, any members that are not explicitly constructed or destructed are implicitly constructed or destructed automatically.
-If an explicit call is present, then that call is taken in preference to any implicitly generated call.
-A consequence of this rule is that it is possible, unlike \CC, to precisely control the order of construction and destruction of sub-objects on a per-constructor basis, whereas in \CC sub-object initialization and destruction is always performed based on the declaration order.
-\begin{cfacode}
-struct A {
-  B w, x, y, z;
-};
-void ?{}(A * a, int i) {
-  (&a->x){ i };
-  (&a->z){ a->y };
-}
-\end{cfacode}
-Generates the following
-\begin{cfacode}
-void ?{}(A * a, int i) {
-  (&a->w){};   // implicit default ctor
-  (&a->y){};   // implicit default ctor
-  (&a->x){ i };
-  (&a->z){ a->y };
-}
-\end{cfacode}
-Finally, it is illegal for a sub-object to be explicitly constructed for the first time after it is used for the first time.
-If the translator cannot be reasonably sure that an object is constructed prior to its first use, but the object is constructed afterward, an error is emitted.
-More specifically, the translator searches the body of a constructor to ensure that every sub-object is initialized.
-\begin{cfacode}
-void ?{}(A * a, double x) {
-  f(a->x);
-  (&a->x){ (int)x }; // error, used uninitialized on previous line
-}
-\end{cfacode}
-However, if the translator sees a sub-object used within the body of a constructor, but does not see a constructor call that uses the sub-object as the target of a constructor, then the translator assumes the object is to be implicitly constructed (copy constructed in a copy constructor and default constructed in any other constructor).
-To override this rule, \ateq can be used to force the translator to trust the programmer's discretion.
-This form of \ateq is not yet implemented.
-\begin{cfacode}
-void ?{}(A * a) {
-  // default constructs all members
-  f(a->x);
-}
-
-void ?{}(A * a, A other) {
-  // copy constructs all members
-  f(a->y);
-}
-
-void ?{}(A * a, int x) {
-  // object forwarded to another constructor,
-  // does not implicitly construct any members
-  (&a){};
-}
-
-void ^?{}(A * a) {
-  ^(&a->x){}; // explicit destructor call
-} // z, y, w implicitly destructed, in this order
-\end{cfacode}
-If at any point, the @this@ parameter is passed directly as the target of another constructor, then it is assumed the other constructor handles the initialization of all of the object's members and no implicit constructor calls are added to the current constructor.
-
-Despite great effort, some forms of C syntax do not work well with constructors in \CFA.
-In particular, constructor calls cannot contain designations (see \ref{sub:c_background}), since this is equivalent to allowing designations on the arguments to arbitrary function calls.
-\begin{cfacode}
-// all legal forward declarations in C
-void f(int, int, int);
-void f(int a, int b, int c);
-void f(int b, int c, int a);
-void f(int c, int a, int b);
-void f(int x, int y, int z);
-
-f(b:10, a:20, c:30);  // which parameter is which?
-\end{cfacode}
-In C, function prototypes are permitted to have arbitrary parameter names, including no names at all, which may have no connection to the actual names used at function definition.
-Furthermore, a function prototype can be repeated an arbitrary number of times, each time using different names.
-As a result, it was decided that any attempt to resolve designated function calls with C's function prototype rules would be brittle, and thus it is not sensible to allow designations in constructor calls.
-
-\begin{sloppypar}
-In addition, constructor calls do not support unnamed nesting.
-\begin{cfacode}
-struct B { int x; };
-struct C { int y; };
-struct A { B b; C c; };
-void ?{}(A *, B);
-void ?{}(A *, C);
-
-A a = {
-  { 10 },  // construct B? - invalid
-};
-\end{cfacode}
-In C, nesting initializers means that the programmer intends to initialize sub-objects with the nested initializers.
-The reason for this omission is to both simplify the mental model for using constructors, and to make initialization simpler for the expression resolver.
-If this were allowed, it would be necessary for the expression resolver to decide whether each argument to the constructor call could initialize to some argument in one of the available constructors, making the problem highly recursive and potentially much more expensive.
-That is, in the previous example the line marked as an error could mean construct using @?{}(A *, B)@ or with @?{}(A *, C)@, since the inner initializer @{ 10 }@ could be taken as an intermediate object of type @B@ or @C@.
-In practice, however, there could be many objects that can be constructed from a given @int@ (or, indeed, any arbitrary parameter list), and thus a complete solution to this problem would require fully exploring all possibilities.
-\end{sloppypar}
-
-More precisely, constructor calls cannot have a nesting depth greater than the number of array dimensions in the type of the initialized object, plus one.
-For example,
-\begin{cfacode}
-struct A;
-void ?{}(A *, int);
-void ?{}(A *, A, A);
-
-A a1[3] = { { 3 }, { 4 }, { 5 } };
-A a2[2][2] = {
-  { { 9 }, { 10 } },  // a2[0]
-  { {14 }, { 15 } }   // a2[1]
-};
-A a3[4] = { // 1 dimension => max depth 2
-  { { 11 }, { 12 } },  // error, three levels deep
-  { 80 }, { 90 }, { 100 }
-}
-\end{cfacode}
-The body of @A@ has been omitted, since only the constructor interfaces are important.
-
-It should be noted that unmanaged objects, i.e. objects that have only trivial constructors, can still make use of designations and nested initializers in \CFA.
-It is simple to overcome this limitation for managed objects by making use of compound literals, so that the arguments to the constructor call are explicitly typed.
-%%%%%%%%%%%%%%%%%%%%%%%%%% line width %%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{cfacode}
-struct B { int x; };
-struct C { int y; };
-struct A { B b; C c; };
-void ?{}(A *, B);
-void ?{}(A *, C);
-
-A a = {
-  (C){ 10 } // disambiguate with compound literal
-};
-\end{cfacode}
-%%%%%%%%%%%%%%%%%%%%%%%%%% line width %%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\subsection{Implicit Destructors}
-\label{sub:implicit_dtor}
-Destructors are automatically called at the end of the block in which the object is declared.
-In addition to this, destructors are automatically called when statements manipulate control flow to leave a block in which the object is declared, \eg, with return, break, continue, and goto statements.
-The example below demonstrates a simple routine with multiple return statements.
-\begin{cfacode}
-struct A;
-void ^?{}(A *);
-
-void f(int i) {
-  A x;  // construct x
-  {
-    A y; // construct y
-    {
-      A z; // construct z
-      {
-        if (i == 0) return; // destruct x, y, z
-      }
-      if (i == 1) return; // destruct x, y, z
-    } // destruct z
-    if (i == 2) return; // destruct x, y
-  } // destruct y
-} // destruct x
-\end{cfacode}
-
-The next example illustrates the use of simple continue and break statements and the manner that they interact with implicit destructors.
-\begin{cfacode}
-for (int i = 0; i < 10; i++) {
-  A x;
-  if (i == 2) {
-    continue;  // destruct x
-  } else if (i == 3) {
-    break;     // destruct x
-  }
-} // destruct x
-\end{cfacode}
-Since a destructor call is automatically inserted at the end of the block, nothing special needs to happen to destruct @x@ in the case where control reaches the end of the loop.
-In the case where @i@ is @2@, the continue statement runs the loop update expression and attempts to begin the next iteration of the loop.
-Since continue is a C statement, which does not understand destructors, it is transformed into a @goto@ statement that branches to the end of the loop, just before the block's destructors, to ensure that @x@ is destructed.
-When @i@ is @3@, the break statement moves control to just past the end of the loop.
-Unlike the previous case, the destructor for @x@ cannot be reused, so a destructor call for @x@ is inserted just before the break statement.
-
-\CFA also supports labeled break and continue statements, which allow more precise manipulation of control flow.
-Labeled break and continue allow the programmer to specify which control structure to target by using a label attached to a control structure.
-\begin{cfacode}[emph={L1,L2}, emphstyle=\color{red}]
-L1: for (int i = 0; i < 10; i++) {
-  A x;
-  for (int j = 0; j < 10; j++) {
-    A y;
-    if (i == 1) {
-      continue L1; // destruct y
-    } else if (i == 2) {
-      break L1;    // destruct x,y
-    }
-  } // destruct y
-} // destruct x
-\end{cfacode}
-The statement @continue L1@ begins the next iteration of the outer for-loop.
-Since the semantics of continue require the loop update expression to execute, control branches to the end of the outer for loop, meaning that the block destructor for @x@ can be reused, and it is only necessary to generate the destructor for @y@.
-Break, on the other hand, requires jumping out of both loops, so the destructors for both @x@ and @y@ are generated and inserted before the @break L1@ statement.
-
-Finally, an example which demonstrates goto.
-Since goto is a general mechanism for jumping to different locations in the program, a more comprehensive approach is required.
-For each goto statement $G$ and each target label $L$, let $S_G$ be the set of all managed variables alive at $G$, and let $S_L$ be the set of all managed variables alive at $L$.
-If at any $G$, $S_L \setminus S_G \neq \emptyset$, then the translator emits an error, because control flow branches from a point where the object is not yet live to a point where it is live, skipping the object's constructor.
-Then, for every $G$, the destructor for each variable in the set $S_G \setminus S_L$ is inserted directly before $G$, which ensures each object that is currently live at $G$, but not at $L$, is destructed before control branches.
-\begin{cfacode}
-int i = 0;
-{
-  L0: ;     // S_L0 = {}
-    A y;
-  L1: ;     // S_L1 = { y }
-    A x;
-  L2: ;     // S_L2 = { y, x }
-    if (i == 0) {
-      ++i;
-      goto L1;    // S_G = { y, x }
-      // S_G-S_L1 = { x } => destruct x
-    } else if (i == 1) {
-      ++i;
-      goto L2;    // S_G = { y, x }
-      // S_G-S_L2 = {} => destruct nothing
-    } else if (i == 2) {
-      ++i;
-      goto L3;    // S_G = { y, x }
-      // S_G-S_L3 = {}
-    } else if (false) {
-      ++i;
-      A z;
-      goto L3;    // S_G = { z, y, x }
-      // S_G-S_L3 = { z } => destruct z
-    } else {
-      ++i;
-      goto L4;    // S_G = { y, x }
-      // S_G-S_L4 = { y, x } => destruct y, x
-    }
-  L3: ;    // S_L3 = { y, x }
-    goto L2;      // S_G = { y, x }
-    // S_G-S_L2 = {}
-}
-L4: ;  // S_L4 = {}
-if (i == 4) {
-  goto L0;        // S_G = {}
-  // S_G-S_L0 = {}
-}
-\end{cfacode}
-All break and continue statements are implemented in \CFA in terms of goto statements, so the more constrained forms are precisely governed by these rules.
-
-The next example demonstrates the error case.
-\begin{cfacode}
-{
-    goto L1; // S_G = {}
-    // S_L1-S_G = { y } => error
-    A y;
-  L1: ; // S_L1 = { y }
-    A x;
-  L2: ; // S_L2 = { y, x }
-}
-goto L2; // S_G = {}
-// S_L2-S_G = { y, x } => error
-\end{cfacode}
-
-While \CFA supports the GCC computed-goto extension, the behaviour of managed objects in combination with computed-goto is undefined.
-\begin{cfacode}
-void f(int val) {
-  void * l = val == 0 ? &&L1 : &&L2;
-  {
-      A x;
-    L1: ;
-      goto *l;  // branches differently depending on argument
-  }
-  L2: ;
-}
-\end{cfacode}
-Likewise, destructors are not executed at scope-exit due to a computed-goto in \CC, as of g++ version 6.2.
-
-\subsection{Implicit Copy Construction}
-\label{s:implicit_copy_construction}
-When a function is called, the arguments supplied to the call are subject to implicit copy construction (and destruction of the generated temporary), and the return value is subject to destruction.
-When a value is returned from a function, the copy constructor is called to pass the value back to the call site.
-Exempt from these rules are intrinsic and built-in functions.
-It should be noted that unmanaged objects are subject to copy constructor calls when passed as arguments to a function or when returned from a function, since they are not the \emph{target} of the copy constructor call.
-That is, since the parameter is not marked as an unmanaged object using \ateq, it is copy constructed if it is returned by value or passed as an argument to another function, so to guarantee consistent behaviour, unmanaged objects must be copy constructed when passed as arguments.
-These semantics are important to bear in mind when using unmanaged objects, and could produce unexpected results when mixed with objects that are explicitly constructed.
-\begin{cfacode}
-struct A { ... };
-void ?{}(A *);
-void ?{}(A *, A);
-void ^?{}(A *);
-
-A identity(A x) { // pass by value => need local copy
-  return x;       // return by value => make call-site copy
-}
-
-A y, z @= {};
-identity(y);  // copy construct y into x
-identity(z);  // copy construct z into x
-\end{cfacode}
-Note that unmanaged argument @z@ is logically copy constructed into managed parameter @x@; however, the translator must copy construct into a temporary variable to be passed as an argument, which is also destructed after the call.
-A compiler could by-pass the argument temporaries since it is in control of the calling conventions and knows exactly where the called-function's parameters live.
-
-This generates the following
-\begin{cfacode}
-struct A f(struct A x){
-  struct A _retval_f;    // return value
-  ?{}((&_retval_f), x);  // copy construct return value
-  return _retval_f;
-}
-
-struct A y;
-?{}(&y);                 // default construct
-struct A z = { 0 };      // C default
-
-struct A _tmp_cp1;       // argument 1
-struct A _tmp_cp_ret0;   // return value
-_tmp_cp_ret0=f(
-  (?{}(&_tmp_cp1, y) , _tmp_cp1)  // argument is a comma expression
-), _tmp_cp_ret0;         // return value for cascading
-^?{}(&_tmp_cp_ret0);     // destruct return value
-^?{}(&_tmp_cp1);         // destruct argument 1
-
-struct A _tmp_cp2;       // argument 1
-struct A _tmp_cp_ret1;   // return value
-_tmp_cp_ret1=f(
-  (?{}(&_tmp_cp2, z), _tmp_cp2)  // argument is a comma expression
-), _tmp_cp_ret1;         // return value for cascading
-^?{}(&_tmp_cp_ret1);     // destruct return value
-^?{}(&_tmp_cp2);         // destruct argument 1
-^?{}(&y);
-\end{cfacode}
-
-A special syntactic form, such as a variant of \ateq, can be implemented to specify at the call site that an argument should not be copy constructed, to regain some control for the C programmer.
-\begin{cfacode}
-identity(z@);  // do not copy construct argument
-               // - will copy construct/destruct return value
-A@ identity_nocopy(A @ x) {  // argument not copy constructed or destructed
-  return x;  // not copy constructed
-             // return type marked @ => not destructed
-}
-\end{cfacode}
-It should be noted that reference types will allow specifying that a value does not need to be copied, however reference types do not provide a means of preventing implicit copy construction from uses of the reference, so the problem is still present when passing or returning the reference by value.
-
-Adding implicit copy construction imposes the additional runtime cost of the copy constructor for every argument and return value in a function call.
-This cost is necessary to maintain appropriate value semantics when calling a function.
-In the future, return-value-optimization (RVO) can be implemented for \CFA to elide unnecessary copy construction and destruction of temporary objects.
-This cost is not present for types with trivial copy constructors and destructors.
-
-A known issue with this implementation is that the argument and return value temporaries are not guaranteed to have the same address for their entire lifetimes.
-In the previous example, since @_retval_f@ is allocated and constructed in @f@, then returned by value, the internal data is bitwise copied into the caller's stack frame.
-This approach works out most of the time, because typically destructors need to only access the fields of the object and recursively destroy.
-It is currently the case that constructors and destructors that use the @this@ pointer as a unique identifier to store data externally do not work correctly for return value objects.
-Thus, it is currently not safe to rely on an object's @this@ pointer to remain constant throughout execution of the program.
-\begin{cfacode}
-A * external_data[32];
-int ext_count;
-struct A;
-void ?{}(A * a) {
-  // ...
-  external_data[ext_count++] = a;
-}
-void ^?{}(A * a) {
-  for (int i = 0; i < ext_count; i++) {
-    if (a == external_data[i]) { // may never be true
-      // ...
-    }
-  }
-}
-
-A makeA() {
-  A x;  // stores &x in external_data
-  return x;
-}
-makeA();  // return temporary has a different address than x
-// equivalent to:
-//   A _tmp;
-//   _tmp = makeA(), _tmp;
-//   ^?{}(&_tmp);
-\end{cfacode}
-In the above example, a global array of pointers is used to keep track of all of the allocated @A@ objects.
-Due to copying on return, the current object being destructed does not exist in the array if an @A@ object is ever returned by value from a function, such as in @makeA@.
-
-This problem could be solved in the translator by changing the function signatures so that the return value is moved into the parameter list.
-For example, the translator could restructure the code like so
-\begin{cfacode}
-void f(struct A x, struct A * _retval_f){
-  ?{}(_retval_f, x);  // construct directly into caller's stack frame
-}
-
-struct A y;
-?{}(&y);
-struct A z = { 0 };
-
-struct A _tmp_cp1;     // argument 1
-struct A _tmp_cp_ret0; // return value
-f((?{}(&_tmp_cp1, y) , _tmp_cp1), &_tmp_cp_ret0), _tmp_cp_ret0;
-^?{}(&_tmp_cp_ret0);   // return value
-^?{}(&_tmp_cp1);       // argument 1
-\end{cfacode}
-This transformation provides @f@ with the address of the return variable so that it can be constructed into directly.
-It is worth pointing out that this kind of signature rewriting already occurs in polymorphic functions that return by value, as discussed in \cite{Bilson03}.
-A key difference in this case is that every function would need to be rewritten like this, since types can switch between managed and unmanaged at different scope levels, \eg
-\begin{cfacode}
-struct A { int v; };
-A x; // unmanaged, since only trivial constructors are available
-{
-  void ?{}(A * a) { ... }
-  void ^?{}(A * a) { ... }
-  A y; // managed
-}
-A z; // unmanaged
-\end{cfacode}
-Hence there is not enough information to determine at function declaration whether a type is managed or not, and thus it is the case that all signatures have to be rewritten to account for possible copy constructor and destructor calls.
-Even with this change, it would still be possible to declare backwards compatible function prototypes with an @extern "C"@ block, which allows for the definition of C-compatible functions within \CFA code, however this would require actual changes to the way code inside of an @extern "C"@ function is generated as compared with normal code generation.
-Furthermore, it is not possible to overload C functions, so using @extern "C"@ to declare functions is of limited use.
-
-It would be possible to regain some control by adding an attribute to structures that specifies whether they can be managed or not (perhaps \emph{manageable} or \emph{unmanageable}), and to emit an error in the case that a constructor or destructor is declared for an unmanageable type.
-Ideally, structures should be manageable by default, since otherwise the default case becomes more verbose.
-This means that in general, function signatures would have to be rewritten, and in a select few cases the signatures would not be rewritten.
-\begin{cfacode}
-__attribute__((manageable)) struct A { ... };   // can declare ctors
-__attribute__((unmanageable)) struct B { ... }; // cannot declare ctors
-struct C { ... };                               // can declare ctors
-
-A f();  // rewritten void f(A *);
-B g();  // not rewritten
-C h();  // rewritten void h(C *);
-\end{cfacode}
-An alternative is to make the attribute \emph{identifiable}, which states that objects of this type use the @this@ parameter as an identity.
-This strikes more closely to the visible problem, in that only types marked as identifiable would need to have the return value moved into the parameter list, and every other type could remain the same.
-Furthermore, no restrictions would need to be placed on whether objects can be constructed.
-\begin{cfacode}
-__attribute__((identifiable)) struct A { ... };  // can declare ctors
-struct B { ... };                                // can declare ctors
-
-A f();  // rewritten void f(A *);
-B g();  // not rewritten
-\end{cfacode}
-
-Ultimately, both of these are patchwork solutions.
-Since a real compiler has full control over its calling conventions, it can seamlessly allow passing the return parameter without outwardly changing the signature of a routine.
-As such, it has been decided that this issue is not currently a priority and will be fixed when a full \CFA compiler is implemented.
-
-\section{Implementation}
-\subsection{Array Initialization}
-Arrays are a special case in the C type-system.
-Type checking largely ignores size information for C arrays, making it impossible to write a standalone \CFA function that constructs or destructs an array, while maintaining the standard interface for constructors and destructors.
-Instead, \CFA defines the initialization and destruction of an array recursively.
-That is, when an array is defined, each of its elements is constructed in order from element 0 up to element $n-1$.
-When an array is to be implicitly destructed, each of its elements is destructed in reverse order from element $n-1$ down to element 0.
-As in C, it is possible to explicitly provide different initializers for each element of the array through array initialization syntax.
-In this case, each of the initializers is taken in turn to construct a subsequent element of the array.
-If too many initializers are provided, only the first $n$ initializers are actually used.
-If too few initializers are provided, then the remaining elements are default constructed.
-
-For example, given the following code.
-\begin{cfacode}
-struct X {
-  int x, y, z;
-};
-void f() {
-  X x[10] = { { 1, 2, 3 }, { 4 }, { 7, 8 } };
-}
-\end{cfacode}
-The following code is generated for @f@.
-\begin{cfacode}
-void f(){
-  struct X x[((long unsigned int )10)];
-  // construct x
-  {
-    int _index0 = 0;
-    // construct with explicit initializers
-    {
-      if (_index0<10) ?{}(&x[_index0], 1, 2, 3);
-      ++_index0;
-      if (_index0<10) ?{}(&x[_index0], 4);
-      ++_index0;
-      if (_index0<10) ?{}(&x[_index0], 7, 8);
-      ++_index0;
-    }
-
-    // default construct remaining elements
-    for (;_index0<10;++_index0) {
-      ?{}(&x[_index0]);
-    }
-  }
-  // destruct x
-  {
-    int _index1 = 10-1;
-    for (;_index1>=0;--_index1) {
-      ^?{}(&x[_index1]);
-    }
-  }
-}
-\end{cfacode}
-Multidimensional arrays require more complexity.
-For example, a two dimensional array
-\begin{cfacode}
-void g() {
-  X x[10][10] = {
-    { { 1, 2, 3 }, { 4 } }, // x[0]
-    { { 7, 8 } }            // x[1]
-  };
-}\end{cfacode}
-Generates the following
-\begin{cfacode}
-void g(){
-  struct X x[10][10];
-  // construct x
-  {
-    int _index0 = 0;
-    for (;_index0<10;++_index0) {
-      {
-        int _index1 = 0;
-        // construct with explicit initializers
-        {
-          switch ( _index0 ) {
-            case 0:
-              // construct first array
-              if ( _index1<10 ) ?{}(&x[_index0][_index1], 1, 2, 3);
-              ++_index1;
-              if ( _index1<10 ) ?{}(&x[_index0][_index1], 4);
-              ++_index1;
-              break;
-            case 1:
-              // construct second array
-              if ( _index1<10 ) ?{}(&x[_index0][_index1], 7, 8);
-              ++_index1;
-              break;
-          }
-        }
-        // default construct remaining elements
-        for (;_index1<10;++_index1) {
-            ?{}(&x[_index0][_index1]);
-        }
-      }
-    }
-  }
-  // destruct x
-  {
-    int _index2 = 10-1;
-    for (;_index2>=0;--_index2) {
-      {
-        int _index3 = 10-1;
-        for (;_index3>=0;--_index3) {
-            ^?{}(&x[_index2][_index3]);
-        }
-      }
-    }
-  }
-}
-\end{cfacode}
-% It is possible to generate slightly simpler code for the switch cases, since the value of @_index1@ is known at compile-time within each case, however the procedure for generating constructor calls is complicated.
-% It is simple to remove the increment statements for @_index1@, but it is not simple to remove the
-%% technically, it's not hard either. I could easily downcast and change the second argument to ?[?], but is it really necessary/worth it??
-
-\subsection{Global Initialization}
-In standard C, global variables can only be initialized to compile-time constant expressions, which places strict limitations on the programmer's ability to control the default values of objects.
-In \CFA, constructors and destructors are guaranteed to be run on global objects, allowing arbitrary code to be run before and after the execution of the main routine.
-By default, objects within a translation unit are constructed in declaration order, and destructed in the reverse order.
-The default order of construction of objects amongst translation units is unspecified.
-It is, however, guaranteed that any global objects in the standard library are initialized prior to the initialization of any object in a user program.
-
-This feature is implemented in the \CFA translator by grouping every global constructor call into a function with the GCC attribute \emph{constructor}, which performs most of the heavy lifting \cite[6.31.1]{GCCExtensions}.
-A similar function is generated with the \emph{destructor} attribute, which handles all global destructor calls.
-At the time of writing, initialization routines in the library are specified with priority \emph{101}, which is the highest priority level that GCC allows, whereas initialization routines in the user's code are implicitly given the default priority level, which ensures they have a lower priority than any code with a specified priority level.
-This mechanism allows arbitrarily complicated initialization to occur before any user code runs, making it possible for library designers to initialize their modules without requiring the user to call specific startup or tear-down routines.
-
-For example, given the following global declarations.
-\begin{cfacode}
-struct X {
-  int y, z;
-};
-void ?{}(X *);
-void ?{}(X *, int, int);
-void ^?{}(X *);
-
-X a;
-X b = { 10, 3 };
-\end{cfacode}
-The following code is generated.
-\begin{cfacode}
-__attribute__ ((constructor)) static void _init_global_ctor(void){
-  ?{}(&a);
-  ?{}(&b, 10, 3);
-}
-__attribute__ ((destructor)) static void _destroy_global_ctor(void){
-  ^?{}(&b);
-  ^?{}(&a);
-}
-\end{cfacode}
-
-%   https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Attributes.html#C_002b_002b-Attributes
-% suggestion: implement this in CFA by picking objects with a specified priority and pulling them into their own init functions (could even group them by priority level -> map<int, list<ObjectDecl*>>) and pull init_priority forward into constructor and destructor attributes with the same priority level
-GCC provides an attribute @init_priority@ in \CC, which allows specifying the relative priority for initialization of global objects on a per-object basis.
-A similar attribute can be implemented in \CFA by pulling marked objects into global constructor/destructor-attribute functions with the specified priority.
-For example,
-\begin{cfacode}
-struct A { ... };
-void ?{}(A *, int);
-void ^?{}(A *);
-__attribute__((init_priority(200))) A x = { 123 };
-\end{cfacode}
-would generate
-\begin{cfacode}
-A x;
-__attribute__((constructor(200))) __init_x() {
-  ?{}(&x, 123);  // construct x with priority 200
-}
-__attribute__((destructor(200))) __destroy_x() {
-  ^?{}(&x);      // destruct x with priority 200
-}
-\end{cfacode}
-
-\subsection{Static Local Variables}
-In standard C, it is possible to mark variables that are local to a function with the @static@ storage class.
-Unlike normal local variables, a @static@ local variable is defined to live for the entire duration of the program, so that each call to the function has access to the same variable with the same address and value as it had in the previous call to the function.
-Much like global variables, @static@ variables can only be initialized to a \emph{compile-time constant value} so that a compiler is able to create storage for the variable and initialize it at compile-time.
-
-Yet again, this rule is too restrictive for a language with constructors and destructors.
-Since the initializer expression is not necessarily a compile-time constant and can depend on the current execution state of the function, \CFA modifies the definition of a @static@ local variable so that objects are guaranteed to be live from the time control flow reaches their declaration, until the end of the program.
-Since standard C does not allow access to a @static@ local variable before the first time control flow reaches the declaration, this change does not preclude any valid C code.
-Local objects with @static@ storage class are only implicitly constructed and destructed once for the duration of the program.
-The object is constructed when its declaration is reached for the first time.
-The object is destructed once at the end of the program.
-
-Construction of @static@ local objects is implemented via an accompanying @static bool@ variable, which records whether the variable has already been constructed.
-A conditional branch checks the value of the companion @bool@, and if the variable has not yet been constructed then the object is constructed.
-The object's destructor is scheduled to be run when the program terminates using @atexit@ \footnote{When using the dynamic linker, it is possible to dynamically load and unload a shared library. Since glibc 2.2.3 \cite{atexit}, functions registered with @atexit@ within the shared library are called when unloading the shared library. As such, static local objects can be destructed using this mechanism even in shared libraries on Linux systems.}, and the companion @bool@'s value is set so that subsequent invocations of the function do not reconstruct the object.
-Since the parameter to @atexit@ is a parameter-less function, some additional tweaking is required.
-First, the @static@ variable must be hoisted up to global scope and uniquely renamed to prevent name clashes with other global objects.
-If necessary, a local structure may need to be hoisted, as well.
-Second, a function is built that calls the destructor for the newly hoisted variable.
-Finally, the newly generated function is registered with @atexit@, instead of registering the destructor directly.
-Since @atexit@ calls functions in the reverse order in which they are registered, @static@ local variables are guaranteed to be destructed in the reverse order that they are constructed, which may differ between multiple executions of the same program.
-Extending the previous example
-\begin{cfacode}
-int f(int x) {
-  static X a;
-  static X b = { x, x };  // depends on parameter value
-  static X c = b;         // depends on local variable
-}
-\end{cfacode}
-Generates the following.
-\begin{cfacode}
-static struct X a_static_var0;
-static void __a_dtor_atexit0(void){
-  ((void)^?{}(((struct X *)(&a_static_var0))));
-}
-static struct X b_static_var1;
-static void __b_dtor_atexit1(void){
-  ((void)^?{}(((struct X *)(&b_static_var1))));
-}
-static struct X c_static_var2;
-static void __c_dtor_atexit2(void){
-  ((void)^?{}(((struct X *)(&c_static_var2))));
-}
-int f(int x){
-  int _retval_f;
-  __attribute__ ((unused)) static void *_dummy0;
-  static _Bool __a_uninitialized = 1;
-  if ( __a_uninitialized ) {
-    ((void)?{}(((struct X *)(&a_static_var0))));
-    ((void)(__a_uninitialized=0));
-    ((void)atexit(__a_dtor_atexit0));
-  }
-
-  __attribute__ ((unused)) static void *_dummy1;
-  static _Bool __b_uninitialized = 1;
-  if ( __b_uninitialized ) {
-    ((void)?{}(((struct X *)(&b_static_var1)), x, x));
-    ((void)(__b_uninitialized=0));
-    ((void)atexit(__b_dtor_atexit1));
-  }
-
-  __attribute__ ((unused)) static void *_dummy2;
-  static _Bool __c_uninitialized = 1;
-  if ( __c_uninitialized ) {
-    ((void)?{}(((struct X *)(&c_static_var2)), b_static_var1));
-    ((void)(__c_uninitialized=0));
-    ((void)atexit(__c_dtor_atexit2));
-  }
-}
-\end{cfacode}
-
-This implementation comes at the runtime cost of an additional branch for every @static@ local variable, each time the function is called.
-Since initializers are not required to be compile-time constant expressions, they can involve global variables, function arguments, function calls, etc.
-As a direct consequence, @static@ local variables cannot be initialized with attribute-constructor routines like global variables can.
-However, in the case where the variable is unmanaged and has a compile-time constant initializer, a C-compliant initializer is generated and the additional cost is not present.
-\CC shares the same semantics for its @static@ local variables.
-
-\subsection{Polymorphism}
-As mentioned in section \ref{sub:polymorphism}, \CFA currently has 3 type-classes that are used to designate polymorphic data types: @otype@, @dtype@, and @ftype@.
-In previous versions of \CFA, @otype@ was syntactic sugar for @dtype@ with known size/alignment information and an assignment function.
-That is,
-\begin{cfacode}
-forall(otype T)
-void f(T);
-\end{cfacode}
-was equivalent to
-\begin{cfacode}
-forall(dtype T | sized(T) | { T ?=?(T *, T); })
-void f(T);
-\end{cfacode}
-This allows constraints that are common to all complete object-types to be specified very simply.
-
-Now that \CFA has constructors and destructors, more of a complete object's behaviour can be specified than was previously possible.
-As such, @otype@ has been augmented to include assertions for a default constructor, copy constructor, and destructor.
-That is, the previous example is now equivalent to
-\begin{cfacode}
-forall(dtype T | sized(T) |
-  { T ?=?(T *, T); void ?{}(T *); void ?{}(T *, T); void ^?{}(T *); })
-void f(T);
-\end{cfacode}
-These additions allow @f@'s body to create and destroy objects of type @T@, and pass objects of type @T@ as arguments to other functions, following the normal \CFA rules.
-A point of note here is that objects can be missing default constructors (and eventually other functions through deleted functions), so it is important for \CFA programmers to think carefully about the operations needed by their function, so as not to over-constrain the acceptable parameter types and prevent potential reuse.
-
-These additional assertion parameters impose a runtime cost on all managed temporary objects created in polymorphic code, even those with trivial constructors and destructors.
-This cost is necessary because polymorphic code does not know the actual type at compile-time, due to separate compilation.
-Since trivial constructors and destructors either do not perform operations or are simply bit-wise copy operations, the imposed cost is essentially the cost of the function calls.
-
-\section{Summary}
-
-When creating a new object of a managed type, it is guaranteed that a constructor is called to initialize the object at its definition point, and that the object is destructed when its lifetime ends.
-Destructors are called in the reverse order of construction.
-
-Every argument passed to a function is copy constructed into a temporary object that is passed by value to the function and destructed at the end of the statement.
-Function return values are copy constructed inside the function at the return statement, passed by value to the call-site, and destructed at the call-site at the end of the statement.
-
-Every complete object type has a default constructor, copy constructor, assignment operator, and destructor.
-To accomplish this, these functions are generated as appropriate for new types.
-User-defined functions shadow built-in and automatically generated functions, so it is possible to specialize the behaviour of a type.
-Furthermore, default constructors and aggregate field constructors are hidden when \emph{any} constructor is defined.
-
-Objects dynamically allocated with @malloc@, \ateq objects, and objects with only trivial constructors and destructors are unmanaged.
-Unmanaged objects are never the target of an implicit constructor or destructor call.
Index: doc/theses/rob/examples/conclusions/dtor.c
===================================================================
--- doc/theses/rob/examples/conclusions/dtor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,11 +1,0 @@
-forall(otype T)
-struct Box {
-  T x;
-};
-forall(otype T) void ^?{}(Box(T) * x);
-
-forall(otype T)
-void f(T x) {
-  T y = x;
-  Box(T) z = { x };
-}
Index: doc/theses/rob/examples/conclusions/except.c
===================================================================
--- doc/theses/rob/examples/conclusions/except.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,20 +1,0 @@
-#include <stdio.h>
-typedef struct S {
-  int x;
-} S;
-
-void _dtor_S(S * s);
-//  {
-//   printf("called destructor!\n");
-// }
-
-void _ctor_S(struct S *s);
-//  {
-//   s->x = 123;
-// }
-
-int main() {
-  struct S _tmp3;
-  __attribute__((cleanup(_dtor_S))) struct S _tmp2 = (_ctor_S(&_tmp2), _tmp2);
-  printf("%d\n", _tmp2.x);
-}
Index: doc/theses/rob/examples/conclusions/except.cc
===================================================================
--- doc/theses/rob/examples/conclusions/except.cc	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,31 +1,0 @@
-#include <iostream>
-using namespace std;
-
-struct S {
-  int x;
-};
-
-void _dtor_S(S * s) {
-  cout << "called destructor!" << endl;
-}
-
-S f() {
-  throw 3;
-  return (S) { 0 };
-}
-
-void _ctor_S(struct S *s, struct S) {
-  s->x = 123;
-}
-
-int main() {
-  try {
-//    __attribute__((cleanup(_dtor_S))) S s = f();
-  struct S _tmp1;
-  struct S _tmp2 = (_ctor_S(&_tmp2, _tmp1), _tmp2);
-  cout << _tmp2.x << endl;
-
-  } catch(...) {
-
-  }
-}
Index: doc/theses/rob/examples/ctor/array_ctor.c
===================================================================
--- doc/theses/rob/examples/ctor/array_ctor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,16 +1,0 @@
-struct X { int x, y, z; };
-void ?{}(X *);
-void ?{}(X *, int);
-void ?{}(X *, int, int);
-void ?{}(X *, int, int, int);
-void ^?{}(X *);
-void f() {
-  X x[10] = { { 1, 2, 3 }, { 4 }, { 7, 8 } };
-}
-
-void g() {
-  X x[10][10] = {
-    { { 1, 2, 3 }, { 4 } },
-    { { 7, 8 } }
-  };
-}
Index: doc/theses/rob/examples/ctor/copy_ctor.c
===================================================================
--- doc/theses/rob/examples/ctor/copy_ctor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,14 +1,0 @@
-struct A;
-void ?{}(A *);
-void ?{}(A *, A);
-void ^?{}(A *);
-
-A f(A x) {
-  return x;
-}
-
-int main() {
-	A y, z @= {};
-	f(y);
-	f(z);
-}
Index: doc/theses/rob/examples/ctor/cv_ctor.c
===================================================================
--- doc/theses/rob/examples/ctor/cv_ctor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,10 +1,0 @@
-struct S { int i; };
-void ?{}(S *, int);
-void ?{}(S *, S);
-
-int main() {
-  const int i = 5;
-  volatile int j = i;
-  const S s = { 11 };
-  volatile S s2 = s;
-}
Index: doc/theses/rob/examples/ctor/enum_ctor.c
===================================================================
--- doc/theses/rob/examples/ctor/enum_ctor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,3 +1,0 @@
-enum Colour {
-  R, G, B
-};
Index: doc/theses/rob/examples/ctor/expr_ctor.c
===================================================================
--- doc/theses/rob/examples/ctor/expr_ctor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,6 +1,0 @@
-struct X {};
-void ?{}(X *, double);
-
-int f() {
-  X * x = malloc(sizeof(X)){ 1.5 };
-}
Index: doc/theses/rob/examples/ctor/global_ctor.c
===================================================================
--- doc/theses/rob/examples/ctor/global_ctor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,9 +1,0 @@
-struct X {
-  int y, z;
-};
-void ?{}(X *);
-void ?{}(X *, int, int);
-void ^?{}(X *);
-
-X a;
-X b = { 10, 3 };
Index: doc/theses/rob/examples/ctor/hide_ctor.c
===================================================================
--- doc/theses/rob/examples/ctor/hide_ctor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,12 +1,0 @@
-struct S { int x; };
-
-int main() {
-  S s0; // okay
-  {
-    void ?{}(S * s, int i) { s->x = i*2; }
-    void ?{}(S *s) { }
-//    void ^?{}(S *s ) { }
-    S s1; // error
-  }
-  S s2; // okay
-}
Index: doc/theses/rob/examples/ctor/member.c
===================================================================
--- doc/theses/rob/examples/ctor/member.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,26 +1,0 @@
-struct T {
-  int x;
-};
-const int val = 12223344;
-void ?{}(T * t) {
-  if (t->x == val) printf("uh-oh, constructed twice!\n");
-  t->x = val;
-}
-
-struct S {
-  T t1, t2;
-};
-
-void ?{}(S * this) {
-  // construct both members
-}
-
-void ?{}(S * this, int x) {
-  // forward
-  ?{}(this);
-  ?{}(&this->t1);
-}
-
-int main() {
-  S s = 5;
-}
Index: doc/theses/rob/examples/ctor/placement_ctor.c
===================================================================
--- doc/theses/rob/examples/ctor/placement_ctor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,51 +1,0 @@
-struct memory_pool {
-  char * start;
-  char * cur;
-  size_t size;
-  char * free;
-};
-
-void ?{}(memory_pool * pool, size_t size) {
-  pool->[start, cur] = malloc(size);
-  pool->size = size;
-  printf("initializing memory pool with size %lu at location %p\n", pool->size, pool->start);
-}
-
-void ^?{}(memory_pool * pool) {
-  free(pool->start);
-}
-
-forall(dtype T | sized(T))  // bump-allocate array_size objects of type T from pool; returns 0 when there is not enough room left
-T * allocate(memory_pool * pool, unsigned int array_size = 1) {
-  size_t size = sizeof(T) * array_size;
-  printf("allocating block of size %lu...", size);
-  // compare byte counts rather than pointers: a request that exactly fills the pool is valid, and cur + size is never formed past the buffer
-  if (size <= pool->size - (size_t)(pool->cur - pool->start)) {
-    T * x = (T*)pool->cur;
-    pool->cur += size;
-    printf("success!\n");
-    printf("next address is %p\n", pool->cur);
-    return x;
-  } else {
-    printf("failed!\n");
-    return 0;  // fail to allocate
-  }
-}
-
-struct A {
-  int x, y, z;
-};
-void ?{}(A * a) {
-  a->[x,y,z] = [123, 456, 789];
-}
-
-int main() {
-  memory_pool pool = { 1024 };
-
-  int * x = allocate(&pool);
-  A * a = allocate(&pool);
-  A * b = allocate(&pool, 1000);
-  a{};
-  printf("%p\n", x);
-  printf("%p %d %d %d\n", a, a->[x,y,z]);
-}
Index: doc/theses/rob/examples/ctor/return_dtor.c
===================================================================
--- doc/theses/rob/examples/ctor/return_dtor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,20 +1,0 @@
-struct A;
-void ?{}(A *);
-void ^?{}(A *);
-
-void f(int i) {
-  A x;  // construct x
-  {
-    A y; // construct y
-    {
-      A z; // construct z
-      {
-        if (i == 0) return; // destruct x, y, z
-      }
-      if (i == 1) return; // destruct x, y, z
-      // destruct z
-    }
-    if (i == 2) return; // destruct x, y
-    // destruct y
-  }
-}
Index: doc/theses/rob/examples/ctor/static_ctor.c
===================================================================
--- doc/theses/rob/examples/ctor/static_ctor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,12 +1,0 @@
-struct X {
-  int y, z;
-};
-void ?{}(X *);
-void ?{}(X *, int, int);
-void ^?{}(X *);
-
-int f(int x) {
-  static X a;
-  static X b = { x, x };
-  static X c = b;
-}
Index: doc/theses/rob/examples/ctor/union_ctor.c
===================================================================
--- doc/theses/rob/examples/ctor/union_ctor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,6 +1,0 @@
-struct Y { int a; };
-struct Z { double z; };
-union X {
-  Y y;
-  Z z;
-};
Index: doc/theses/rob/examples/intro/FileOutputStream.java
===================================================================
--- doc/theses/rob/examples/intro/FileOutputStream.java	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,38 +1,0 @@
-import java.io.IOException;
-import java.io.FileNotFoundException;
-
-public class FileOutputStream implements AutoCloseable {
-	public static int throwOnWrite;
-	public static int throwOnClose;
-	public static int throwOnOpen;
-
-	public static int numWrites;
-	public static int numCloses;
-	public static int numOpens;
-
-	private String filename;
-	private <EX extends Throwable> void doexcept(EX ex, boolean pred) throws EX {
-		if (pred) {
-			System.out.println("Stream: " + filename + " threw exception: " + ex);
-			throw ex;
-		}
-	}
-
-	public FileOutputStream(String filename) throws FileNotFoundException {
-		doexcept(new FileNotFoundException(), throwOnOpen == ++numOpens);
-		System.out.println("Opened file: " + filename);
-		this.filename = filename;
-	}
-	public void write(byte[] bytes) throws IOException {
-		doexcept(new IOException(), throwOnWrite == ++numWrites);
-		System.out.println("wrote message: " + new String(bytes) + " to file: " + filename);
-	}
-	public void close() throws IOException {
-		System.out.println("Closing file: " + filename);
-		filename = null;
-		doexcept(new IOException(), throwOnClose == ++numCloses);
-	}
-	protected void finalize() {
-		if (filename != null) System.out.println("Finalize closing file: " + filename);
-	}
-}
Index: doc/theses/rob/examples/intro/compound_lit.c
===================================================================
--- doc/theses/rob/examples/intro/compound_lit.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,16 +1,0 @@
-int printf(const char *, ...);
-
-struct A { int x, y; };
-int f(struct A a, int z) {
-	printf("%d %d %d\n", a.x, a.y, z);
-}
-int g(int * x) {
-	if (x == 0) printf("NULL\n");
-	else printf("%d\n", *x);
-}
-
-int main() {
-	f((struct A){ 3, 4 }, (int){ 5 } = 10);
-	g((int[]){ 1, 2, 3 });
-	g(&(int){ 0 });
-}
Index: doc/theses/rob/examples/intro/designation.c
===================================================================
--- doc/theses/rob/examples/intro/designation.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,24 +1,0 @@
-int printf(const char *, ...);
-
-struct A {
-  int w, x, y, z;
-};
-
-void print(struct A a) {
-	printf("{ %d, %d, %d, %d }\n", a.w, a.x, a.y, a.z);
-}
-
-int main() {
-	struct A a0 = { .x=4, .z=1, .x=8 };
-	struct A a1 = { 1, .y=7, 6 };
-	struct A a2[3] = { [2]=a0, [0]=a1, { .z=3 } };
-
-	print(a0);
-	print(a1);
-	printf("{\n");
-	for (int i = 0; i < 3; i++) {
-		printf("  ");
-		print(a2[i]);
-	}
-	printf("}\n");
-}
Index: doc/theses/rob/examples/intro/ignore.c
===================================================================
--- doc/theses/rob/examples/intro/ignore.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,22 +1,0 @@
-struct __ignore_t__ {
-};
-__ignore_t__ __ignore__;
-
-forall(dtype T | sized(T))
-__ignore_t__ ?=?(__ignore_t__ * dst, T src) {
-	return *dst;
-}
-
-forall(dtype T | sized(T) | { void ?{}(T *, T); })
-T ?=?(T * dst, __ignore_t__ src) {
-	return *dst;
-}
-
-int main() {
-	int x = 123, y = 456, z = 789;
-	double j = 3.14, i = 8.77;
-	[x, __ignore__, z] = [y, z, x];
-	[i, j, __ignore__] = [0, i, j];
-	printf("%d %d %d\n", x, y, z);
-	printf("%g %g\n", i, j);
-}
Index: doc/theses/rob/examples/intro/ires.java
===================================================================
--- doc/theses/rob/examples/intro/ires.java	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,3 +1,0 @@
-public interface ires {
-	public void write(String filename, String msg) throws Exception;
-}
Index: doc/theses/rob/examples/intro/res.java
===================================================================
--- doc/theses/rob/examples/intro/res.java	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,34 +1,0 @@
-public class res {
-	private ires res;
-	public res(ires res) {
-		this.res = res;
-	}
-
-	public void dotest(String msg, int open, int write, int close) {
-		try {
-			System.out.println(msg);
-			FileOutputStream.throwOnOpen = open;
-			FileOutputStream.throwOnWrite = write;
-			FileOutputStream.throwOnClose = close;
-			res.write("foo.txt", "output message");
-		} catch (Exception ex) {
-		}
-		FileOutputStream.numOpens = 0;
-		FileOutputStream.numWrites = 0;
-		FileOutputStream.numCloses = 0;
-		System.gc();
-		System.runFinalization();
-		System.out.println();
-		System.out.flush();
-	}
-
-	public static void dotest(ires res) {
-		res r = new res(res);
-		r.dotest("Exception on open 1",  1, 0, 0);
-		r.dotest("Exception on open 2",  2, 0, 0);
-		r.dotest("Exception on write 1", 0, 1, 0);
-		r.dotest("Exception on write 2", 0, 2, 0);
-		r.dotest("Exception on close 1", 0, 0, 1);
-		r.dotest("Exception on close 2", 0, 0, 2);
-	}
-}
Index: doc/theses/rob/examples/intro/res1.java
===================================================================
--- doc/theses/rob/examples/intro/res1.java	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,16 +1,0 @@
-import java.io.IOException;
-
-public class res1 implements ires {
-	public void write(String filename, String msg) throws IOException {
-	  FileOutputStream out = new FileOutputStream(filename);  // may throw FileNotFoundException
-	  FileOutputStream log = new FileOutputStream("log.txt"); //  or SecurityException
-	  out.write(msg.getBytes()); // may throw an IOException
-	  log.write(msg.getBytes()); // may throw an IOException
-	  log.close(); // may throw an IOException
-	  out.close(); // may throw an IOException
-	}
-
-	public static void main(String[] args) {
-		res.dotest(new res1());
-	}
-}
Index: doc/theses/rob/examples/intro/res2.java
===================================================================
--- doc/theses/rob/examples/intro/res2.java	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,22 +1,0 @@
-import java.io.IOException;
-
-public class res2 implements ires {
-  public void write(String filename, String msg) throws Exception {
-    FileOutputStream out = new FileOutputStream(filename); // may throw FileNotFoundException
-    try {
-      FileOutputStream log = new FileOutputStream("log.txt"); //  or SecurityException
-      try {
-        out.write(msg.getBytes()); // may throw an IOException
-        log.write(msg.getBytes()); // may throw an IOException
-      } finally {
-        log.close(); // may throw an IOException
-      }
-    } finally {
-      out.close(); // may throw an IOException
-    }
-  }
-
-  public static void main(String[] args) {
-    res.dotest(new res2());
-  }
-}
Index: doc/theses/rob/examples/intro/res3.java
===================================================================
--- doc/theses/rob/examples/intro/res3.java	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,17 +1,0 @@
-import java.io.IOException;
-
-public class res3 implements ires {
-  public void write(String filename, String msg) throws Exception {
-    try (
-      FileOutputStream out = new FileOutputStream(filename); // may throw FileNotFoundException
-      FileOutputStream log = new FileOutputStream("log.txt"); //  or SecurityException
-    ) {
-      out.write(msg.getBytes()); // may throw an IOException
-      log.write(msg.getBytes()); // may throw an IOException
-    }
-  }
-
-  public static void main(String[] args) {
-    res.dotest(new res3());
-  }
-}
Index: doc/theses/rob/examples/intro/tuple.cc
===================================================================
--- doc/theses/rob/examples/intro/tuple.cc	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,10 +1,0 @@
-#include <iostream>
-#include <tuple>
-using namespace std;
-
-int main() {
-	tuple<int, int, int> triple(10, 20, 30);
-	cout << get<1>(triple) << endl;
-	tuple_element<2, tuple<int, float, double>>::type x = 3.14;
-	cout << tuple_size<decltype(triple)>::value << endl;
-}
Index: doc/theses/rob/examples/intro/variadic.java
===================================================================
--- doc/theses/rob/examples/intro/variadic.java	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,25 +1,0 @@
-class variadic {
-  int sum(int... args) {
-    int s = 0;
-    for (int x : args) {
-      s += x;
-    }
-    print(args.length, " ", args[0], " ", args[args.length-1], "\n");
-    return s;
-  }
-
-  void print(Object... objs) {
-    for (Object obj : objs) {
-      System.out.print(obj);
-    }
-  }
-
-  public void run() {
-    print("The sum from 1 to 10 is ", sum(1,2,3,4,5,6,7,8,9,10), ".\n");
-    print(sum(new int[]{1, 2,3}), "\n");
-  }
-
-  public static void main(String args[]) {
-    new variadic().run();
-  }
-}
Index: doc/theses/rob/examples/malloc.cc
===================================================================
--- doc/theses/rob/examples/malloc.cc	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,20 +1,0 @@
-#include <cstdlib>
-#include <iostream>
-using namespace std;
-
-class A {
-public:
-  A() {
-    cout << "A()" << endl;  
-  }
-  ~A(){
-    cout << "~A()" << endl;
-  }
-};
-
-int main() {
-  A * x = (A*)malloc(sizeof(A));
-  A * y = new A;
-  delete y;
-  free(x);
-}
Index: doc/theses/rob/examples/nested.c
===================================================================
--- doc/theses/rob/examples/nested.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,8 +1,0 @@
-struct S {
-  int x;
-};
-void ^?{}(S * s) { }
-
-int main() {
-  [S, [S, S]] x;
-}
Index: doc/theses/rob/examples/poly.c
===================================================================
--- doc/theses/rob/examples/poly.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,14 +1,0 @@
-forall(dtype T)
-void foo(T x) {
-
-}
-
-forall(dtype T)
-void bar(T * y) { }
-
-int main() {
-  foo(5);
-  foo("baz");
-  foo(foo);
-  bar(foo);
-}
Index: doc/theses/rob/examples/scope_guard.h
===================================================================
--- doc/theses/rob/examples/scope_guard.h	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,29 +1,0 @@
-#ifndef SCOPE_GUARD_H
-#define SCOPE_GUARD_H
-
-struct ScopeGuard {
-  void (*fn)(void *);
-  // Args args;
-};
-
-// forall(ttype Args, ttype Ret)
-// void ?{}(ScopeGuard(Args, Ret) * this) {
-void ?{}(ScopeGuard * this) {
-  this->fn = 0;  // start with no action, so the destructor is safe if fn is never assigned
-}
-
-// // inline
-// forall(ttype Args, ttype Ret)
-// void ?{}(ScopeGuard(Args, Ret) * this, Ret (*fn)(Args), Args args) {
-//   this->fn = fn;
-//   // this->args = args;
-// }
-
-// inline
-// forall(ttype Args, ttype Ret)
-// void ^?{}(ScopeGuard(Args, Ret) * this) {
-void ^?{}(ScopeGuard * this) {
-  if (this->fn) this->fn(0);  // run the guarded action (arguments are not stored yet, so it receives 0) only if one was set
-}
-
-#endif
Index: doc/theses/rob/examples/test_scoped_guard.c
===================================================================
--- doc/theses/rob/examples/test_scoped_guard.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,12 +1,0 @@
-#include "scope_guard.h"
-
-extern "C" {
-  void free(void *);
-}
-
-int main() {
-  int * x = malloc(sizeof(10));
-  // ScopeGuard(int*, void) foo;
-  ScopeGuard foo;
-  foo.fn = free;
-}
Index: doc/theses/rob/examples/tuples/assign.c
===================================================================
--- doc/theses/rob/examples/tuples/assign.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,9 +1,0 @@
-int x, z;
-double y;
-[double, double] f();
-
-int main () {
-  [x, y, z] = [f(), 3];       // multiple assignment
-  // [x, y, z] = 1.5;            // mass assignment
-}
-
Index: doc/theses/rob/examples/tuples/cast.c
===================================================================
--- doc/theses/rob/examples/tuples/cast.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,10 +1,0 @@
-[int, int, int] f();
-[int, [int, int], int] g();
-
-int main() {
-  ([int, double])f();           // (1)
-  ([int, [int], int])g();         // (2)
-  printf("%d %d\n", ([void, [int, int]])g());      // (3) -- should work and doesn't -- tries to construct void object, but should ignore that component in terms of the type of the tuple
-  // ([int, int, int, int])g();    // (4) -- should not work and doesn't
-  // ([int, [int, int, int]])g();  // (5) -- should not work and doesn't
-}
Index: doc/theses/rob/examples/tuples/ctor.c
===================================================================
--- doc/theses/rob/examples/tuples/ctor.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,10 +1,0 @@
-struct S { int x; double y; };
-[void] ?{}(* [int, double] this, S s) {
-  this->0 = s.x;
-  this->1 = s.y;
-}
-int main() {
-  S s = { 123, 345 };
-  [int, double] x = s;
-  printf("%d %g\n", x);
-}
Index: doc/theses/rob/examples/tuples/mrv.c
===================================================================
--- doc/theses/rob/examples/tuples/mrv.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,2 +1,0 @@
-[int, int] foo();
-[double, int] bar();
Index: doc/theses/rob/examples/tuples/mrv_1.c
===================================================================
--- doc/theses/rob/examples/tuples/mrv_1.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,34 +1,0 @@
-#include <stdio.h>
-#include <ctype.h>
-struct mf_ret {
-  int freq;
-  char ch;
-};
-
-struct mf_ret most_frequent(const char * str) {  // most frequent letter of str (ignoring case) and its count; { 0, 'a' } if str has no letters
-  int freqs [26] = { 0 };         // int counters: a char counter overflows once a letter occurs more than CHAR_MAX times
-  struct mf_ret ret = { 0, 'a' };
-  for (int i = 0; str[i] != '\0'; ++i) {
-    unsigned char uc = str[i];    // <ctype.h> functions need an unsigned char value
-    if (isalpha(uc)) {            // only count letters
-      int idx = tolower(uc)-'a';  // convert to lower case, then index from 'a'
-      if (++freqs[idx] > ret.freq) {  // update on new max
-        ret.freq = freqs[idx];
-        ret.ch = 'a' + idx;
-      }
-    }
-  }
-  return ret;
-}
-
-void dothing(const char * str) {
-  struct mf_ret ret = most_frequent(str);
-  printf("%s -- %d %c\n", str, ret.freq, ret.ch);
-}
-
-int main() {
-  dothing("hello");
-  dothing("hello, world!");
-  dothing("aaabbbba");
-  dothing("");
-}
Index: doc/theses/rob/examples/tuples/mrv_2.c
===================================================================
--- doc/theses/rob/examples/tuples/mrv_2.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,31 +1,0 @@
-#include <stdio.h>
-#include <ctype.h>
-
-int most_frequent(const char * str, char * ret_ch) {  // returns the count of the most frequent letter in str (ignoring case); the letter is stored in *ret_ch
-  int freqs [26] = { 0 };         // int counters: a char counter overflows once a letter occurs more than CHAR_MAX times
-  int ret_freq = 0;
-  *ret_ch = 'a';                  // defined result when str has no letters, so callers never read an unset char
-  for (int i = 0; str[i] != '\0'; ++i) {
-    if (isalpha((unsigned char)str[i])) {              // only count letters; <ctype.h> needs an unsigned char value
-      int idx = tolower((unsigned char)str[i])-'a';    // convert to lower case, then index from 'a'
-      if (++freqs[idx] > ret_freq) {  // update on new max
-        ret_freq = freqs[idx];
-        *ret_ch = 'a' + idx;
-      }
-    }
-  }
-  return ret_freq;
-}
-
-void dothing(const char * str) {
-  char ch;
-  int freq = most_frequent(str, &ch);
-  printf("%s -- %d %c\n", str, freq, ch);
-}
-
-int main() {
-  dothing("hello");
-  dothing("hello, world!");
-  dothing("aaabbbba");
-  dothing("");
-}
Index: doc/theses/rob/examples/tuples/mrv_3.c
===================================================================
--- doc/theses/rob/examples/tuples/mrv_3.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,33 +1,0 @@
-#include <stdio.h>
-#include <ctype.h>
-
-[int, char] most_frequent(const char * str) {  // returns [count, letter] for the most frequent letter of str (ignoring case); [0, 'a'] if str has no letters
-  int freqs [26] = { 0 };         // int counters: a char counter overflows once a letter occurs more than CHAR_MAX times
-  int ret_freq = 0;
-  char ret_ch = 'a';
-  for (int i = 0; str[i] != '\0'; ++i) {
-    unsigned char uc = str[i];    // <ctype.h> functions need an unsigned char value
-    if (isalpha(uc)) {            // only count letters
-      int idx = tolower(uc)-'a';  // convert to lower case, then index from 'a'
-      if (++freqs[idx] > ret_freq) {  // update on new max
-        ret_freq = freqs[idx];
-        ret_ch = 'a' + idx;
-      }
-    }
-  }
-  return [ret_freq, ret_ch];
-}
-
-void dothing(const char * str) {  // print str followed by its most frequent letter and that letter's count
-  int freq;
-  char ch;
-  [freq, ch] = most_frequent(str);  // destructure the tuple result into the two locals
-  printf("%s -- %d %c\n", str, freq, ch);  // print the locals; ret_freq/ret_ch exist only inside most_frequent
-}
-
-int main() {
-  dothing("hello");
-  dothing("hello, world!");
-  dothing("aaabbbba");
-  dothing("");
-}
Index: doc/theses/rob/examples/tuples/named.c
===================================================================
--- doc/theses/rob/examples/tuples/named.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,6 +1,0 @@
-typedef [int x, int y] Point2D;
-Point2D p1, p2;
-int main() {
-  p1.x + p1.y + p2.x + p2.y;
-  p1.0 + p1.1 + p2.0 + p2.1;  // equivalent
-}
Index: doc/theses/rob/examples/variadic/new.c
===================================================================
--- doc/theses/rob/examples/variadic/new.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,13 +1,0 @@
-forall(dtype T | sized(T)) T * malloc(void);
-
-forall(dtype T, ttype Params | sized(T) | { void ?{}(T *, Params); })
-T * new(Params p) {
-  return ((T*)malloc()){ p }; // construct result of malloc
-}
-
-struct S { int x, y; }; 
-void ?{}(S *, int, int);
-
-int main() {
-  S * s = new(3, 4);
-}
Index: doc/theses/rob/examples/variadic/print.c
===================================================================
--- doc/theses/rob/examples/variadic/print.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,11 +1,0 @@
-forall(otype T, ttype Params |
-  { void print(T); void print(Params); })
-void print(T arg, Params rest) {
-  print(arg);
-  print(rest);
-}
-void print(const char * x) { printf("%s", x); }
-void print(int x) { printf("%d", x);  }
-int main() {
-  print("x = ", 123, ".");
-}
Index: doc/theses/rob/examples/variadic/sum1.c
===================================================================
--- doc/theses/rob/examples/variadic/sum1.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,8 +1,0 @@
-int sum(void){ return 0; }        // (0)
-forall(ttype Params | { int sum(Params); })
-int sum(int x, Params rest) { // (1)
-  return x+sum(rest);
-}
-int main() {
-  printf("%d\n", sum(10, 20, 30, 40, 50, 60));
-}
Index: doc/theses/rob/examples/variadic/sum2.c
===================================================================
--- doc/theses/rob/examples/variadic/sum2.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,10 +1,0 @@
-int sum(int x, int y){
-  return x+y;
-}
-forall(ttype Params | { int sum(int, Params); })
-int sum(int x, int y, Params rest) {
-  return sum(x+y, rest);
-}
-int main() {
-  printf("%d\n", sum(10, 20, 30, 40, 50, 60));
-}
Index: doc/theses/rob/intro.tex
===================================================================
--- doc/theses/rob/intro.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,910 +1,0 @@
-%======================================================================
-\chapter{Introduction}
-%======================================================================
-
-\section{\protect\CFA Background}
-\label{s:background}
-\CFA \footnote{Pronounced ``C-for-all'', and written \CFA or Cforall.} is a modern non-object-oriented extension to the C programming language.
-As it is an extension of C, there is already a wealth of existing C code and principles that govern the design of the language.
-Among the goals set out in the original design of \CFA, four points stand out \cite{Bilson03}.
-\begin{enumerate}
-\item The behaviour of standard C code must remain the same when translated by a \CFA compiler as when translated by a C compiler.
-\item Standard C code must be as fast and as small when translated by a \CFA compiler as when translated by a C compiler.
-\item \CFA code must be at least as portable as standard C code.
-\item Extensions introduced by \CFA must be translated in the most efficient way possible.
-\end{enumerate}
-Therefore, these design principles must be kept in mind throughout the design and development of new language features.
-In order to appeal to existing C programmers, great care must be taken to ensure that new features naturally feel like C.
-These goals ensure existing C code-bases can be converted to \CFA incrementally with minimal effort, and C programmers can productively generate \CFA code without training beyond the features being used.
-Unfortunately, \CC is actively diverging from C, so incremental additions require significant effort and training, coupled with multiple legacy design-choices that cannot be updated.
-
-The current implementation of \CFA is a source-to-source translator from \CFA to GNU C \cite{GCCExtensions}.
-
-The remainder of this section describes some of the important features that currently exist in \CFA, to give the reader the necessary context in which the new features presented in this thesis must dovetail.
-
-\subsection{C Background}
-\label{sub:c_background}
-In the context of this work, the term \emph{object} refers to a region of data storage in the execution environment, the contents of which can represent values \cite[p.~6]{C11}.
-
-One of the lesser-known features of standard C is \emph{designations}.
-Designations are similar to named parameters in languages such as Python and Scala, except that they only apply to aggregate initializers.
-Note that in \CFA, designations use a colon separator, rather than an equals sign as in C, because this syntax is one of the few places that conflicts with the new language features.
-\begin{cfacode}
-struct A {
-  int w, x, y, z;
-};
-A a0 = { .x:4 .z:1, .x:8 };
-A a1 = { 1, .y:7, 6 };
-A a2[4] = { [2]:a0, [0]:a1, { .z:3 } };
-// equivalent to
-// A a0 = { 0, 8, 0, 1 };
-// A a1 = { 1, 0, 7, 6 };
-// A a2[4] = { a1, { 0, 0, 0, 3 }, a0, { 0, 0, 0, 0 } };
-\end{cfacode}
-Designations allow specifying the field to initialize by name, rather than by position.
-Any field not explicitly initialized is initialized as if it had static storage duration \cite[p.~141]{C11}.
-A designator specifies the current object for initialization, and as such any undesignated sub-objects pick up where the last initialization left off.
-For example, in the initialization of @a1@, the initializer of @y@ is @7@, and the unnamed initializer @6@ initializes the next sub-object, @z@.
-Later initializers override earlier initializers, so a sub-object for which there is more than one initializer is only initialized by its last initializer.
-These semantics can be seen in the initialization of @a0@, where @x@ is designated twice, and thus initialized to @8@.
-
-C also provides \emph{compound literal} expressions, which provide a first-class mechanism for creating unnamed objects.
-\begin{cfacode}
-struct A { int x, y; };
-int f(A, int);
-int g(int *);
-
-f((A){ 3, 4 }, (int){ 5 } = 10);
-g((int[]){ 1, 2, 3 });
-g(&(int){ 0 });
-\end{cfacode}
-Compound literals create an unnamed object, and result in an lvalue, so it is legal to assign a value into a compound literal or to take its address \cite[p.~86]{C11}.
-Syntactically, compound literals look like a cast operator followed by a brace-enclosed initializer, but semantically are different from a C cast, which only applies basic conversions and coercions and is never an lvalue.
-
-The \CFA translator makes use of several GNU C extensions, including \emph{nested functions} and \emph{attributes}.
-Nested functions make it possible to access data that is lexically in scope in the nested function's body.
-\begin{cfacode}
-int f() {
-  int x = 0;
-  void g() {
-    x++;
-  }
-  g();  // changes x
-}
-\end{cfacode}
-Nested functions come with the usual C caveat that they should not leak into the containing environment, since they are only valid as long as the containing function's stack frame is active.
-
-Attributes make it possible to inform the compiler of certain properties of the code.
-For example, a function can be marked as deprecated, so that legacy APIs can be identified and slowly removed, or as \emph{hot}, so that the compiler knows the function is called frequently and should be aggressively optimized.
-\begin{cfacode}
-__attribute__((deprecated("foo is deprecated, use bar instead")))
-void foo();
-__attribute__((hot)) void bar(); // heavily optimized
-
-foo();  // warning
-bar();
-\end{cfacode}
-
-\subsection{Overloading}
-\label{sub:overloading}
-Overloading is the ability to specify multiple entities with the same name.
-The most common form of overloading is function overloading, wherein multiple functions can be defined with the same name, but with different signatures.
-C provides a small amount of built-in overloading, \eg + is overloaded for the basic types.
-Like in \CC, \CFA allows user-defined overloading based both on the number of parameters and on the types of parameters.
-\begin{cfacode}
-void f(void);  // (1)
-void f(int);   // (2)
-void f(char);  // (3)
-
-f('A');        // selects (3)
-\end{cfacode}
-In this case, there are three @f@ procedures, where @f@ takes either 0 or 1 arguments, and if an argument is provided then it may be of type @int@ or of type @char@.
-Exactly which procedure is executed depends on the number and types of arguments passed.
-If there is no exact match available, \CFA attempts to find a suitable match by examining the C built-in conversion heuristics.
-The \CFA expression resolution algorithm uses a cost function to determine the interpretation that uses the fewest conversions and polymorphic type bindings.
-\begin{cfacode}
-void g(long long);
-
-g(12345);
-\end{cfacode}
-In the above example, there is only one instance of @g@, which expects a single parameter of type @long long@.
-Here, the argument provided has type @int@, but since all possible values of type @int@ can be represented by a value of type @long long@, there is a safe conversion from @int@ to @long long@, and so \CFA calls the provided @g@ routine.
-
-Overloading solves the problem present in C where there can only be one function with a given name, requiring multiple names for functions that perform the same operation but take in different types.
-This can be seen in the example of the absolute value functions in C:
-\begin{cfacode}
-// stdlib.h
-int abs(int);
-long int labs(long int);
-long long int llabs(long long int);
-\end{cfacode}
-In \CFA, the functions @labs@ and @llabs@ are replaced by appropriate overloads of @abs@.
-
-In addition to this form of overloading, \CFA also allows overloading based on the number and types of \emph{return} values.
-This extension is a feature that is not available in \CC, but is available in other programming languages such as Ada \cite{Ada95}.
-\begin{cfacode}
-int g();         // (1)
-double g();      // (2)
-
-int x = g();     // selects (1)
-\end{cfacode}
-Here, the only difference between the signatures of the different versions of @g@ is in the return values.
-The result context is used to select an appropriate routine definition.
-In this case, the result of @g@ is assigned into a variable of type @int@, so \CFA prefers the routine that returns a single @int@, because it is an exact match.
-
-Return-type overloading solves similar problems to parameter-list overloading, in that multiple functions that perform similar operations can have the same name, but produce different values.
-One use case for this feature is to provide two versions of the @bsearch@ routine:
-\begin{cfacode}
-forall(otype T | { int ?<?( T, T ); })
-T * bsearch(T key, const T * arr, size_t dimension) {
-  int comp(const void * t1, const void * t2) {
-    return *(T *)t1 < *(T *)t2 ? -1 : *(T *)t2 < *(T *)t1 ? 1 : 0;
-  }
-  return (T *)bsearch(&key, arr, dimension, sizeof(T), comp);
-}
-forall(otype T | { int ?<?( T, T ); })
-unsigned int bsearch(T key, const T * arr, size_t dimension) {
-  T *result = bsearch(key, arr, dimension);
-  // pointer subtraction includes sizeof(T)
-  return result ? result - arr : dimension;
-}
-double key = 5.0;
-double vals[10] = { /* 10 floating-point values */ };
-
-double * val = bsearch( 5.0, vals, 10 ); // selection based on return type
-int posn = bsearch( 5.0, vals, 10 );
-\end{cfacode}
-The first version provides a thin wrapper around the C @bsearch@ routine, converting untyped @void *@ to the polymorphic type @T *@, allowing the \CFA compiler to catch errors when the types of @key@, @arr@, and the target at the call-site do not agree.
-The second version provides an alternate return of the index in the array of the selected element, rather than its address.
-
-There are times when a function should logically return multiple values.
-Since a function in standard C can only return a single value, a programmer must either take in additional return values by address, or the function's designer must create a wrapper structure to package multiple return-values.
-For example, the first approach:
-\begin{cfacode}
-int f(int * ret) {        // returns a value through parameter ret
-  *ret = 37;
-  return 123;
-}
-
-int res1, res2;           // allocate return value
-int res1 = g(&res2);      // explicitly pass storage
-\end{cfacode}
-is awkward because it requires the caller to explicitly allocate memory for $n$ result variables, even if they are only temporary values used as a subexpression, or even not used at all.
-The second approach:
-\begin{cfacode}
-struct A {
-  int x, y;
-};
-struct A g() {            // returns values through a structure
-  return (struct A) { 123, 37 };
-}
-struct A res3 = g();
-... res3.x ... res3.y ... // use result values
-\end{cfacode}
-is awkward because the caller has to either learn the field names of the structure or learn the names of helper routines to access the individual return values.
-Both approaches are syntactically unnatural.
-
-In \CFA, it is possible to directly declare a function returning multiple values.
-This extension provides important semantic information to the caller, since return values are only for output.
-\begin{cfacode}
-[int, int] f() {       // no new type
-  return [123, 37];
-}
-\end{cfacode}
-However, the ability to return multiple values is useless without a syntax for accepting the results from the function.
-
-In standard C, return values are most commonly assigned directly into local variables, or are used as the arguments to another function call.
-\CFA allows both of these contexts to accept multiple return values.
-\begin{cfacode}
-int res1, res2;
-[res1, res2] = f();    // assign return values into local variables
-
-void g(int, int);
-g(f());                // pass both return values of f to g
-\end{cfacode}
-As seen in the example, it is possible to assign the results from a return value directly into local variables.
-These local variables can be referenced naturally, without requiring any unpacking as in structured return values.
-Perhaps more interesting is the fact that multiple return values can be passed to multiple parameters seamlessly, as in the call @g(f())@.
-In this call, the return values from @f@ are linked to the parameters of @g@ so that each of the return values is passed directly to the corresponding parameter of @g@, without any explicit storing, unpacking, or additional naming.
-
-An extra quirk introduced by multiple return values is in the resolution of function calls.
-\begin{cfacode}
-int f();            // (1)
-[int, int] f();     // (2)
-
-void g(int, int);
-
-int x, y;
-[x, y] = f();       // selects (2)
-g(f());             // selects (2)
-\end{cfacode}
-In this example, the only possible call to @f@ that can produce the two @int@s required for assigning into the variables @x@ and @y@ is the second option.
-Similar reasoning holds when calling the function @g@.
-
-This duality between aggregation and aliasing can be seen in the C standard library in the @div@ and @remquo@ functions, which return the quotient and remainder for a division of integer and floating-point values, respectively.
-\begin{cfacode}
-typedef struct { int quo, rem; } div_t; // from stdlib.h
-div_t div( int num, int den );
-double remquo( double num, double den, int * quo );
-div_t qr = div( 13, 5 );            // return quotient/remainder aggregate
-int q;
-double r = remquo( 13.5, 5.2, &q ); // return remainder, alias quotient
-\end{cfacode}
-@div@ aggregates the quotient/remainder in a structure, while @remquo@ aliases a parameter to an argument.
-Alternatively, a programming language can directly support returning multiple values, \eg in \CFA:
-\begin{lstlisting}
-[int, int] div(int num, int den);               // return two integers
-[double, double] div( double num, double den ); // return two doubles
-int q, r;                     // overloaded variable names
-double q, r;
-[q, r] = div(13, 5);          // select appropriate div and q, r
-[q, r] = div(13.5, 5.2);
-\end{lstlisting}
-
-In \CFA, overloading also applies to operator names, known as \emph{operator overloading}.
-Similar to function overloading, a single operator is given multiple meanings by defining new versions of the operator with different signatures.
-In \CC, this can be done as follows:
-\begin{cppcode}
-struct A { int i; };
-A operator+(A x, A y);
-bool operator<(A x, A y);
-\end{cppcode}
-
-In \CFA, the same example can be written as follows.
-\begin{cfacode}
-struct A { int i; };
-A ?+?(A x, A y);    // '?'s represent operands
-int ?<?(A x, A y);
-\end{cfacode}
-Notably, the only difference is syntax.
-Most of the operators supported by \CC for operator overloading are also supported in \CFA.
-Notable exceptions are the logical operators (\eg @||@), the sequence operator (\ie @,@), and the member-access operators (\eg @.@ and \lstinline{->}).
-
-Finally, \CFA also permits overloading variable identifiers.
-This feature is not available in \CC.
-\begin{cfacode}
-struct Rational { int numer, denom; };
-int x = 3;               // (1)
-double x = 1.27;         // (2)
-Rational x = { 4, 11 };  // (3)
-
-void g(double);
-
-x += 1;                  // chooses (1)
-g(x);                    // chooses (2)
-Rational y = x;          // chooses (3)
-\end{cfacode}
-In this example, there are three definitions of the variable @x@.
-Based on the context, \CFA attempts to choose the variable whose type best matches the expression context.
-When used judiciously, this feature allows names like @MAX@, @MIN@, and @PI@ to apply across many types.
-
-Finally, the values @0@ and @1@ have special status in standard C.
-In particular, the value @0@ is both an integer and a pointer literal, and thus its meaning depends on the context.
-In addition, several operations can be redefined in terms of other operations and the values @0@ and @1@.
-For example,
-\begin{cfacode}
-int x;
-if (x) {  // if (x != 0)
-  x++;    //   x += 1;
-}
-\end{cfacode}
-Every if- and iteration-statement in C compares the condition with @0@, and every increment and decrement operator is semantically equivalent to adding or subtracting the value @1@ and storing the result.
-Due to these rewrite rules, the values @0@ and @1@ have the types \zero and \one in \CFA, which allow for overloading various operations that connect to @0@ and @1@ \footnote{In the original design of \CFA, @0@ and @1@ were overloadable names \cite[p.~7]{cforall}.}.
-The types \zero and \one have special built-in implicit conversions to the various integral types, and a conversion to pointer types for @0@, which allows standard C code involving @0@ and @1@ to work as normal.
-\begin{cfacode}
-// lvalue is similar to returning a reference in C++
-lvalue Rational ?+=?(Rational *a, Rational b);
-Rational ?=?(Rational * dst, zero_t) {
-  return *dst = (Rational){ 0, 1 };
-}
-
-Rational sum(Rational *arr, int n) {
-  Rational r;
-  r = 0;     // use rational-zero_t assignment
-  for (; n > 0; n--) {
-    r += arr[n-1];
-  }
-  return r;
-}
-\end{cfacode}
-This function takes an array of @Rational@ objects and produces the @Rational@ representing the sum of the array.
-Note the use of an overloaded assignment operator to set an object of type @Rational@ to an appropriate @0@ value.
-
-\subsection{Polymorphism}
-\label{sub:polymorphism}
-In its most basic form, polymorphism grants the ability to write a single block of code that accepts different types.
-In particular, \CFA supports the notion of parametric polymorphism.
-Parametric polymorphism allows a function to be written generically, for all values of all types, without regard to the specifics of a particular type.
-For example, in \CC, the simple identity function for all types can be written as:
-\begin{cppcode}
-template<typename T>
-T identity(T x) { return x; }
-\end{cppcode}
-\CC uses the template mechanism to support parametric polymorphism. In \CFA, an equivalent function can be written as:
-\begin{cfacode}
-forall(otype T)
-T identity(T x) { return x; }
-\end{cfacode}
-Once again, the only visible difference in this example is syntactic.
-Fundamental differences can be seen by examining more interesting examples.
-In \CC, a generic sum function is written as follows:
-\begin{cppcode}
-template<typename T>
-T sum(T *arr, int n) {
-  T t;  // default construct => 0
-  for (; n > 0; n--) t += arr[n-1];
-  return t;
-}
-\end{cppcode}
-Here, the code assumes the existence of a default constructor, assignment operator, and an addition operator over the provided type @T@.
-If any of these required operators are not available, the \CC compiler produces an error message stating which operators could not be found.
-
-A similar sum function can be written in \CFA as follows:
-\begin{cfacode}
-forall(otype T | **R**{ T ?=?(T *, zero_t); T ?+=?(T *, T); }**R**)
-T sum(T *arr, int n) {
-  T t = 0;
-  for (; n > 0; n--) t = t += arr[n-1];
-  return t;
-}
-\end{cfacode}
-The first thing to note here is that immediately following the declaration of @otype T@ is a list of \emph{type assertions} that specify restrictions on acceptable choices of @T@.
-In particular, the assertions above specify that there must be an assignment from \zero to @T@ and an addition assignment operator from @T@ to @T@.
-The existence of an assignment operator from @T@ to @T@ and the ability to create an object of type @T@ are assumed implicitly by declaring @T@ with the @otype@ type-class.
-In addition to @otype@, there are currently two other type-classes.
-
-@dtype@, short for \emph{data type}, serves as the top type for object types; any object type, complete or incomplete, can be bound to a @dtype@ type variable.
-To contrast, @otype@, short for \emph{object type}, is a @dtype@ with known size, alignment, and an assignment operator, and thus binds only to complete object types.
-With this extra information, complete objects can be used in polymorphic code in the same way they are used in monomorphic code, providing familiarity and ease of use.
-The third type-class is @ftype@, short for \emph{function type}, matching only function types.
-The three type parameter kinds are summarized in \autoref{table:types}.
-
-\begin{table}[h!]
-  \begin{center}
-    \begin{tabular}{|c||c|c|c||c|c|c|}
-                                                                                                    \hline
-    name    & object type & incomplete type & function type & can assign & can create & has size \\ \hline
-    @otype@ & X           &                 &               & X                & X          & X        \\ \hline
-    @dtype@ & X           & X               &               &                  &            &          \\ \hline
-    @ftype@ &             &                 & X             &                  &            &          \\ \hline
-    \end{tabular}
-  \end{center}
-  \caption{\label{table:types} The different kinds of type parameters in \protect\CFA}
-\end{table}
-
-A major difference between the approaches of \CC and \CFA to polymorphism is that the set of assumed properties for a type is \emph{explicit} in \CFA.
-One of the major limiting factors of \CC's approach is that templates cannot be separately compiled.
-In contrast, the explicit nature of assertions allows \CFA's polymorphic functions to be separately compiled, as the function prototype states all necessary requirements separate from the implementation.
-For example, the prototype for the previous sum function is
-\begin{cfacode}
-forall(otype T | **R**{ T ?=?(T *, zero_t); T ?+=?(T *, T); }**R**)
-T sum(T *arr, int n);
-\end{cfacode}
-With this prototype, a caller in another translation unit knows all of the constraints on @T@, and thus knows all of the operations that need to be made available to @sum@.
-
-In \CFA, a set of assertions can be factored into a \emph{trait}.
-\begin{cfacode}
-trait Addable(otype T) {
-  T ?+?(T, T);
-  T ++?(T);
-  T ?++(T);
-}
-forall(otype T | Addable(T)) void f(T);
-forall(otype T | Addable(T) | { T --?(T); }) T g(T);
-forall(otype T, U | Addable(T) | { T ?/?(T, U); }) U h(T, U);
-\end{cfacode}
-This capability allows specifying the same set of assertions in multiple locations, without the repetition and likelihood of mistakes that come with manually writing them out for each function declaration.
-
-An interesting application of return-type resolution and polymorphism is a polymorphic version of @malloc@.
-\begin{cfacode}
-forall(dtype T | sized(T))
-T * malloc() {
-  return (T*)malloc(sizeof(T)); // call C malloc
-}
-int * x = malloc();     // malloc(sizeof(int))
-double * y = malloc();  // malloc(sizeof(double))
-
-struct S { ... };
-S * s = malloc();       // malloc(sizeof(S))
-\end{cfacode}
-The built-in trait @sized@ ensures that size and alignment information for @T@ is available in the body of @malloc@ through @sizeof@ and @_Alignof@ expressions respectively.
-In calls to @malloc@, the type @T@ is bound based on call-site information, allowing \CFA code to allocate memory without the potential for errors introduced by manually specifying the size of the allocated block.
-
-\subsection{Planned Features}
-
-One of the planned features for \CFA is \emph{reference types}.
-At a high level, the current proposal is to add references as a way to clean up pointer syntax.
-With references, it will be possible to store any address, as with a pointer, with the key difference being that references are automatically dereferenced.
-\begin{cfacode}
-int x = 0;
-int * p = &x;  // needs &
-int & ref = x; // no &
-
-printf("%d %d\n", *p, ref); // pointer needs *, ref does not
-\end{cfacode}
-
-It is possible to add new functions or shadow existing functions for the duration of a scope, using normal C scoping rules.
-One application of this feature is to reverse the order of @qsort@.
-\begin{cfacode}
-forall(otype T | { int ?<?( T, T ); })
-void qsort(const T * arr, size_t size) {
-  int comp(const void * t1, const void * t2) {
-    return *(T *)t1 < *(T *)t2 ? -1 : *(T *)t2 < *(T *)t1 ? 1 : 0;
-  }
-  qsort(arr, dimension, sizeof(T), comp);
-
-}
-double vals[10] = { ... };
-qsort(vals, 10);                // ascending order
-{
-  int ?<?(double x, double y) { // locally override behaviour
-    return x > y;
-  }
-  qsort(vals, 10);              // descending sort
-}
-\end{cfacode}
-Currently, there is no way to \emph{remove} a function from consideration for the duration of a scope.
-For example, it may be desirable to eliminate assignment from a scope, to reduce accidental mutation.
-To address this desire, \emph{deleted functions} are a planned feature for \CFA.
-\begin{cfacode}
-forall(otype T) void f(T *);
-
-int x = 0;
-f(&x);  // might modify x
-{
-  int ?=?(int *, int) = delete;
-  f(&x);   // error, no assignment for int
-}
-\end{cfacode}
-Now, if the deleted function is chosen as the best match, the expression resolver emits an error.
-
-\section{Invariants}
-An \emph{invariant} is a logical assertion that is true for some duration of a program's execution.
-Invariants help a programmer to reason about code correctness and prove properties of programs.
-
-\begin{sloppypar}
-In object-oriented programming languages, type invariants are typically established in a constructor and maintained throughout the object's lifetime.
-These assertions are typically achieved through a combination of access-control modifiers and a restricted interface.
-Typically, data which requires the maintenance of an invariant is hidden from external sources using the \emph{private} modifier, which restricts reads and writes to a select set of trusted routines, including member functions.
-It is these trusted routines that perform all modifications to internal data in a way that is consistent with the invariant, by ensuring that the invariant holds true at the end of the routine call.
-\end{sloppypar}
-
-In C, the @assert@ macro is often used to ensure invariants are true.
-Using @assert@, the programmer can check a condition and abort execution if the condition is not true.
-This powerful tool forces the programmer to deal with logical inconsistencies as they occur.
-For production, assertions can be removed by simply defining the preprocessor macro @NDEBUG@, making it simple to ensure that assertions are 0-cost for a performance intensive application.
-\begin{cfacode}
-struct Rational {
-  int n, d;
-};
-struct Rational create_rational(int n, int d) {
-  assert(d != 0);  // precondition
-  if (d < 0) {
-    n *= -1;
-    d *= -1;
-  }
-  assert(d > 0);  // postcondition
-  // rational invariant: d > 0
-  return (struct Rational) { n, d };
-}
-struct Rational rat_abs(struct Rational r) {
-  assert(r.d > 0); // check invariant, since no access control
-  r.n = abs(r.n);
-  assert(r.d > 0); // ensure function preserves invariant on return value
-  return r;
-}
-\end{cfacode}
-
-Some languages, such as D, provide language-level support for specifying program invariants.
-In addition to providing a C-like @assert@ expression, D allows specifying type invariants that are automatically checked at the end of a constructor, beginning of a destructor, and at the beginning and end of every public member function.
-\begin{dcode}
-import std.math;
-struct Rational {
-  invariant {
-    assert(d > 0, "d <= 0");
-  }
-  int n, d;
-  this(int n, int d) {  // constructor
-    assert(d != 0);
-    this.n = n;
-    this.d = d;
-    // implicitly check invariant
-  }
-  Rational abs() {
-    // implicitly check invariant
-    return Rational(std.math.abs(n), d);
-    // implicitly check invariant
-  }
-}
-\end{dcode}
-The D compiler is able to assume that assertions and invariants hold true and perform optimizations based on those assumptions.
-Note, these invariants are internal to the type's correct behaviour.
-
-Types also have external invariants with the state of the execution environment, including the heap, the open-file table, the state of global variables, etc.
-Since resources are finite and shared (concurrency), it is important to ensure that objects clean up properly when they are finished, restoring the execution environment to a stable state so that new objects can reuse resources.
-
-\section{Resource Management}
-\label{s:ResMgmt}
-
-Resource management is a problem that pervades every programming language.
-
-In standard C, resource management is largely a manual effort on the part of the programmer, with a notable exception to this rule being the program stack.
-The program stack grows and shrinks automatically with each function call, as needed for local variables.
-However, whenever a program needs a variable to outlive the block it is created in, the storage must be allocated dynamically with @malloc@ and later released with @free@.
-This pattern is extended to more complex objects, such as files and sockets, which can also outlive the block where they are created, and thus require their own resource management.
-Once allocated storage escapes\footnote{In garbage collected languages, such as Java, escape analysis \cite{Choi:1999:EAJ:320385.320386} is used to determine when dynamically allocated objects are strictly contained within a function, which allows the optimizer to allocate them on the stack.} a block, the responsibility for deallocating the storage is not specified in a function's type, that is, that the return value is owned by the caller.
-This implicit convention is provided only through documentation about the expectations of functions.
-
-In other languages, a hybrid situation exists where resources escape the allocation block, but ownership is precisely controlled by the language.
-This pattern requires a strict interface and protocol for a data structure, consisting of a pre-initialization and a post-termination call, and all intervening access is done via interface routines.
-This kind of encapsulation is popular in object-oriented programming languages, and like the stack, it takes care of a significant portion of resource-management cases.
-
-For example, \CC directly supports this pattern through class types and an idiom known as RAII \footnote{Resource Acquisition is Initialization} by means of constructors and destructors.
-Constructors and destructors are special routines that are automatically inserted into the appropriate locations to bookend the lifetime of an object.
-Constructors allow the designer of a type to establish invariants for objects of that type, since it is guaranteed that every object must be initialized through a constructor.
-In particular, constructors allow a programmer to ensure that all objects are initially set to a valid state.
-On the other hand, destructors provide a simple mechanism for tearing down an object and resetting the environment in which the object lived.
-RAII ensures that if all resources are acquired in a constructor and released in a destructor, there are no resource leaks, even in exceptional circumstances.
-A type with at least one non-trivial constructor or destructor is henceforth referred to as a \emph{managed type}.
-In the context of \CFA, a non-trivial constructor is either a user defined constructor or an auto-generated constructor that calls a non-trivial constructor.
-
-For the remaining resource ownership cases, a programmer must follow a brittle, explicit protocol for freeing resources or an implicit protocol enforced by the programming language.
-
-In garbage collected languages, such as Java, resources are largely managed by the garbage collector.
-Still, garbage collectors typically focus only on memory management.
-There are many kinds of resources that the garbage collector does not understand, such as sockets, open files, and database connections.
-In particular, Java supports \emph{finalizers}, which are similar to destructors.
-Unfortunately, finalizers are only guaranteed to be called before an object is reclaimed by the garbage collector \cite[p.~373]{Java8}, which may not happen if memory use is not contentious.
-Due to operating-system resource-limits, this is unacceptable for many long running programs.
-Instead, the paradigm in Java requires programmers to manually keep track of all resources \emph{except} memory, leading many novices and experts alike to forget to close files, etc.
-Complicating the picture, uncaught exceptions can cause control flow to change dramatically, leaking a resource that appears on first glance to be released.
-\begin{javacode}
-void write(String filename, String msg) throws Exception {
-  FileOutputStream out = new FileOutputStream(filename);
-  FileOutputStream log = new FileOutputStream("log.txt");
-  out.write(msg.getBytes());
-  log.write(msg.getBytes());
-  log.close();
-  out.close();
-}
-\end{javacode}
-Any line in this program can throw an exception, which leads to a profusion of finally blocks around many function bodies, since it is not always clear when an exception may be thrown.
-\begin{javacode}
-public void write(String filename, String msg) throws Exception {
-  FileOutputStream out = new FileOutputStream(filename);
-  try {
-    FileOutputStream log = new FileOutputStream("log.txt");
-    try {
-      out.write(msg.getBytes());
-      log.write(msg.getBytes());
-    } finally {
-      log.close();
-    }
-  } finally {
-    out.close();
-  }
-}
-\end{javacode}
-In Java 7, a new \emph{try-with-resources} construct was added to alleviate most of the pain of working with resources, but ultimately it still places the burden squarely on the user rather than on the library designer.
-Furthermore, for complete safety this pattern requires nested objects to be declared separately, otherwise resources that can throw an exception on close can leak nested resources \footnote{Since close is only guaranteed to be called on objects declared in the try-list and not objects passed as constructor parameters, the @B@ object may not be closed in @new A(new B())@ if @A@'s close raises an exception.} \cite{TryWithResources}.
-\begin{javacode}
-public void write(String filename, String msg) throws Exception {
-  try (  // try-with-resources
-    FileOutputStream out = new FileOutputStream(filename);
-    FileOutputStream log = new FileOutputStream("log.txt");
-  ) {
-    out.write(msg.getBytes());
-    log.write(msg.getBytes());
-  } // automatically closes out and log in every exceptional situation
-}
-\end{javacode}
-Variables declared as part of a try-with-resources statement must conform to the @AutoCloseable@ interface, and the compiler implicitly calls @close@ on each of the variables at the end of the block.
-Depending on when the exception is raised, both @out@ and @log@ are null, @log@ is null, or both are non-null; therefore, the cleanup for these variables at the end is automatically guarded and conditionally executed to prevent null-pointer exceptions.
-
-While Rust \cite{Rust} does not enforce the use of a garbage collector, it does provide a manual memory management environment, with a strict ownership model that automatically frees allocated memory and prevents common memory management errors.
-In particular, a variable has ownership over its associated value, which is freed automatically when the owner goes out of scope.
-Furthermore, values are \emph{moved} by default on assignment, rather than copied, which invalidates the previous variable binding.
-\begin{rustcode}
-struct S {
-  x: i32
-}
-let s = S { x: 123 };
-let z = s;           // move, invalidate s
-println!("{}", s.x); // error, s has been moved
-\end{rustcode}
-Types can be made copyable by implementing the @Copy@ trait.
-
-Rust allows multiple unowned views into an object through references, also known as borrows, provided that a reference does not outlive its referent.
-A mutable reference is allowed only if it is the only reference to its referent, preventing data race errors and iterator invalidation errors.
-\begin{rustcode}
-let mut x = 10;
-{
-  let y = &x;
-  let z = &x;
-  println!("{} {}", y, z); // prints 10 10
-}
-{
-  let y = &mut x;
-  // let z1 = &x;     // not allowed, have mutable reference
-  // let z2 = &mut x; // not allowed, have mutable reference
-  *y = 5;
-  println!("{}", y); // prints 5
-}
-println!("{}", x); // prints 5
-\end{rustcode}
-Since references are not owned, they do not release resources when they go out of scope.
-There is no runtime cost imposed on these restrictions, since they are enforced at compile-time.
-
-Rust provides RAII through the @Drop@ trait, allowing arbitrary code to execute when the object goes out of scope, providing automatic clean up of auxiliary resources, much like a \CC program.
-\begin{rustcode}
-struct S {
-  name: &'static str
-}
-
-impl Drop for S {  // RAII for S
-  fn drop(&mut self) {  // destructor
-    println!("dropped {}", self.name);
-  }
-}
-
-{
-  let x = S { name: "x" };
-  let y = S { name: "y" };
-} // prints "dropped y" "dropped x"
-\end{rustcode}
-
-% D has constructors and destructors that are worth a mention (under classes) https://dlang.org/spec/spec.html
-%  also https://dlang.org/spec/struct.html#struct-constructor
-% these are declared in the struct, so they're closer to C++ than to CFA, at least syntactically. Also do not allow for default constructors
-% D has a GC, which already makes the situation quite different from C/C++
-The programming language D also manages resources with constructors and destructors \cite{D}.
-In D, @struct@s are stack allocatable and managed via scoping like in \CC, whereas @class@es are managed automatically by the garbage collector.
-Like Java, using the garbage collector means that destructors are called indeterminately, requiring the use of finally statements to ensure dynamically allocated resources that are not managed by the garbage collector, such as open files, are cleaned up.
-Since D supports RAII, it is possible to use the same techniques as in \CC to ensure that resources are released in a timely manner.
-Finally, D provides a scope guard statement, which allows an arbitrary statement to be executed at normal scope exit with \emph{success}, at exceptional scope exit with \emph{failure}, or at normal and exceptional scope exit with \emph{exit}. % https://dlang.org/spec/statement.html#ScopeGuardStatement
-It has been shown that the \emph{exit} form of the scope guard statement can be implemented in a library in \CC \cite{ExceptSafe}.
-
-To provide managed types in \CFA, new kinds of constructors and destructors are added to \CFA and discussed in Chapter 2.
-
-\section{Tuples}
-\label{s:Tuples}
-In mathematics, tuples are finite-length sequences which, unlike sets, are ordered and allow duplicate elements.
-In programming languages, tuples provide fixed-sized heterogeneous lists of elements.
-Many programming languages have tuple constructs, such as SETL, \KWC, ML, and Scala.
-
-\KWC, a predecessor of \CFA, introduced tuples to C as an extension of the C syntax, rather than as a full-blown data type \cite{Till89}.
-In particular, Till noted that C already contains a tuple context in the form of function parameter lists.
-The main contributions of that work were in the form of adding tuple contexts to assignment in the form of multiple assignment and mass assignment (discussed in detail in section \ref{s:TupleAssignment}), function return values (see section \ref{s:MRV_Functions}), and record field access (see section \ref{s:MemberAccessTuple}).
-Adding tuples to \CFA has previously been explored by Esteves \cite{Esteves04}.
-
-The design of tuples in \KWC took much of its inspiration from SETL \cite{SETL}.
-SETL is a high-level mathematical programming language, with tuples being one of the primary data types.
-Tuples in SETL allow a number of operations, including subscripting, dynamic expansion, and multiple assignment.
-
-\CCeleven introduced @std::tuple@ as a library variadic template struct.
-Tuples are a generalization of @std::pair@, in that they allow for arbitrary length, fixed-size aggregation of heterogeneous values.
-\begin{cppcode}
-tuple<int, int, int> triple(10, 20, 30);
-get<1>(triple); // access component 1 => 20
-
-tuple<int, double> f();
-int i;
-double d;
-tie(i, d) = f(); // assign fields of return value into local variables
-
-tuple<int, int, int> greater(11, 0, 0);
-triple < greater; // true
-\end{cppcode}
-Tuples are simple data structures with few specific operations.
-In particular, it is possible to access a component of a tuple using @std::get<N>@.
-Another interesting feature is @std::tie@, which creates a tuple of references, allowing assignment of the results of a tuple-returning function into separate local variables, without requiring a temporary variable.
-Tuples also support lexicographic comparisons, making it simple to write aggregate comparators using @std::tie@.
-
-There is a proposal for \CCseventeen called \emph{structured bindings} \cite{StructuredBindings}, that introduces new syntax to eliminate the need to pre-declare variables and use @std::tie@ for binding the results from a function call.
-\begin{cppcode}
-tuple<int, double> f();
-auto [i, d] = f(); // unpacks into new variables i, d
-
-tuple<int, int, int> triple(10, 20, 30);
-auto & [t1, t2, t3] = triple;
-t2 = 0; // changes middle element of triple
-
-struct S { int x; double y; };
-S s = { 10, 22.5 };
-auto [x, y] = s; // unpack s
-\end{cppcode}
-Structured bindings allow unpacking any structure with all public non-static data members into fresh local variables.
-The use of @&@ allows declaring new variables as references, which is something that cannot be done with @std::tie@, since \CC references do not support rebinding.
-This extension requires the use of @auto@ to infer the types of the new variables, so complicated expressions with a non-obvious type must be documented with some other mechanism.
-Furthermore, structured bindings are not a full replacement for @std::tie@, as they always declare new variables.
-
-Like \CC, D provides tuples through a library variadic-template structure.
-In D, it is possible to name the fields of a tuple type, which creates a distinct type.
-% http://dlang.org/phobos/std_typecons.html
-\begin{dcode}
-Tuple!(float, "x", float, "y") point2D;
-Tuple!(float, float) float2;  // different type from point2D
-
-point2D[0]; // access first element
-point2D.x;  // access first element
-
-float f(float x, float y) {
-  return x+y;
-}
-
-f(point2D.expand);
-\end{dcode}
-Tuples are 0-indexed and can be subscripted using an integer or field name, if applicable.
-The @expand@ method produces the components of the tuple as a list of separate values, making it possible to call a function that takes $N$ arguments using a tuple with $N$ components.
-
-Tuples are a fundamental abstraction in most functional programming languages, such as Standard ML \cite{sml}.
-A function in SML always accepts exactly one argument.
-There are two ways to mimic multiple argument functions: the first through currying and the second by accepting tuple arguments.
-\begin{smlcode}
-fun fact (n : int) =
-  if (n = 0) then 1
-  else n*fact(n-1)
-
-fun binco (n: int, k: int) =
-  real (fact n) / real (fact k * fact (n-k))
-\end{smlcode}
-Here, the function @binco@ appears to take 2 arguments, but it actually takes a single argument which is implicitly decomposed via pattern matching.
-Tuples are a foundational tool in SML, allowing the creation of arbitrarily-complex structured data-types.
-
-Scala, like \CC, provides tuple types through the standard library \cite{Scala}.
-Scala provides tuples of size 1 through 22 inclusive through generic data structures.
-Tuples support named access and subscript access, among a few other operations.
-\begin{scalacode}
-val a = new Tuple3(0, "Text", 2.1) // explicit creation
-val b = (6, 'a', 1.1f)       // syntactic sugar: Tuple3[Int, Char, Float]
-val (i, _, d) = a            // extractor syntax, ignore middle element
-
-println(a._2)                // named access => print "Text"
-println(b.productElement(0)) // subscript access => print 6
-\end{scalacode}
-In Scala, tuples are primarily used as simple data structures for carrying around multiple values or for returning multiple values from a function.
-The 22-element restriction is an odd and arbitrary choice, but in practice it does not cause problems since large tuples are uncommon.
-Subscript access is provided through the @productElement@ method, which returns a value of the top-type @Any@, since it is impossible to receive a more precise type from a general subscripting method due to type erasure.
-The disparity between named access beginning at @_1@ and subscript access starting at @0@ is likewise an oddity, but subscript access is typically avoided since it discards type information.
-Due to the language's pattern matching facilities, it is possible to extract the values from a tuple into named variables, which is a more idiomatic way of accessing the components of a tuple.
-
-
-\Csharp also has tuples, but has similarly strange limitations, allowing tuples of size up to 7 components. % https://msdn.microsoft.com/en-us/library/system.tuple(v=vs.110).aspx
-The officially supported workaround for this shortcoming is to nest tuples in the 8th component.
-\Csharp allows accessing a component of a tuple by using the field @Item$N$@ for components 1 through 7, and @Rest@ for the nested tuple.
-
-In Python \cite{Python}, tuples are immutable sequences that provide packing and unpacking operations.
-While the tuple itself is immutable, and thus does not allow the assignment of components, there is nothing preventing a component from being internally mutable.
-The components of a tuple can be accessed by unpacking into multiple variables, indexing, or via field name, like D.
-Tuples support multiple assignment through a combination of packing and unpacking, in addition to the common sequence operations.
-
-Swift \cite{Swift}, like D, provides named tuples, with components accessed by name, index, or via extractors.
-Tuples are primarily used for returning multiple values from a function.
-In Swift, @Void@ is an alias for the empty tuple, and there are no single element tuples.
-
-Tuples comparable to those described above are added to \CFA and discussed in Chapter 3.
-
-\section{Variadic Functions}
-\label{sec:variadic_functions}
-In statically-typed programming languages, functions are typically defined to receive a fixed number of arguments of specified types.
-Variadic argument functions provide the ability to define a function that can receive a theoretically unbounded number of arguments.
-
-C provides a simple implementation of variadic functions.
-A function whose parameter list ends with @, ...@ is a variadic function.
-Among the most common variadic functions is @printf@.
-\begin{cfacode}
-int printf(const char * fmt, ...);
-printf("%d %g %c %s", 10, 3.5, 'X', "a string");
-\end{cfacode}
-Through the use of a format string, C programmers can communicate argument type information to @printf@, allowing C programmers to print any of the standard C data types.
-Still, @printf@ is extremely limited, since the format codes are specified by the C standard, meaning users cannot define their own format codes to extend @printf@ for new data types or new formatting rules.
-
-\begin{sloppypar}
-C provides manipulation of variadic arguments through the @va_list@ data type, which abstracts details of the manipulation of variadic arguments.
-Since the variadic arguments are untyped, it is up to the function to interpret any data that is passed in.
-Additionally, the interface to manipulate @va_list@ objects is essentially limited to advancing to the next argument, without any built-in facility to determine when the last argument is read.
-This limitation requires the use of an \emph{argument descriptor} to pass information to the function about the structure of the argument list, including the number of arguments and their types.
-The format string in @printf@ is one such example of an argument descriptor.
-\begin{cfacode}
-int f(const char * fmt, ...) {
-  va_list args;
-  va_start(args, fmt);  // initialize va_list
-  for (const char * c = fmt; *c != '\0'; ++c) {
-    if (*c == '%') {
-      ++c;
-      switch (*c) {
-        case 'd': {
-          int i = va_arg(args, int);  // have to specify type
-          // ...
-          break;
-        }
-        case 'g': {
-          double d = va_arg(args, double);
-          // ...
-          break;
-        }
-        ...
-      }
-    }
-  }
-  va_end(args);
-  return ...;
-}
-\end{cfacode}
-Every case must be handled explicitly, since the @va_arg@ macro requires a type argument to determine how the next set of bytes is to be interpreted.
-Furthermore, if the user makes a mistake, compile-time checking is typically restricted to standard format codes and their corresponding types.
-In general, this means that C's variadic functions are not type-safe, making them difficult to use properly.
-\end{sloppypar}
-
-% When arguments are passed to a variadic function, they undergo \emph{default argument promotions}.
-% Specifically, this means that
-
-\CCeleven added support for \emph{variadic templates}, which add much needed type-safety to C's variadic landscape.
-It is possible to use variadic templates to define variadic functions and variadic data types.
-\begin{cppcode}
-void print(int);
-void print(char);
-void print(double);
-...
-
-void f() {}    // base case
-
-template<typename T, typename... Args>
-void f(const T & arg, const Args &... rest) {
-  print(arg);  // print the current element
-  f(rest...);  // handle remaining arguments recursively
-}
-\end{cppcode}
-Variadic templates work largely through recursion on the \emph{parameter pack}, which is the argument with @...@ following its type.
-A parameter pack matches 0 or more elements, which can be types or expressions depending on the context.
-Like other templates, variadic template functions rely on an implicit set of constraints on a type, in this example a @print@ routine.
-That is, it is possible to use the @f@ routine on any type provided there is a corresponding @print@ routine, making variadic templates fully open to extension, unlike variadic functions in C.
-
-Recent \CC standards (\CCfourteen, \CCseventeen) expand on the basic premise by allowing variadic template variables and providing convenient expansion syntax to remove the need for recursion in some cases, amongst other things.
-
-% D has variadic templates that deserve a mention http://dlang.org/ctarguments.html
-
-In Java, a variadic function appears similar to a C variadic function in syntax.
-\begin{javacode}
-int sum(int... args) {
-  int s = 0;
-  for (int x : args) {
-    s += x;
-  }
-  return s;
-}
-
-void print(Object... objs) {
-  for (Object obj : objs) {
-    System.out.print(obj);
-  }
-}
-
-print("The sum from 1 to 10 is ", sum(1,2,3,4,5,6,7,8,9,10), ".\n");
-\end{javacode}
-The key difference is that Java variadic functions are type-safe, because they specify the type of the argument immediately prior to the ellipsis.
-In Java, variadic arguments are syntactic sugar for arrays, allowing access to length, subscripting operations, and for-each iteration on the variadic arguments, among other things.
-Since the argument type is specified explicitly, the top-type @Object@ can be used to accept arguments of any type, but to do anything interesting on the argument requires a down-cast to a more specific type, landing Java in a similar situation to C in that writing a function open to extension is difficult.
-
-The other option is to restrict the number of types that can be passed to the function by using a more specific type.
-Unfortunately, Java's use of nominal inheritance means that types must explicitly inherit from classes or interfaces in order to be considered a subclass.
-The combination of these two issues greatly restricts the usefulness of variadic functions in Java.
-
-Type-safe variadic functions are added to \CFA and discussed in Chapter 4.
-
-\section{Contributions}
-\label{s:contributions}
-
-No prior work on constructors or destructors had been done for \CFA.
-I did both the design and implementation work.
-While the overall design is based on constructors and destructors in object-oriented C++, it had to be re-engineered into non-object-oriented \CFA.
-I also had to make changes to the \CFA expression-resolver to integrate constructors and destructors into the type system.
-
-Prior work on the design of tuples for \CFA was done by Till, and some initial implementation work by Esteves.
-I largely took the Till design but added tuple indexing, which exists in a number of programming languages with tuples, simplified the implicit tuple conversions, and integrated with the \CFA polymorphism and assertion satisfaction model.
-I did a new implementation of tuples, and extensively
-augmented initial work by Bilson to incorporate tuples into the \CFA expression-resolver and type-unifier.
-
-No prior work on variadic functions had been done for \CFA.
-I did both the design and implementation work.
-While the overall design is based on variadic templates in C++, my design is novel in the way it is incorporated into the \CFA polymorphism model, and is engineered into \CFA so it dovetails with tuples.
Index: doc/theses/rob/thesis-frontpgs.tex
===================================================================
--- doc/theses/rob/thesis-frontpgs.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,163 +1,0 @@
-% T I T L E   P A G E
-% -------------------
-% Last updated May 24, 2011, by Stephen Carr, IST-Client Services
-% The title page is counted as page `i' but we need to suppress the
-% page number.  We also don't want any headers or footers.
-\pagestyle{empty}
-\pagenumbering{roman}
-
-% The contents of the title page are specified in the "titlepage"
-% environment.
-\begin{titlepage}
-        \begin{center}
-        \vspace*{1.0cm}
-
-        \Huge
-        {\bf Resource Management and Tuples in \CFA}
-
-        \vspace*{1.0cm}
-
-        \normalsize
-        by \\
-
-        \vspace*{1.0cm}
-
-        \Large
-        Robert Schluntz \\
-
-        \vspace*{3.0cm}
-
-        \normalsize
-        A thesis \\
-        presented to the University of Waterloo \\
-        in fulfillment of the \\
-        thesis requirement for the degree of \\
-        Master of Mathematics \\
-        in \\
-        Computer Science \\
-
-        \vspace*{2.0cm}
-
-        Waterloo, Ontario, Canada, 2017 \\
-
-        \vspace*{1.0cm}
-
-        \copyright\ Robert Schluntz 2017 \\
-        \end{center}
-\end{titlepage}
-
-% The rest of the front pages should contain no headers and be numbered using Roman numerals starting with `ii'
-\pagestyle{plain}
-\setcounter{page}{2}
-
-\cleardoublepage % Ends the current page and causes all figures and tables that have so far appeared in the input to be printed.
-% In a two-sided printing style, it also makes the next page a right-hand (odd-numbered) page, producing a blank page if necessary.
-
-
-
-% D E C L A R A T I O N   P A G E
-% -------------------------------
-  % The following is the sample Declaration Page as provided by the GSO
-  % December 13th, 2006.  It is designed for an electronic thesis.
-  \noindent
-I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
-
-  \bigskip
-
-  \noindent
-I understand that my thesis may be made electronically available to the public.
-
-\cleardoublepage
-%\newpage
-
-% A B S T R A C T
-% ---------------
-
-\begin{center}\textbf{Abstract}\end{center}
-
-\CFA is a modern, non-object-oriented extension of the C programming language.
-This thesis addresses several critical deficiencies of C, notably: resource management, a limited function-return mechanism, and unsafe variadic functions.
-To solve these problems, two fundamental language features are introduced: tuples and constructors/destructors.
-While these features exist in prior programming languages, the contribution of this work is engineering these features into a highly complex type system.
-C is an established language with a dedicated user-base.
-An important goal is to add new features in a way that naturally feels like C, to appeal to this core user-base, and due to huge amounts of legacy code, maintaining backwards compatibility is crucial.
-
-\cleardoublepage
-%\newpage
-
-% A C K N O W L E D G E M E N T S
-% -------------------------------
-
-\begin{center}\textbf{Acknowledgements}\end{center}
-
-I would like to thank my supervisor, Professor Peter Buhr, for all of his help, including reading the many drafts of this thesis and providing guidance throughout my degree.
-This work would not have been as enjoyable, nor would it have been as strong without Peter's knowledge, help, and encouragement.
-
-I would like to thank my readers, Professors Gregor Richards and Patrick Lam for all of their helpful feedback.
-
-Thanks to Aaron Moss and Thierry Delisle for many helpful discussions, both work-related and not, and for all of the work they have put into the \CFA project.
-This thesis would not have been the same without their efforts.
-
-I thank Glen Ditchfield and Richard Bilson, for all of their help with both the design and implementation of \CFA.
-
-I thank my partner, Erin Blackmere, for all of her love and support.
-Without her, I would not be who I am today.
-
-Thanks to my parents, Bob and Jackie Schluntz, for their love and support throughout my life, and for always encouraging me to be my best.
-
-Thanks to my best friends, Travis Bartlett, Abraham Dubrisingh, and Kevin Wu, whose companionship is always appreciated.
-The time we've spent together over the past 4 years has always kept me entertained.
-An extra shout-out to Kaleb Alway, Max Bardakov, Ten Bradley, and Ed Lee, with whom I've shared many a great meal; thank you for being my friend.
-
-Finally, I would like to acknowledge financial support in the form of a David R. Cheriton Graduate Scholarship and a corporate partnership with Huawei Ltd.
-
-\cleardoublepage
-%\newpage
-
-% % D E D I C A T I O N
-% % -------------------
-
-% \begin{center}\textbf{Dedication}\end{center}
-
-% % This is dedicated to the one I love.
-% TODO
-% \cleardoublepage
-% %\newpage
-
-% T A B L E   O F   C O N T E N T S
-% ---------------------------------
-\renewcommand\contentsname{Table of Contents}
-\tableofcontents
-\cleardoublepage
-\phantomsection
-%\newpage
-
-% L I S T   O F   T A B L E S
-% ---------------------------
-\addcontentsline{toc}{chapter}{List of Tables}
-\listoftables
-\cleardoublepage
-\phantomsection		% allows hyperref to link to the correct page
-%\newpage
-
-% % L I S T   O F   F I G U R E S
-% % -----------------------------
-% \addcontentsline{toc}{chapter}{List of Figures}
-% \listoffigures
-% \cleardoublepage
-% \phantomsection		% allows hyperref to link to the correct page
-% %\newpage
-
-% L I S T   O F   S Y M B O L S
-% -----------------------------
-% To include a Nomenclature section
-% \addcontentsline{toc}{chapter}{\textbf{Nomenclature}}
-% \renewcommand{\nomname}{Nomenclature}
-% \printglossary
-% \cleardoublepage
-% \phantomsection % allows hyperref to link to the correct page
-% \newpage
-
-% Change page numbering back to Arabic numerals
-\pagenumbering{arabic}
-
Index: doc/theses/rob/thesis.bib
===================================================================
--- doc/theses/rob/thesis.bib	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,67 +1,0 @@
-@article{Choi:1999:EAJ:320385.320386,
-  author = {Choi, Jong-Deok and Gupta, Manish and Serrano, Mauricio and Sreedhar, Vugranam C. and Midkiff, Sam},
-  title = {Escape Analysis for Java},
-  journal = {SIGPLAN Not.},
-  issue_date = {Oct. 1999},
-  volume = {34},
-  number = {10},
-  month = oct,
-  year = {1999},
-  issn = {0362-1340},
-  pages = {1--19},
-  numpages = {19},
-  url = {http://doi.acm.org/10.1145/320385.320386},
-  doi = {10.1145/320385.320386},
-  acmid = {320386},
-  publisher = {ACM},
-  address = {New York, NY, USA},
-}
-
-@online{TryWithResources,
-  author = {Julien Ponge},
-  contributer = {rschlunt@uwaterloo.ca},
-  title = {Better Resource Management with Java SE 7: Beyond Syntactic Sugar},
-  year = 2011,
-  url = {http://www.oracle.com/technetwork/articles/java/trywithresources-401775.html},
-  note = {\url{http://www.oracle.com/technetwork/articles/java/trywithresources-401775.html}},
-  urldate = {2017-04-03}
-}
-
-@online{ExceptSafe,
-  author = {Andrei Alexandrescu and Petru Marginean},
-  contributer = {rschlunt@uwaterloo.ca},
-  title = {Generic: Change the Way You Write Exception-Safe Code - Forever},
-  year = 2000,
-  url = {http://www.drdobbs.com/cpp/generic-change-the-way-you-write-excepti/184403758},
-  note = {\url{http://www.drdobbs.com/cpp/generic-change-the-way-you-write-excepti/184403758}},
-  urldate = {2017-04-03}
-}
-
-@manual{Swift,
-  keywords  = {Swift programming language},
-  contributer = {pabuhr@plg},
-  title = {The {Swift} Programming Language (Swift 3.1)},
-  organization= {Apple Inc.},
-  year  = 2017,
-  note  = {\url{https://developer.apple.com/library/content/documentation/Swift/Conceptual/Swift_Programming_Language/AboutTheLanguageReference.html}},
-}
-
-@article{StructuredBindings,
-  author = {Herb Sutter and Bjarne Stroustrup and Gabriel Dos Reis},
-  title = {Structured bindings},
-  issue_date = {2015-10-14},
-  month = oct,
-  year = {2015},
-  pages = {1--6},
-  numpages = {6},
-  note = {\url{http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/p0144r0.pdf}},
-}
-
-@manual{atexit,
-  keywords  = {The Linux Programmer's Manual atexit},
-  contributer = {rschlunt@uwaterloo.ca},
-  title = {The Linux Programmer's Manual},
-  organization= {The GNU Project},
-  year  = 2017,
-  note  = {\url{http://man7.org/linux/man-pages/man3/atexit.3.html}},
-}
Index: doc/theses/rob/thesis.tex
===================================================================
--- doc/theses/rob/thesis.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,298 +1,0 @@
-% uWaterloo Thesis Template for LaTeX
-% Last Updated May 24, 2011 by Stephen Carr, IST Client Services
-% FOR ASSISTANCE, please send mail to rt-IST-CSmathsci@ist.uwaterloo.ca
-
-% Effective October 2006, the University of Waterloo
-% requires electronic thesis submission. See the uWaterloo thesis regulations at
-% http://www.grad.uwaterloo.ca/Thesis_Regs/thesistofc.asp.
-
-% DON'T FORGET TO ADD YOUR OWN NAME AND TITLE in the "hyperref" package
-% configuration below. THIS INFORMATION GETS EMBEDDED IN THE FINAL PDF DOCUMENT.
-% You can view the information if you view Properties of the PDF document.
-
-% Many faculties/departments also require one or more printed
-% copies. This template attempts to satisfy both types of output.
-% It is based on the standard "book" document class which provides all necessary
-% sectioning structures and allows multi-part theses.
-
-% DISCLAIMER
-% To the best of our knowledge, this template satisfies the current uWaterloo requirements.
-% However, it is your responsibility to assure that you have met all
-% requirements of the University and your particular department.
-% Many thanks to the feedback from many graduates that assisted the development of this template.
-
-% -----------------------------------------------------------------------
-
-% By default, output is produced that is geared toward generating a PDF
-% version optimized for viewing on an electronic display, including
-% hyperlinks within the PDF.
-
-% E.g. to process a thesis called "mythesis.tex" based on this template, run:
-
-% pdflatex mythesis	-- first pass of the pdflatex processor
-% bibtex mythesis	-- generates bibliography from .bib data file(s)
-% pdflatex mythesis	-- fixes cross-references, bibliographic references, etc
-% pdflatex mythesis	-- fixes cross-references, bibliographic references, etc
-
-% If you use the recommended LaTeX editor, Texmaker, you would open the mythesis.tex
-% file, then click the pdflatex button. Then run BibTeX (under the Tools menu).
-% Then click the pdflatex button two more times. If you have an index as well,
-% you'll need to run MakeIndex from the Tools menu as well, before running pdflatex
-% the last two times.
-
-% N.B. The "pdftex" program allows graphics in the following formats to be
-% included with the "\includegraphics" command: PNG, PDF, JPEG, TIFF
-% Tip 1: Generate your figures and photos in the size you want them to appear
-% in your thesis, rather than scaling them with \includegraphics options.
-% Tip 2: Any drawings you do should be in scalable vector graphic formats:
-% SVG, PNG, WMF, EPS and then converted to PNG or PDF, so they are scalable in
-% the final PDF as well.
-% Tip 3: Photographs should be cropped and compressed so as not to be too large.
-
-% To create a PDF output that is optimized for double-sided printing:
-%
-% 1) comment-out the \documentclass statement in the preamble below, and
-% un-comment the second \documentclass line.
-%
-% 2) change the value assigned below to the boolean variable
-% "PrintVersion" from "false" to "true".
-
-% --------------------- Start of Document Preamble -----------------------
-
-% Specify the document class, default style attributes, and page dimensions
-% For hyperlinked PDF, suitable for viewing on a computer, use this:
-\PassOptionsToPackage{
-dvipsnames
-% ,monochrome % toggle black and white mode
-}{xcolor}
-\PassOptionsToPackage{pdftex}{graphicx}
-\documentclass[letterpaper,12pt,titlepage,oneside,final]{book}
-
-% For PDF, suitable for double-sided printing, change the PrintVersion variable below
-% to "true" and use this \documentclass line instead of the one above:
-% \documentclass[letterpaper,12pt,titlepage,openright,twoside,final]{book}
-
-\usepackage[T1]{fontenc}                                % allow Latin1 (extended ASCII) characters
-\usepackage{textcomp}
-% \usepackage[utf8]{inputenc}
-% \usepackage[latin1]{inputenc}
-\usepackage{fullpage,times,comment}
-% \usepackage{epic,eepic}
-\usepackage{upquote}                                    % switch curled `'" to straight
-% \usepackage{calc}
-\usepackage{xspace}
-% \usepackage{graphicx}
-\usepackage{varioref}                                   % extended references
-\usepackage{listings}                                   % format program code
-% \usepackage[flushmargin]{footmisc}                      % support label/reference in footnote
-% \usepackage{latexsym}                                   % \Box glyph
-% \usepackage{mathptmx}                                   % better math font with "times"
-% \usepackage[usenames]{color}
-% \usepackage[pagewise]{lineno}
-% \renewcommand{\linenumberfont}{\scriptsize\sffamily}
-\usepackage{courier}
-\input{common}                                          % bespoke macros used in the document
-
-\usepackage{bigfoot}
-
-\interfootnotelinepenalty=10000
-
-% Some LaTeX commands I define for my own nomenclature.
-% If you have to, it's better to change nomenclature once here than in a
-% million places throughout your thesis!
-\newcommand{\package}[1]{\textbf{#1}} % package names in bold text
-\newcommand{\cmmd}[1]{\textbackslash\texttt{#1}} % command name in tt font
-\newcommand{\href}[1]{#1} % does nothing, but defines the command so the
-    % print-optimized version will ignore \href tags (redefined by hyperref pkg).
-%\newcommand{\texorpdfstring}[2]{#1} % does nothing, but defines the command
-% Anything defined here may be redefined by packages added below...
-
-% This package allows if-then-else control structures.
-\usepackage{ifthen}
-\newboolean{PrintVersion}
-\setboolean{PrintVersion}{false}
-% CHANGE THIS VALUE TO "true" as necessary, to improve printed results for hard copies
-% by overriding some options of the hyperref package below.
-
-%\usepackage{nomencl} % For a nomenclature (optional; available from ctan.org)
-\usepackage{amsmath,amssymb,amstext} % Lots of math symbols and environments
-\usepackage[pdftex]{graphicx} % For including graphics N.B. pdftex graphics driver
-
-\usepackage{xcolor}
-\usepackage{listings}
-
-\input{cfa-format.tex}
-
-% Hyperlinks make it very easy to navigate an electronic document.
-% In addition, this is where you should specify the thesis title
-% and author as they appear in the properties of the PDF document.
-% Use the "hyperref" package
-% N.B. HYPERREF MUST BE THE LAST PACKAGE LOADED; ADD ADDITIONAL PKGS ABOVE
-\usepackage[pdftex,letterpaper=true,pagebackref=false]{hyperref} % with basic options
-		% N.B. pagebackref=true provides links back from the References to the body text. This can cause trouble for printing.
-\hypersetup{
-    plainpages=false,       % needed if Roman numbers in frontpages
-    pdfpagelabels=true,     % adds page number as label in Acrobat's page count
-    bookmarks=true,         % show bookmarks bar?
-    unicode=false,          % non-Latin characters in Acrobat's bookmarks
-    pdftoolbar=true,        % show Acrobat's toolbar?
-    pdfmenubar=true,        % show Acrobat's menu?
-    pdffitwindow=false,     % window fit to page when opened
-    pdfstartview={FitH},    % fits the width of the page to the window
-    pdftitle={Resource Management and Tuples in \CFA},    % title: CHANGE THIS TEXT!
-    pdfauthor={Rob Schluntz},    % author: CHANGE THIS TEXT! and uncomment this line
-    pdfsubject={Programming Languages},  % subject: CHANGE THIS TEXT! and uncomment this line
-%    pdfkeywords={keyword1} {key2} {key3}, % list of keywords, and uncomment this line if desired
-    pdfnewwindow=true,      % links in new window
-    colorlinks=true,        % false: boxed links; true: colored links
-    linkcolor=blue,         % color of internal links
-    citecolor=green,        % color of links to bibliography
-    filecolor=magenta,      % color of file links
-    urlcolor=cyan           % color of external links
-}
-\ifthenelse{\boolean{PrintVersion}}{   % for improved print quality, change some hyperref options
-\hypersetup{	% override some previously defined hyperref options
-%    colorlinks,%
-    citecolor=black,%
-    filecolor=black,%
-    linkcolor=black,%
-    urlcolor=black}
-}{} % end of ifthenelse (no else)
-
-% Setting up the page margins...
-% uWaterloo thesis requirements specify a minimum of 1 inch (72pt) margin at the
-% top, bottom, and outside page edges and a 1.125 in. (81pt) gutter
-% margin (on binding side). While this is not an issue for electronic
-% viewing, a PDF may be printed, and so we have the same page layout for
-% both printed and electronic versions, we leave the gutter margin in.
-% Set margins to minimum permitted by uWaterloo thesis regulations:
-\setlength{\marginparwidth}{0pt} % width of margin notes
-% N.B. If margin notes are used, you must adjust \textwidth, \marginparwidth
-% and \marginparsep so that the space left between the margin notes and page
-% edge is less than 15 mm (0.6 in.)
-\setlength{\marginparsep}{0pt} % width of space between body text and margin notes
-\setlength{\evensidemargin}{0.125in} % Adds 1/8 in. to binding side of all
-% even-numbered pages when the "twoside" printing option is selected
-\setlength{\oddsidemargin}{0.125in} % Adds 1/8 in. to the left of all pages
-% when "oneside" printing is selected, and to the left of all odd-numbered
-% pages when "twoside" printing is selected
-\setlength{\textwidth}{6.375in} % assuming US letter paper (8.5 in. x 11 in.) and
-% side margins as above
-\raggedbottom
-
-% The following statement specifies the amount of space between
-% paragraphs. Other reasonable specifications are \bigskipamount and \smallskipamount.
-\setlength{\parskip}{\medskipamount}
-
-% The following statement controls the line spacing.  The default
-% spacing corresponds to good typographic conventions and only slight
-% changes (e.g., perhaps "1.2"), if any, should be made.
-\renewcommand{\baselinestretch}{1} % this is the default line space setting
-
-% By default, each chapter will start on a recto (right-hand side)
-% page.  We also force each section of the front pages to start on
-% a recto page by inserting \cleardoublepage commands.
-% In many cases, this will require that the verso page be
-% blank and, while it should be counted, a page number should not be
-% printed.  The following statements ensure a page number is not
-% printed on an otherwise blank verso page.
-\let\origdoublepage\cleardoublepage
-\newcommand{\clearemptydoublepage}{%
-  \clearpage{\pagestyle{empty}\origdoublepage}}
-\let\cleardoublepage\clearemptydoublepage
-
-%======================================================================
-%   L O G I C A L    D O C U M E N T -- the content of your thesis
-%======================================================================
-\begin{document}
-
-% For a large document, it is a good idea to divide your thesis
-% into several files, each one containing one chapter.
-% To illustrate this idea, the "front pages" (i.e., title page,
-% declaration, borrowers' page, abstract, acknowledgements,
-% dedication, table of contents, list of tables, list of figures,
-% nomenclature) are contained within the file "thesis-frontpgs.tex" which is
-% included into the document by the following statement.
-%----------------------------------------------------------------------
-% FRONT MATERIAL
-%----------------------------------------------------------------------
-\input{thesis-frontpgs}
-
-%----------------------------------------------------------------------
-% MAIN BODY
-%----------------------------------------------------------------------
-
-\input{intro}
-
-\input{ctordtor}
-
-\input{tuples}
-
-\input{variadic}
-
-\input{conclusions}
-
-% The \appendix statement indicates the beginning of the appendices.
-% \appendix
-
-% % Add a title page before the appendices and a line in the Table of Contents
-% \chapter*{APPENDICES}
-% \addcontentsline{toc}{chapter}{APPENDICES}
-% %======================================================================
-% \chapter[PDF Plots From Matlab]{Matlab Code for Making a PDF Plot}
-% \label{AppendixA}
-% % Tip 4: Example of how to get a shorter chapter title for the Table of Contents
-% %======================================================================
-% \section{Using the GUI}
-% Properties of Matlab plots can be adjusted from the plot window via a graphical interface. Under the Desktop menu in the Figure window, select the Property Editor. You may also want to check the Plot Browser and Figure Palette for more tools. To adjust properties of the axes, look under the Edit menu and select Axes Properties.
-
-% To set the figure size and to save as PDF or other file formats, click the Export Setup button in the figure Property Editor.
-
-% \section{From the Command Line}
-% All figure properties can also be manipulated from the command line. Here's an example:
-% \begin{verbatim}
-% x=[0:0.1:pi];
-% hold on % Plot multiple traces on one figure
-% plot(x,sin(x))
-% plot(x,cos(x),'--r')
-% plot(x,tan(x),'.-g')
-% title('Some Trig Functions Over 0 to \pi') % Note LaTeX markup!
-% legend('{\it sin}(x)','{\it cos}(x)','{\it tan}(x)')
-% hold off
-% set(gca,'Ylim',[-3 3]) % Adjust Y limits of "current axes"
-% set(gcf,'Units','inches') % Set figure size units of "current figure"
-% set(gcf,'Position',[0,0,6,4]) % Set figure width (6 in.) and height (4 in.)
-% cd n:\thesis\plots % Select where to save
-% print -dpdf plot.pdf % Save as PDF
-% \end{verbatim}
-
-%----------------------------------------------------------------------
-% END MATERIAL
-%----------------------------------------------------------------------
-
-% B I B L I O G R A P H Y
-% -----------------------
-
-% The following statement selects the style to use for references.  It controls the sort order of the entries in the bibliography and also the formatting for the in-text labels.
-\bibliographystyle{plain}
-% This specifies the location of the file containing the bibliographic information.
-% It assumes you're using BibTeX (if not, why not?).
-\cleardoublepage % This is needed if the book class is used, to place the anchor in the correct page,
-                 % because the bibliography will start on its own page.
-                 % Use \clearpage instead if the document class uses the "oneside" argument
-\phantomsection  % With hyperref package, enables hyperlinking from the table of contents to bibliography
-% The following statement causes the title "References" to be used for the bibliography section:
-\renewcommand*{\bibname}{References}
-
-% Add the References to the Table of Contents
-\addcontentsline{toc}{chapter}{\textbf{References}}
-
-\bibliography{cfa,thesis}
-% Tip 5: You can create multiple .bib files to organize your references.
-% Just list them all in the \bibliography command, separated by commas (no spaces).
-
-% The following statement causes the specified references to be added to the bibliography even if they were not
-% cited in the text. The asterisk is a wildcard that causes all entries in the bibliographic database to be included (optional).
-% \nocite{*}
-
-\end{document}
Index: doc/theses/rob/tuples.tex
===================================================================
--- doc/theses/rob/tuples.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,801 +1,0 @@
-%======================================================================
-\chapter{Tuples}
-%======================================================================
-
-\section{Multiple-Return-Value Functions}
-\label{s:MRV_Functions}
-In standard C, functions can return at most one value.
-This restriction results in code which emulates functions with multiple return values by \emph{aggregation} or by \emph{aliasing}.
-In the former situation, the function designer creates a record type that combines all of the return values into a single type.
-For example, consider a function returning the most frequently occurring letter in a string, and its frequency.
-This example is complex enough to illustrate that an array is insufficient, since arrays are homogeneous, and demonstrates a potential pitfall that exists with aliasing.
-\begin{cfacode}
-struct mf_ret {
-  int freq;
-  char ch;
-};
-
-struct mf_ret most_frequent(const char * str) {
-  char freqs [26] = { 0 };
-  struct mf_ret ret = { 0, 'a' };
-  for (int i = 0; str[i] != '\0'; ++i) {
-    if (isalpha(str[i])) {        // only count letters
-      int ch = tolower(str[i]);   // convert to lower case
-      int idx = ch-'a';
-      if (++freqs[idx] > ret.freq) {  // update on new max
-        ret.freq = freqs[idx];
-        ret.ch = ch;
-      }
-    }
-  }
-  return ret;
-}
-
-const char * str = "hello world";
-struct mf_ret ret = most_frequent(str);
-printf("%s -- %d %c\n", str, ret.freq, ret.ch);
-\end{cfacode}
-Of note, the designer must come up with a name for the return type and for each of its fields.
-Unnecessary naming is a common programming language issue, introducing verbosity and a complication of the user's mental model.
-That is, adding another named type creates another association in the programmer's mind that needs to be kept track of when reading and writing code.
-As such, this technique is effective when used sparingly, but can quickly get out of hand if many functions need to return different combinations of types.
-
-In the latter approach, the designer simulates multiple return values by passing the additional return values as pointer parameters.
-The pointer parameters are assigned inside of the routine body to emulate a return.
-Using the same example,
-\begin{cfacode}
-int most_frequent(const char * str, char * ret_ch) {
-  char freqs [26] = { 0 };
-  int ret_freq = 0;
-  for (int i = 0; str[i] != '\0'; ++i) {
-    if (isalpha(str[i])) {        // only count letters
-      int ch = tolower(str[i]);   // convert to lower case
-      int idx = ch-'a';
-      if (++freqs[idx] > ret_freq) {  // update on new max
-        ret_freq = freqs[idx];
-        *ret_ch = ch;   // assign to out parameter
-      }
-    }
-  }
-  return ret_freq;  // only one value returned directly
-}
-
-const char * str = "hello world";
-char ch;                            // pre-allocate return value
-int freq = most_frequent(str, &ch); // pass return value as out parameter
-printf("%s -- %d %c\n", str, freq, ch);
-\end{cfacode}
-Notably, using this approach, the caller is directly responsible for allocating storage for the additional temporary return values, which complicates the call site with a sequence of variable declarations leading up to the call.
-Also, while a disciplined use of @const@ can give clues about whether a pointer parameter is going to be used as an out parameter, it is not immediately obvious from only the routine signature whether the callee expects such a parameter to be initialized before the call.
-Furthermore, while many C routines that accept pointers are designed so that it is safe to pass @NULL@ as a parameter, there are many C routines that are not null-safe.
-On a related note, C does not provide a standard mechanism to state that a parameter is going to be used as an additional return value, which makes the job of ensuring that a value is returned more difficult for the compiler.
-Interestingly, there is a subtle bug in the previous example, in that @ret_ch@ is never assigned for a string that does not contain any letters, which can lead to undefined behaviour.
-In this particular case, it turns out that the frequency return value also doubles as an error code, where a frequency of 0 means the character return value should be ignored.
-Still, not every routine with multiple return values should be required to return an error code, and error codes are easily ignored, so this is not a satisfying solution.
-As with the previous approach, this technique can simulate multiple return values, but in practice it is verbose and error prone.
-
-In \CFA, functions can be declared to return multiple values with an extension to the function declaration syntax.
-Multiple return values are declared as a comma-separated list of types in square brackets in the same location that the return type appears in standard C function declarations.
-The ability to return multiple values from a function requires a new syntax for the return statement.
-For consistency, the return statement in \CFA accepts a comma-separated list of expressions in square brackets.
-The expression resolution phase of the \CFA translator ensures that the correct form is used depending on the values being returned and the return type of the current function.
-A multiple-returning function with return type @T@ can return any expression that is implicitly convertible to @T@.
-Using the running example, the @most_frequent@ function can be written using multiple return values as such,
-\begin{cfacode}
-[int, char] most_frequent(const char * str) {
-  char freqs [26] = { 0 };
-  int ret_freq = 0;
-  char ret_ch = 'a';  // arbitrary default value for consistent results
-  for (int i = 0; str[i] != '\0'; ++i) {
-    if (isalpha(str[i])) {        // only count letters
-      int ch = tolower(str[i]);   // convert to lower case
-      int idx = ch-'a';
-      if (++freqs[idx] > ret_freq) {  // update on new max
-        ret_freq = freqs[idx];
-        ret_ch = ch;
-      }
-    }
-  }
-  return [ret_freq, ret_ch];
-}
-\end{cfacode}
-This approach provides the benefits of compile-time checking for appropriate return statements as in aggregation, but without the required verbosity of declaring a new named type, which precludes the bug seen with out-parameters.
-
-The addition of multiple-return-value functions necessitates a syntax for accepting multiple values at the call-site.
-The simplest mechanism for retaining a return value in C is variable assignment.
-By assigning the return value into a variable, its value can be retrieved later at any point in the program.
-As such, \CFA allows assigning multiple values from a function into multiple variables, using a square-bracketed list of lvalue expressions on the left side.
-\begin{cfacode}
-const char * str = "hello world";
-int freq;
-char ch;
-[freq, ch] = most_frequent(str);  // assign into multiple variables
-printf("%s -- %d %c\n", str, freq, ch);
-\end{cfacode}
-It is also common to use a function's output as the input to another function.
-\CFA also allows this case, without any new syntax.
-When a function call is passed as an argument to another call, the expression resolver attempts to find the best match of actual arguments to formal parameters given all of the possible expression interpretations in the current scope \cite{Bilson03}.
-For example,
-\begin{cfacode}
-void process(int);       // (1)
-void process(char);      // (2)
-void process(int, char); // (3)
-void process(char, int); // (4)
-
-process(most_frequent("hello world"));  // selects (3)
-\end{cfacode}
-In this case, there is only one option for a function named @most_frequent@ that takes a string as input.
-This function returns two values, one @int@ and one @char@.
-There are four options for a function named @process@, but only two that accept two arguments, and of those the best match is (3), which is also an exact match.
-This expression first calls @most_frequent("hello world")@, which produces the values @3@ and @'l'@, which are fed directly to the first and second parameters of (3), respectively.
-
-\section{Tuple Expressions}
-Multiple-return-value functions provide \CFA with a new syntax for expressing a combination of expressions in the return statement and a combination of types in a function signature.
-These notions can be generalized to provide \CFA with \emph{tuple expressions} and \emph{tuple types}.
-A tuple expression is an expression producing a fixed-size, ordered list of values of heterogeneous types.
-The type of a tuple expression is the tuple of the subexpression types, or a \emph{tuple type}.
-In \CFA, a tuple expression is denoted by a comma-separated list of expressions enclosed in square brackets.
-For example, the expression @[5, 'x', 10.5]@ has type @[int, char, double]@.
-The previous expression has 3 \emph{components}.
-Each component in a tuple expression can be any \CFA expression, including another tuple expression.
-The order of evaluation of the components in a tuple expression is unspecified, to allow a compiler the greatest flexibility for program optimization.
-It is, however, guaranteed that each component of a tuple expression is evaluated for side-effects, even if the result is not used.
-Multiple-return-value functions can equivalently be called \emph{tuple-returning functions}.
-
-\subsection{Tuple Variables}
-The call-site of the @most_frequent@ routine has a notable blemish, in that it required the preallocation of return variables in a manner similar to the aliasing example, since it is impossible to declare multiple variables of different types in the same declaration in standard C.
-In \CFA, it is possible to overcome this restriction by declaring a \emph{tuple variable}.
-\begin{cfacode}[emph=ret, emphstyle=\color{red}]
-const char * str = "hello world";
-[int, char] ret = most_frequent(str);  // initialize tuple variable
-printf("%s -- %d %c\n", str, ret);
-\end{cfacode}
-It is now possible to accept multiple values into a single piece of storage, in much the same way that it was previously possible to pass multiple values from one function call to another.
-These variables can be used in any of the contexts where a tuple expression is allowed, such as in the @printf@ function call.
-As in the @process@ example, the components of the tuple value are passed as separate parameters to @printf@, allowing very simple printing of tuple expressions.
-One way to access the individual components is with a simple assignment, as in previous examples.
-\begin{cfacode}
-int freq;
-char ch;
-[freq, ch] = ret;
-\end{cfacode}
-
-\begin{sloppypar}
-In addition to variables of tuple type, it is also possible to have pointers to tuples, and arrays of tuples.
-Tuple types can be composed of any types, except for array types, since array assignment is disallowed, which makes tuple assignment difficult when a tuple contains an array.
-\begin{cfacode}
-[double, int] di;
-[double, int] * pdi;
-[double, int] adi[10];
-\end{cfacode}
-This example declares a variable of type @[double, int]@, a variable of type pointer to @[double, int]@, and an array of ten @[double, int]@.
-\end{sloppypar}
-
-\subsection{Tuple Indexing}
-At times, it is desirable to access a single component of a tuple-valued expression without creating unnecessary temporary variables to assign to.
-Given a tuple-valued expression @e@ and a compile-time constant integer $i$ where $0 \leq i < n$, where $n$ is the number of components in @e@, @e.i@ accesses the $i$\textsuperscript{th} component of @e@.
-For example,
-\begin{cfacode}
-[int, double] x;
-[char *, int] f();
-void g(double, int);
-[int, double] * p;
-
-int y = x.0;              // access int component of x
-y = f().1;                // access int component of f
-p->0 = 5;                 // access int component of tuple pointed-to by p
-g(x.1, x.0);              // rearrange x to pass to g
-double z = [x, f()].0.1;  // access second component of first component
-                          // of tuple expression
-\end{cfacode}
-As seen above, tuple-index expressions can occur on any tuple-typed expression, including tuple-returning functions, square-bracketed tuple expressions, and other tuple-index expressions, provided the retrieved component is also a tuple.
-This feature was proposed for \KWC but never implemented \cite[p.~45]{Till89}.
-
-\subsection{Flattening and Structuring}
-As evident in previous examples, tuples in \CFA do not have a rigid structure.
-In function call contexts, tuples support implicit flattening and restructuring conversions.
-Tuple flattening recursively expands a tuple into the list of its basic components.
-Tuple structuring packages a list of expressions into a value of tuple type.
-\begin{cfacode}
-int f(int, int);
-int g([int, int]);
-int h(int, [int, int]);
-[int, int] x;
-int y;
-
-f(x);      // flatten
-g(y, 10);  // structure
-h(x, y);   // flatten & structure
-\end{cfacode}
-In \CFA, each of these calls is valid.
-In the call to @f@, @x@ is implicitly flattened so that the components of @x@ are passed as the two arguments to @f@.
-For the call to @g@, the values @y@ and @10@ are structured into a single argument of type @[int, int]@ to match the type of the parameter of @g@.
-Finally, in the call to @h@, @x@ is flattened to yield an argument list of length 3, of which the first component of @x@ is passed as the first parameter of @h@, and the second component of @x@ and @y@ are structured into the second argument of type @[int, int]@.
-The flexible structure of tuples permits a simple and expressive function-call syntax to work seamlessly with both single- and multiple-return-value functions, and with any number of arguments of arbitrarily complex structure.
-
-In \KWC \cite{Buhr94a,Till89}, there were 4 tuple coercions: opening, closing, flattening, and structuring.
-Opening coerces a tuple value into a tuple of values, while closing converts a tuple of values into a single tuple value.
-Flattening coerces a nested tuple into a flat tuple, \ie it takes a tuple with tuple components and expands it into a tuple with only non-tuple components.
-Structuring moves in the opposite direction, \ie it takes a flat tuple value and provides structure by introducing nested tuple components.
-
-In \CFA, the design has been simplified to require only the two conversions previously described, which trigger only in function call and return situations.
-This simplification is a primary contribution of this thesis to the design of tuples in \CFA.
-Specifically, the expression resolution algorithm examines all of the possible alternatives for an expression to determine the best match.
-In resolving a function call expression, each combination of function value and list of argument alternatives is examined.
-Given a particular argument list and function value, the list of argument alternatives is flattened to produce a list of non-tuple valued expressions.
-Then the flattened list of expressions is compared with each value in the function's parameter list.
-If the parameter's type is not a tuple type, then the current argument value is unified with the parameter type, and on success the next argument and parameter are examined.
-If the parameter's type is a tuple type, then the structuring conversion takes effect, recursively applying the parameter matching algorithm using the tuple's component types as the parameter list types.
-Assuming a successful unification, eventually the algorithm gets to the end of the tuple type, which causes all of the matching expressions to be consumed and structured into a tuple expression.
-For example, in
-\begin{cfacode}
-int f(int, [double, int]);
-f([5, 10.2], 4);
-\end{cfacode}
-There is only a single definition of @f@, and 3 arguments with only single interpretations.
-First, the argument alternative list @[5, 10.2], 4@ is flattened to produce the argument list @5, 10.2, 4@.
-Next, the parameter matching algorithm begins, with $P = $@int@ and $A = $@int@, which unifies exactly.
-Moving to the next parameter and argument, $P = $@[double, int]@ and $A = $@double@.
-This time, the parameter is a tuple type, so the algorithm applies recursively with $P' = $@double@ and $A = $@double@, which unifies exactly.
-Then $P' = $@int@ and $A = $@int@, which again unifies exactly.
-At this point, the end of $P'$ has been reached, so the arguments @10.2, 4@ are structured into the tuple expression @[10.2, 4]@.
-Finally, the end of the parameter list $P$ has also been reached, so the final expression is @f(5, [10.2, 4])@.
-
-\section{Tuple Assignment}
-\label{s:TupleAssignment}
-An assignment where the left side of the assignment operator has a tuple type is called tuple assignment.
-There are two kinds of tuple assignment depending on whether the right side of the assignment operator has a tuple type or a non-tuple type, called \emph{Multiple} and \emph{Mass} Assignment, respectively.
-\begin{cfacode}
-int x;
-double y;
-[int, double] z;
-[y, x] = 3.14;  // mass assignment
-[x, y] = z;     // multiple assignment
-z = 10;         // mass assignment
-z = [x, y];     // multiple assignment
-\end{cfacode}
-Let $L_i$ for $i$ in $[0, n)$ represent each component of the flattened left side, $R_i$ represent each component of the flattened right side of a multiple assignment, and $R$ represent the right side of a mass assignment.
-
-For a multiple assignment to be valid, both tuples must have the same number of elements when flattened.
-For example, the following is invalid because the number of components on the left does not match the number of components on the right.
-\begin{cfacode}
-[int, int] x, y, z;
-[x, y] = z;   // multiple assignment, invalid 4 != 2
-\end{cfacode}
-Multiple assignment assigns $R_i$ to $L_i$ for each $i$.
-That is, @?=?(&$L_i$, $R_i$)@ must be a well-typed expression.
-In the earlier example @[x, y] = z@, where @z@ has type @[int, double]@, @z@ is flattened into @z.0, z.1@, and the assignments @x = z.0@ and @y = z.1@ happen.
-
-A mass assignment assigns the value $R$ to each $L_i$.
-For a mass assignment to be valid, @?=?(&$L_i$, $R$)@ must be a well-typed expression.
-These semantics differ from C cascading assignment (\eg @a=b=c@) in that conversions are applied to $R$ in each individual assignment, which prevents data loss from the chain of conversions that can happen during a cascading assignment.
-For example, @[y, x] = 3.14@ performs the assignments @y = 3.14@ and @x = 3.14@, which results in the value @3.14@ in @y@ and the value @3@ in @x@.
-On the other hand, the C cascading assignment @y = x = 3.14@ performs the assignments @x = 3.14@ and @y = x@, which results in the value @3@ in @x@, and as a result the value @3@ in @y@ as well.
-
-Both kinds of tuple assignment have parallel semantics, such that each value on the left side and right side is evaluated \emph{before} any assignments occur.
-As a result, it is possible to swap the values in two variables without explicitly creating any temporary variables or calling a function.
-\begin{cfacode}
-int x = 10, y = 20;
-[x, y] = [y, x];
-\end{cfacode}
-After executing this code, @x@ has the value @20@ and @y@ has the value @10@.
-
-In \CFA, tuple assignment is an expression where the result type is the type of the left side of the assignment, as in normal assignment.
-That is, a tuple assignment produces the value of the left-hand side after assignment.
-These semantics allow cascading tuple assignment to work out naturally in any context where a tuple is permitted.
-These semantics are a change from the original tuple design in \KWC \cite{Till89}, wherein tuple assignment was a statement that allows cascading assignments as a special case.
-Restricting tuple assignment to statements was an attempt to fix what was seen as a problem with side-effects, wherein assignment can be used in many different locations, such as in function-call argument position.
-While permitting assignment as an expression does introduce the potential for subtle complexities, it is impossible to remove assignment expressions from \CFA without affecting backwards compatibility.
-Furthermore, there are situations where permitting assignment as an expression improves readability by keeping code succinct and reducing repetition, and complicating the definition of tuple assignment puts a greater cognitive burden on the user.
-In another language, tuple assignment as a statement could be reasonable, but it would be inconsistent for tuple assignment to be the only kind of assignment that is not an expression.
-In addition, \KWC permits the compiler to optimize tuple assignment as a block copy, since it does not support user-defined assignment operators.
-This optimization could be implemented in \CFA, but it requires the compiler to verify that the selected assignment operator is trivial.
-
-The following example shows multiple, mass, and cascading assignment used in one expression.
-\begin{cfacode}
-  int a, b;
-  double c, d;
-  [void] f([int, int]);
-  f([c, a] = [b, d] = 1.5);  // assignments in parameter list
-\end{cfacode}
-The tuple expression begins with a mass assignment of @1.5@ into @[b, d]@, which assigns @1.5@ into @b@, which is truncated to @1@, and @1.5@ into @d@, producing the tuple @[1, 1.5]@ as a result.
-That tuple is used as the right side of the multiple assignment (\ie, @[c, a] = [1, 1.5]@) that assigns @1@ into @c@ and @1.5@ into @a@, which is truncated to @1@, producing the result @[1, 1]@.
-Finally, the tuple @[1, 1]@ is used as an expression in the call to @f@.
-
-\subsection{Tuple Construction}
-Tuple construction and destruction follow the same rules and semantics as tuple assignment, except that in the case where there is no right side, the default constructor or destructor is called on each component of the tuple.
-As constructors and destructors did not exist in previous versions of \CFA or in \KWC, this is a primary contribution of this thesis to the design of tuples.
-\begin{cfacode}
-struct S;
-void ?{}(S *);         // (1)
-void ?{}(S *, int);    // (2)
-void ?{}(S *, double); // (3)
-void ?{}(S *, S);      // (4)
-
-[S, S] x = [3, 6.28];  // uses (2), (3), specialized constructors
-[S, S] y;              // uses (1), (1), default constructor
-[S, S] z = x.0;        // uses (4), (4), copy constructor
-\end{cfacode}
-In this example, @x@ is initialized by the multiple constructor calls @?{}(&x.0, 3)@ and @?{}(&x.1, 6.28)@, while @y@ is initialized by two default constructor calls @?{}(&y.0)@ and @?{}(&y.1)@.
-@z@ is initialized by mass copy constructor calls @?{}(&z.0, x.0)@ and @?{}(&z.1, x.0)@.
-Finally, @x@, @y@, and @z@ are destructed, \ie the calls @^?{}(&x.0)@, @^?{}(&x.1)@, @^?{}(&y.0)@, @^?{}(&y.1)@, @^?{}(&z.0)@, and @^?{}(&z.1)@.
-
-It is possible to define constructors and assignment functions for tuple types that provide new semantics, if the existing semantics do not fit the needs of an application.
-For example, the function @void ?{}([T, U] *, S);@ can be defined to allow a tuple variable to be constructed from a value of type @S@.
-\begin{cfacode}
-struct S { int x; double y; };
-void ?{}([int, double] * this, S s) {
-  this->0 = s.x;
-  this->1 = s.y;
-}
-\end{cfacode}
-Due to the structure of generated constructors, it is possible to pass a tuple to a generated constructor for a type with a member prefix that matches the type of the tuple.
-For example,
-\begin{cfacode}
-struct S { int x; double y; int z; };
-[int, double] t;
-S s = t;
-\end{cfacode}
-The initialization of @s@ with @t@ works by default because @t@ is flattened into its components, which satisfies the generated field constructor @?{}(S *, int, double)@ to initialize the first two values.
-
-\section{Member-Access Tuple Expression}
-\label{s:MemberAccessTuple}
-It is possible to access multiple fields from a single expression using a \emph{Member-Access Tuple Expression}.
-The result is a single tuple-valued expression whose type is the tuple of the types of the members.
-For example,
-\begin{cfacode}
-struct S { int x; double y; char * z; } s;
-s.[x, y, z];
-\end{cfacode}
-Here, the type of @s.[x, y, z]@ is @[int, double, char *]@.
-A member tuple expression has the form @a.[x, y, z];@ where @a@ is an expression with type @T@, where @T@ supports member access expressions, and @x, y, z@ are all members of @T@ with types @T$_x$@, @T$_y$@, and @T$_z$@ respectively.
-Then the type of @a.[x, y, z]@ is @[T$_x$, T$_y$, T$_z$]@.
-
-Since tuple index expressions are a form of member-access expression, it is possible to use tuple-index expressions in conjunction with member tuple expressions to manually restructure a tuple (\eg, rearrange components, drop components, duplicate components, etc.).
-\begin{cfacode}
-[int, int, long, double] x;
-void f(double, long);
-
-f(x.[0, 3]);          // f(x.0, x.3)
-x.[0, 1] = x.[1, 0];  // [x.0, x.1] = [x.1, x.0]
-[long, int, long] y = x.[2, 0, 2];
-\end{cfacode}
-
-It is possible for a member tuple expression to contain other member access expressions.
-For example,
-\begin{cfacode}
-struct A { double i; int j; };
-struct B { int * k; short l; };
-struct C { int x; A y; B z; } v;
-v.[x, y.[i, j], z.k];
-\end{cfacode}
-This expression is equivalent to @[v.x, [v.y.i, v.y.j], v.z.k]@.
-That is, the aggregate expression is effectively distributed across the tuple, which allows simple and easy access to multiple components in an aggregate, without repetition.
-It is guaranteed that the aggregate expression to the left of the @.@ in a member tuple expression is evaluated exactly once.
-As such, it is safe to use member tuple expressions on the result of a side-effecting function.
-\begin{cfacode}
-[int, float, double] f();
-[double, float] x = f().[2, 1];
-\end{cfacode}
-
-In \KWC, member tuple expressions are known as \emph{record field tuples} \cite{Till89}.
-Since \CFA permits these tuple-access expressions using structures, unions, and tuples, \emph{member tuple expression} or \emph{field tuple expression} is more appropriate.
-
-It is possible to extend member-access expressions further.
-Currently, a member-access expression whose member is a name requires that the aggregate is a structure or union, while a constant integer member requires the aggregate to be a tuple.
-In the interest of orthogonal design, \CFA could apply some meaning to the remaining combinations as well.
-For example,
-\begin{cfacode}
-struct S { int x, y; } s;
-[S, S] z;
-
-s.x;  // access member
-z.0;  // access component
-
-s.1;  // ???
-z.y;  // ???
-\end{cfacode}
-One possibility is for @s.1@ to select the second member of @s@.
-Under this interpretation, it becomes possible to not only access members of a struct by name, but also by position.
-Likewise, it seems natural to open this mechanism to enumerations as well, wherein the left side would be a type, rather than an expression.
-One benefit of this interpretation is familiarity, since it is extremely reminiscent of tuple-index expressions.
-On the other hand, it could be argued that this interpretation is brittle, in that changing the order of members or adding new members to a structure can break existing code that accesses members by position.
-This problem is less of a concern with tuples, since modifying a tuple affects only the code that directly uses the tuple, whereas modifying a structure has far reaching consequences for every instance of the structure.
-
-As for @z.y@, one interpretation is to extend the meaning of member tuple expressions.
-That is, currently the tuple must occur as the member, \ie to the right of the dot.
-Allowing tuples to the left of the dot could distribute the member across the elements of the tuple, in much the same way that member tuple expressions distribute the aggregate across the member tuple.
-In this example, @z.y@ expands to @[z.0.y, z.1.y]@, allowing what is effectively a very limited compile-time field-sections map operation, where the argument must be a tuple containing only aggregates having a member named @y@.
-It is questionable how useful this would actually be in practice, since structures often do not have names in common with other structures, and further this could cause maintainability issues in that it encourages programmers to adopt very simple naming conventions to maximize the amount of overlap between different types.
-Perhaps more useful would be to allow arrays on the left side of the dot, which would likewise allow mapping a field access across the entire array, producing an array of the contained fields.
-The immediate problem with this idea is that C arrays do not carry around their size, which would make it impossible to use this extension for anything other than a simple stack allocated array.
-
-Supposing this feature works as described, it would be necessary to specify an ordering for the expansion of member-access expressions versus member-tuple expressions.
-\begin{cfacode}
-struct S { int x, y; };
-[S, S] z;
-z.[x, y];  // ???
-// => [z.0, z.1].[x, y]
-// => [z.0.x, z.0.y, z.1.x, z.1.y]
-// or
-// => [z.x, z.y]
-// => [[z.0, z.1].x, [z.0, z.1].y]
-// => [z.0.x, z.1.x, z.0.y, z.1.y]
-\end{cfacode}
-Depending on exactly how the two tuples are combined, different results can be achieved.
-As such, a specific ordering would need to be imposed to make this feature useful.
-Furthermore, this addition moves a member-tuple expression's meaning from being clear statically to needing resolver support, since the member name needs to be distributed appropriately over each member of the tuple, which could itself be a tuple.
-
-A second possibility is for \CFA to have named tuples, as they exist in Swift and D.
-\begin{cfacode}
-typedef [int x, int y] Point2D;
-Point2D p1, p2;
-p1.x + p1.y + p2.x + p2.y;
-p1.0 + p1.1 + p2.0 + p2.1;  // equivalent
-\end{cfacode}
-In this simpler interpretation, a tuple type carries with it a list of possibly empty identifiers.
-This approach fits naturally with the named return-value feature, and would likely go a long way towards implementing it.
-
-Ultimately, the first two extensions introduce complexity into the model, with relatively little perceived benefit, and so were dropped from consideration.
-Named tuples are a potentially useful addition to the language, provided they can be parsed with a reasonable syntax.
-
-
-\section{Casting}
-In C, the cast operator is used to explicitly convert between types.
-In \CFA, the cast operator has a secondary use, which is type ascription, since it forces the expression resolution algorithm to choose the lowest cost conversion to the target type.
-That is, a cast can be used to select the type of an expression when it is ambiguous, as in the call to an overloaded function.
-\begin{cfacode}
-int f();     // (1)
-double f();  // (2)
-
-f();       // ambiguous - (1),(2) both equally viable
-(int)f();  // choose (1)
-\end{cfacode}
-Since casting is a fundamental operation in \CFA, casts need to be given a meaningful interpretation in the context of tuples.
-Taking a look at standard C provides some guidance with respect to the way casts should work with tuples.
-\begin{cfacode}[numbers=left]
-int f();
-void g();
-
-(void)f();  // valid, ignore results
-(int)g();   // invalid, void cannot be converted to int
-
-struct A { int x; };
-(struct A)f();  // invalid, int cannot be converted to A
-\end{cfacode}
-In C, line 4 is a valid cast, which calls @f@ and discards its result.
-On the other hand, line 5 is invalid, because @g@ does not produce a result, so requesting an @int@ to materialize from nothing is nonsensical.
-Finally, line 8 is also invalid, because in C casts only provide conversion between scalar types \cite[p.~91]{C11}.
-For consistency, this implies that any case wherein the number of components increases as a result of the cast is invalid, while casts that have the same or fewer number of components may be valid.
-
-Formally, a cast to tuple type is valid when $T_n \leq S_m$, where $T_n$ is the number of components in the target type and $S_m$ is the number of components in the source type, and for each $i$ in $[0, n)$, $S_i$ can be cast to $T_i$.
-Excess elements ($S_j$ for all $j$ in $[n, m)$) are evaluated, but their values are discarded so that they are not included in the result expression.
-This discarding naturally follows the way that a cast to void works in C.
-
-For example,
-\begin{cfacode}
-  [int, int, int] f();
-  [int, [int, int], int] g();
-
-  ([int, double])f();           // (1) valid
-  ([int, int, int])g();         // (2) valid
-  ([void, [int, int]])g();      // (3) valid
-  ([int, int, int, int])g();    // (4) invalid
-  ([int, [int, int, int]])g();  // (5) invalid
-\end{cfacode}
-
-(1) discards the last element of the return value and converts the second element to type double.
-Since @int@ is effectively a 1-element tuple, (2) discards the second component of the second element of the return value of @g@.
-If @g@ is free of side effects, this is equivalent to @[(int)(g().0), (int)(g().1.0), (int)(g().2)]@.
-Since @void@ is effectively a 0-element tuple, (3) discards the first and third return values, which is effectively equivalent to @[(int)(g().1.0), (int)(g().1.1)]@.
-% will this always hold true? probably, as constructors should give all of the conversion power we need. if casts become function calls, what would they look like? would need a way to specify the target type, which seems awkward. Also, C++ basically only has this because classes are closed to extension, while we don't have that problem (can have floating constructors for any type).
-Note that a cast is not a function call in \CFA, so flattening and structuring conversions do not occur for cast expressions.
-As such, (4) is invalid because the cast target type contains 4 components, while the source type contains only 3.
-Similarly, (5) is invalid because the cast @([int, int, int])(g().1)@ is invalid.
-That is, it is invalid to cast @[int, int]@ to @[int, int, int]@.
-
-\section{Polymorphism}
-Due to the implicit flattening and structuring conversions involved in argument passing, @otype@ and @dtype@ parameters are restricted to matching only with non-tuple types.
-The integration of polymorphism, type assertions, and monomorphic specialization of tuple-assertions is a primary contribution of this thesis to the design of tuples.
-\begin{cfacode}
-forall(otype T, dtype U)
-void f(T x, U * y);
-
-f([5, "hello"]);
-\end{cfacode}
-In this example, @[5, "hello"]@ is flattened, so that the argument list appears as @5, "hello"@.
-The argument matching algorithm binds @T@ to @int@ and @U@ to @const char@, and calls the function as normal.
-
-Tuples can contain otype and dtype components.
-For example, a plus operator can be written to add two triples of a type together.
-\begin{cfacode}
-forall(otype T | { T ?+?(T, T); })
-[T, T, T] ?+?([T, T, T] x, [T, T, T] y) {
-  return [x.0+y.0, x.1+y.1, x.2+y.2];
-}
-[int, int, int] x;
-int i1, i2, i3;
-[i1, i2, i3] = x + ([10, 20, 30]);
-\end{cfacode}
-Note that due to the implicit tuple conversions, this function is not restricted to the addition of two triples.
-A call to this plus operator type checks as long as a total of 6 non-tuple arguments are passed after flattening, and all of the arguments have a common type that can bind to @T@, with a pairwise @?+?@ over @T@.
-For example, these expressions also succeed and produce the same value.
-\begin{cfacode}
-([x.0, x.1]) + ([x.2, 10, 20, 30]);  // x + ([10, 20, 30])
-x.0 + ([x.1, x.2, 10, 20, 30]);      // x + ([10, 20, 30])
-\end{cfacode}
-This presents a potential problem if structure is important, as these three expressions look like they should have different meanings.
-Furthermore, these calls can be made ambiguous by introducing seemingly different functions.
-\begin{cfacode}
-forall(otype T | { T ?+?(T, T); })
-[T, T, T] ?+?([T, T] x, [T, T, T, T]);
-forall(otype T | { T ?+?(T, T); })
-[T, T, T] ?+?(T x, [T, T, T, T, T]);
-\end{cfacode}
-It is also important to note that these calls could be disambiguated if the function return types were different, as they likely would be for a reasonable implementation of @?+?@, since the return type is used in overload resolution.
-Still, these semantics are a deficiency of the current argument matching algorithm, and depending on the function, differing return values may not always be appropriate.
-These issues could be rectified by applying an appropriate conversion cost to the structuring and flattening conversions, which are currently 0-cost conversions in the expression resolver.
-Care would be needed in this case to ensure that exact matches do not incur such a cost.
-\begin{cfacode}
-void f([int, int], int, int);
-
-f([0, 0], 0, 0);    // no cost
-f(0, 0, 0, 0);      // cost for structuring
-f([0, 0], [0, 0]);  // cost for flattening
-f([0, 0, 0], 0);    // cost for flattening and structuring
-\end{cfacode}
-
-Until this point, it has been assumed that assertion arguments must match the parameter type exactly, modulo polymorphic specialization (\ie, no implicit conversions are applied to assertion arguments).
-This decision presents a conflict with the flexibility of tuples.
-\subsection{Assertion Inference}
-\begin{cfacode}
-int f([int, double], double);
-forall(otype T, otype U | { T f(T, U, U); })
-void g(T, U);
-g(5, 10.21);
-\end{cfacode}
-If assertion arguments must match exactly, then the call to @g@ cannot be resolved, since the expected type of @f@ is flat, while the only @f@ in scope requires a tuple type.
-Since tuples are fluid, this requirement reduces the usability of tuples in polymorphic code.
-To ease this pain point, function parameter and return lists are flattened for the purposes of type unification, which allows the previous example to pass expression resolution.
-
-This relaxation is made possible by extending the existing thunk generation scheme, as described by Bilson \cite{Bilson03}.
-Now, whenever a candidate's parameter structure does not exactly match the formal parameter's structure, a thunk is generated to specialize calls to the actual function.
-\begin{cfacode}
-int _thunk(int _p0, double _p1, double _p2) {
-  return f([_p0, _p1], _p2);
-}
-\end{cfacode}
-Essentially, this provides flattening and structuring conversions to inferred functions, improving the compatibility of tuples and polymorphism.
-
-\section{Implementation}
-Tuples are implemented in the \CFA translator via a transformation into generic types.
-Generic types are an independent contribution developed at the same time.
-The transformation into generic types and the generation of tuple-specific code are primary contributions of this thesis to tuples.
-
-The first time an $N$-tuple is seen for each $N$ in a scope, a generic type with $N$ type parameters is generated.
-For example,
-\begin{cfacode}
-[int, int] f() {
-  [double, double] x;
-  [int, double, int] y;
-}
-\end{cfacode}
-is transformed into
-\begin{cfacode}
-forall(dtype T0, dtype T1 | sized(T0) | sized(T1))
-struct _tuple2_ {  // generated before the first 2-tuple
-  T0 field_0;
-  T1 field_1;
-};
-_tuple2_(int, int) f() {
-  _tuple2_(double, double) x;
-  forall(dtype T0, dtype T1, dtype T2 | sized(T0) | sized(T1) | sized(T2))
-  struct _tuple3_ {  // generated before the first 3-tuple
-    T0 field_0;
-    T1 field_1;
-    T2 field_2;
-  };
-  _tuple3_(int, double, int) y;
-}
-\end{cfacode}
-
-Tuple expressions are then simply converted directly into compound literals
-\begin{cfacode}
-[5, 'x', 1.24];
-\end{cfacode}
-becomes
-\begin{cfacode}
-(_tuple3_(int, char, double)){ 5, 'x', 1.24 };
-\end{cfacode}
-
-Since tuples are essentially structures, tuple indexing expressions are just field accesses.
-\begin{cfacode}
-void f(int, [double, char]);
-[int, double] x;
-
-x.0+x.1;
-printf("%d %g\n", x);
-f(x, 'z');
-\end{cfacode}
-is transformed into
-\begin{cfacode}
-void f(int, _tuple2_(double, char));
-_tuple2_(int, double) x;
-
-x.field_0+x.field_1;
-printf("%d %g\n", x.field_0, x.field_1);
-f(x.field_0, (_tuple2_(double, char)){ x.field_1, 'z' });
-\end{cfacode}
-Note that due to flattening, @x@ used in the argument position is converted into the list of its fields.
-In the call to @f@, the second and third argument components are structured into a tuple argument.
-
-Expressions that may contain side effects are made into \emph{unique expressions} before being expanded by the flattening conversion.
-Each unique expression is assigned an identifier and is guaranteed to be executed exactly once.
-\begin{cfacode}
-void g(int, double);
-[int, double] h();
-g(h());
-\end{cfacode}
-Internally, this is converted to pseudo-\CFA
-\begin{cfacode}
-void g(int, double);
-[int, double] h();
-lazy [int, double] unq0 = h(); // deferred execution
-g(unq0.0, unq0.1);             // execute h() once
-\end{cfacode}
-That is, the function @h@ is evaluated lazily and its result is stored for subsequent accesses.
-Ultimately, unique expressions are converted into two variables and an expression.
-\begin{cfacode}
-void g(int, double);
-[int, double] h();
-
-_Bool _unq0_finished_ = 0;
-[int, double] _unq0;
-g(
-  (_unq0_finished_ ? _unq0 : (_unq0 = h(), _unq0_finished_ = 1, _unq0)).0,
-  (_unq0_finished_ ? _unq0 : (_unq0 = h(), _unq0_finished_ = 1, _unq0)).1
-);
-\end{cfacode}
-Since argument evaluation order is not specified by the C programming language, this scheme is built to work regardless of evaluation order.
-The first time a unique expression is executed, the actual expression is evaluated and the accompanying boolean is set to true.
-Every subsequent evaluation of the unique expression then results in an access to the stored result of the actual expression.
-
-Currently, the \CFA translator has a very broad, imprecise definition of impurity (side-effects), where every function call is assumed to be impure.
-This notion could be made more precise for certain intrinsic, auto-generated, and built-in functions, and could analyze function bodies, when they are available, to recursively detect impurity, to eliminate some unique expressions.
-It is possible that lazy evaluation could be exposed to the user through a lazy keyword with little additional effort.
-
-Tuple-member expressions are recursively expanded into a list of member-access expressions.
-\begin{cfacode}
-[int, [double, int, double], int] x;
-x.[0, 1.[0, 2]];
-\end{cfacode}
-becomes
-\begin{cfacode}
-[x.0, [x.1.0, x.1.2]];
-\end{cfacode}
-Tuple-member expressions also take advantage of unique expressions in the case of possible impurity.
-
-Finally, the various kinds of tuple assignment, constructors, and destructors generate GNU C statement expressions.
-For example, a mass assignment
-\begin{cfacode}
-int x, z;
-double y;
-[double, double] f();
-
-[x, y, z] = 1.5;            // mass assignment
-\end{cfacode}
-generates the following
-\begin{cfacode}
-// [x, y, z] = 1.5;
-_tuple3_(int, double, int) _tmp_stmtexpr_ret0;
-({ // GNU C statement expression
-  // assign LHS address temporaries
-  int *__massassign_L0 = &x;    // ?{}
-  double *__massassign_L1 = &y; // ?{}
-  int *__massassign_L2 = &z;    // ?{}
-
-  // assign RHS value temporary
-  double __massassign_R0 = 1.5; // ?{}
-
-  ({ // tuple construction - construct statement expr return variable
-    // assign LHS address temporaries
-    int *__multassign_L0 = (int *)&_tmp_stmtexpr_ret0.0;       // ?{}
-    double *__multassign_L1 = (double *)&_tmp_stmtexpr_ret0.1; // ?{}
-    int *__multassign_L2 = (int *)&_tmp_stmtexpr_ret0.2;       // ?{}
-
-    // assign RHS value temporaries and mass-assign to L0, L1, L2
-    int __multassign_R0 = (*__massassign_L0=(int)__massassign_R0); // ?{}
-    double __multassign_R1 = (*__massassign_L1=__massassign_R0);   // ?{}
-    int __multassign_R2 = (*__massassign_L2=(int)__massassign_R0); // ?{}
-
-    // perform construction of statement expr return variable using
-    // RHS value temporary
-    ((*__multassign_L0 = __multassign_R0 /* ?{} */),
-     (*__multassign_L1 = __multassign_R1 /* ?{} */),
-     (*__multassign_L2 = __multassign_R2 /* ?{} */));
-  });
-  _tmp_stmtexpr_ret0;
-});
-({ // tuple destruction - destruct assign expr value
-  int *__massassign_L3 = (int *)&_tmp_stmtexpr_ret0.0;       // ?{}
-  double *__massassign_L4 = (double *)&_tmp_stmtexpr_ret0.1; // ?{}
-  int *__massassign_L5 = (int *)&_tmp_stmtexpr_ret0.2;       // ?{}
-  ((*__massassign_L3 /* ^?{} */),
-   (*__massassign_L4 /* ^?{} */),
-   (*__massassign_L5 /* ^?{} */));
-});
-\end{cfacode}
-A variable is generated to store the value produced by a statement expression, since its fields may need to be constructed with a non-trivial constructor and it may need to be referred to multiple times, \eg, in a unique expression.
-$N$ LHS variables are generated and constructed using the address of the tuple components, and a single RHS variable is generated to store the value of the RHS without any loss of precision.
-A nested statement expression is generated that performs the individual assignments and constructs the return value using the results of the individual assignments.
-Finally, the statement expression temporary is destroyed at the end of the expression.
-
-Similarly, a multiple assignment
-\begin{cfacode}
-[x, y, z] = [f(), 3];       // multiple assignment
-\end{cfacode}
-generates the following
-\begin{cfacode}
-// [x, y, z] = [f(), 3];
-_tuple3_(int, double, int) _tmp_stmtexpr_ret0;
-({
-  // assign LHS address temporaries
-  int *__multassign_L0 = &x;    // ?{}
-  double *__multassign_L1 = &y; // ?{}
-  int *__multassign_L2 = &z;    // ?{}
-
-  // assign RHS value temporaries
-  _tuple2_(double, double) _tmp_cp_ret0;
-  _Bool _unq0_finished_ = 0;
-  double __multassign_R0 =
-    (_unq0_finished_ ?
-      _tmp_cp_ret0 :
-      (_tmp_cp_ret0=f(), _unq0_finished_=1, _tmp_cp_ret0)).0; // ?{}
-  double __multassign_R1 =
-    (_unq0_finished_ ?
-      _tmp_cp_ret0 :
-      (_tmp_cp_ret0=f(), _unq0_finished_=1, _tmp_cp_ret0)).1; // ?{}
-  ({ // tuple destruction - destruct f() return temporary
-    // assign LHS address temporaries
-    double *__massassign_L3 = (double *)&_tmp_cp_ret0.0;  // ?{}
-    double *__massassign_L4 = (double *)&_tmp_cp_ret0.1;  // ?{}
-    // perform destructions - intrinsic, so NOP
-    ((*__massassign_L3 /* ^?{} */),
-     (*__massassign_L4 /* ^?{} */));
-  });
-  int __multassign_R2 = 3; // ?{}
-
-  ({ // tuple construction - construct statement expr return variable
-    // assign LHS address temporaries
-    int *__multassign_L3 = (int *)&_tmp_stmtexpr_ret0.0;       // ?{}
-    double *__multassign_L4 = (double *)&_tmp_stmtexpr_ret0.1; // ?{}
-    int *__multassign_L5 = (int *)&_tmp_stmtexpr_ret0.2;       // ?{}
-
-    // assign RHS value temporaries and multiple-assign to L0, L1, L2
-    int __multassign_R3 = (*__multassign_L0=(int)__multassign_R0);  // ?{}
-    double __multassign_R4 = (*__multassign_L1=__multassign_R1);    // ?{}
-    int __multassign_R5 = (*__multassign_L2=__multassign_R2);       // ?{}
-
-    // perform construction of statement expr return variable using
-    // RHS value temporaries
-    ((*__multassign_L3=__multassign_R3 /* ?{} */),
-     (*__multassign_L4=__multassign_R4 /* ?{} */),
-     (*__multassign_L5=__multassign_R5 /* ?{} */));
-  });
-  _tmp_stmtexpr_ret0;
-});
-({  // tuple destruction - destruct assign expr value
-  // assign LHS address temporaries
-  int *__massassign_L5 = (int *)&_tmp_stmtexpr_ret0.0;       // ?{}
-  double *__massassign_L6 = (double *)&_tmp_stmtexpr_ret0.1; // ?{}
-  int *__massassign_L7 = (int *)&_tmp_stmtexpr_ret0.2;       // ?{}
-  // perform destructions - intrinsic, so NOP
-  ((*__massassign_L5 /* ^?{} */),
-   (*__massassign_L6 /* ^?{} */),
-   (*__massassign_L7 /* ^?{} */));
-});
-\end{cfacode}
-The difference here is that $N$ RHS values are stored into separate temporary variables.
-
-The use of statement expressions allows the translator to arbitrarily generate additional temporary variables as needed, but binds the implementation to a non-standard extension of the C language.
-There are other places where the \CFA translator makes use of GNU C extensions, such as its use of nested functions, so this is not a new restriction.
Index: doc/theses/rob/variadic.tex
===================================================================
--- doc/theses/rob/variadic.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,538 +1,0 @@
-%======================================================================
-\chapter{Variadic Functions}
-%======================================================================
-
-\section{Design Criteria} % TODO: better section name???
-C provides variadic functions through the manipulation of @va_list@ objects.
-In C, a variadic function is one which contains at least one parameter, followed by @...@ as the last token in the parameter list.
-In particular, some form of \emph{argument descriptor} or \emph{sentinel value} is needed to inform the function of the number of arguments and their types.
-Two common argument descriptors are format strings or counter parameters.
-It is important to note that both of these mechanisms are inherently redundant, because they require the user to explicitly specify information that the compiler already knows \footnote{While format specifiers can convey some information the compiler does not know, such as whether to print a number in decimal or hexadecimal, the number of arguments is wholly redundant.}.
-This required repetition is error prone, because it is easy for the user to add or remove arguments without updating the argument descriptor.
-In addition, C requires the programmer to hard code all of the possible expected types.
-As a result, it is cumbersome to write a function that is open to extension.
-For example, a simple function to sum $N$ @int@s,
-\begin{cfacode}
-int sum(int N, ...) {
-  va_list args;
-  va_start(args, N);
-  int ret = 0;
-  while(N) {
-    ret += va_arg(args, int);  // have to specify type
-    N--;
-  }
-  va_end(args);
-  return ret;
-}
-sum(3, 10, 20, 30);  // need to keep counter in sync
-\end{cfacode}
-The @va_list@ type is a special C data type that abstracts variadic-argument manipulation.
-The @va_start@ macro initializes a @va_list@, given the last named parameter.
-Each use of the @va_arg@ macro allows access to the next variadic argument, given a type.
-Since the function signature does not provide any information on what types can be passed to a variadic function, the compiler does not perform any error checks on a variadic call.
-As such, it is possible to pass any value to the @sum@ function, including pointers, floating-point numbers, and structures.
-In the case where the provided type is not compatible with the argument's actual type after default argument promotions, or if too many arguments are accessed, the behaviour is undefined \cite[p.~81]{C11}.
-Furthermore, there is no way to perform the necessary error checks in the @sum@ function at run-time, since type information is not carried into the function body.
-Since they rely on programmer convention rather than compile-time checks, variadic functions are unsafe.
-
-In practice, compilers can provide warnings to help mitigate some of the problems.
-For example, GCC provides the @format@ attribute to specify that a function uses a format string, which allows the compiler to perform some checks related to the standard format-specifiers.
-Unfortunately, this approach does not permit extensions to the format-string syntax, so a programmer cannot extend the attribute to warn for mismatches with custom types.
-
-As a result, C's variadic functions are a deficient language feature.
-Two options were examined to provide better, type-safe variadic functions in \CFA.
-\subsection{Whole Tuple Matching}
-Option 1 is to change the argument matching algorithm, so that type parameters can match whole tuples, rather than just their components.
-This option could be implemented with two phases of argument matching when a function contains type parameters and the argument list contains tuple arguments.
-If flattening and structuring fail to produce a match, a second attempt at matching the function and argument combination is made where tuple arguments are not expanded and structure must match exactly, modulo non-tuple implicit conversions.
-For example:
-\begin{cfacode}
-  forall(otype T, otype U | { T g(U); })
-  void f(T, U);
-
-  [int, int] g([int, int, int, int]);
-
-  f([1, 2], [3, 4, 5, 6]);
-\end{cfacode}
-With flattening and structuring, the call is first transformed into @f(1, 2, 3, 4, 5, 6)@.
-Since the first argument of type @T@ does not have a tuple type, unification decides that @T=int@ and @1@ is matched as the first parameter.
-Likewise, @U@ does not have a tuple type, so @U=int@ and @2@ is accepted as the second parameter.
-There are now no remaining formal parameters, but there are remaining arguments and the function is not variadic, so the match fails.
-
-With the addition of an exact matching attempt, @T=[int,int]@ and @U=[int,int,int,int]@, and so the arguments type check.
-Likewise, when inferring assertion @g@, an exact match is found.
-
-This approach is strict with respect to argument structure, by nature, which makes it syntactically awkward to use in ways that the existing tuple design is not.
-For example, consider a @new@ function that allocates memory using @malloc@, and constructs the result using arbitrary arguments.
-\begin{cfacode}
-struct Array;
-void ?{}(Array *, int, int, int);
-
-forall(dtype T, otype Params | sized(T) | { void ?{}(T *, Params); })
-T * new(Params p) {
-  return malloc(){ p };
-}
-Array(int) * x = new([1, 2, 3]);
-\end{cfacode}
-The call to @new@ is not particularly appealing, since it requires the use of square brackets at the call-site, which is not required in any other function call.
-This shifts the burden from the compiler to the programmer, which is almost always wrong, and creates an odd inconsistency within the language.
-Similarly, in order to pass 0 variadic arguments, an explicit empty tuple must be passed into the argument list, otherwise the exact matching rule would not have an argument to bind against.
-
-It should be otherwise noted that the addition of an exact matching rule only affects the outcome for polymorphic type-binding when tuples are involved.
-For non-tuple arguments, exact matching and flattening and structuring are equivalent.
-For tuple arguments to a function without polymorphic formal-parameters, flattening and structuring work whenever an exact match would have worked, since the tuple is flattened and implicitly restructured to its original structure.
-Thus there is nothing to be gained from permitting the exact matching rule to take effect when a function does not contain polymorphism and none of the arguments are tuples.
-
-Overall, this option takes a step in the right direction, but is contrary to the flexibility of the existing tuple design.
-
-\subsection{A New Typeclass}
-A second option is the addition of another kind of type parameter, @ttype@.
-Matching against a @ttype@ parameter consumes all remaining argument components and packages them into a tuple, binding to the resulting tuple of types.
-In a given parameter list, there should be at most one @ttype@ parameter that must occur last, otherwise the call can never resolve, given the previous rule.
-This idea essentially matches normal variadic semantics, with a strong feeling of similarity to \CCeleven variadic templates.
-As such, @ttype@ variables are also referred to as argument packs.
-This approach is the option that has been added to \CFA.
-
-Like variadic templates, the main way to manipulate @ttype@ polymorphic functions is through recursion.
-Since nothing is known about a parameter pack by default, assertion parameters are key to doing anything meaningful.
-Unlike variadic templates, @ttype@ polymorphic functions can be separately compiled.
-
-For example, a simple translation of the C sum function using @ttype@ is
-\begin{cfacode}
-int sum(void){ return 0; }        // (0)
-forall(ttype Params | { int sum(Params); })
-int sum(int x, Params rest) { // (1)
-  return x+sum(rest);
-}
-sum(10, 20, 30);
-\end{cfacode}
-Since (0) does not accept any arguments, it is not a valid candidate function for the call @sum(10, 20, 30)@.
-In order to call (1), @10@ is matched with @x@, and the argument resolution moves on to the argument pack @rest@, which consumes the remainder of the argument list and @Params@ is bound to @[20, 30]@.
-In order to finish the resolution of @sum@, an assertion parameter that matches @int sum(int, int)@ is required.
-Like in the previous iteration, (0) is not a valid candidate, so (1) is examined with @Params@ bound to @[int]@, requiring the assertion @int sum(int)@.
-Next, (0) fails, and to satisfy (1) @Params@ is bound to @[]@, requiring an assertion @int sum()@.
-Finally, (0) matches and (1) fails, which terminates the recursion.
-Effectively, this traces as @sum(10, 20, 30)@ $\rightarrow$ @10+sum(20, 30)@ $\rightarrow$ @10+(20+sum(30))@ $\rightarrow$ @10+(20+(30+sum()))@ $\rightarrow$ @10+(20+(30+0))@.
-
-Interestingly, this version does not require any form of argument descriptor, since the \CFA type system keeps track of all of these details.
-It might be reasonable to take the @sum@ function a step further to enforce a minimum number of arguments, which could be done simply
-\begin{cfacode}
-int sum(int x, int y){
-  return x+y;
-}
-forall(ttype Params | { int sum(int, Params); })
-int sum(int x, int y, Params rest) {
-  return sum(x+y, rest);
-}
-sum(10);          // invalid
-sum(10, 20);      // valid
-sum(10, 20, 30);  // valid
-...
-\end{cfacode}
-
-One more iteration permits the summation of any summable type, as long as all arguments are the same type.
-\begin{cfacode}
-trait summable(otype T) {
-  T ?+?(T, T);
-};
-forall(otype R | summable(R))
-R sum(R x, R y){
-  return x+y;
-}
-forall(otype R, ttype Params
-  | summable(R)
-  | { R sum(R, Params); })
-R sum(R x, R y, Params rest) {
-  return sum(x+y, rest);
-}
-sum(3, 10, 20, 30);
-\end{cfacode}
-Unlike C, it is not necessary to hard code the expected type.
-This @sum@ function is naturally open to extension, in that any user-defined type with a @?+?@ operator is automatically able to be used with the @sum@ function.
-That is to say, the programmer who writes @sum@ does not need full program knowledge of every possible data type, unlike what is necessary to write an equivalent function using the standard C mechanisms.
-
-\begin{sloppypar}
-Going one last step, it is possible to achieve full generality in \CFA, allowing the summation of arbitrary lists of summable types.
-\begin{cfacode}
-trait summable(otype T1, otype T2, otype R) {
-  R ?+?(T1, T2);
-};
-forall(otype T1, otype T2, otype R | summable(T1, T2, R))
-R sum(T1 x, T2 y) {
-  return x+y;
-}
-forall(otype T1, otype T2, otype T3, otype R, ttype Params
-  | summable(T1, T2, T3)
-  | { R sum(T3, Params); })
-R sum(T1 x, T2 y, Params rest ) {
-  return sum(x+y, rest);
-}
-sum(3, 10.5, 20, 30.3);
-\end{cfacode}
-The \CFA translator requires adding explicit @double ?+?(int, double)@ and @double ?+?(double, int)@ functions for this call to work, since implicit conversions are not supported for assertions.
-\end{sloppypar}
-
-A notable limitation of this approach is that it heavily relies on recursive assertions.
-The \CFA translator imposes a limitation on the depth of the recursion for assertion satisfaction.
-Currently, the limit is set to 4, which means that the first version of the @sum@ function is limited to at most 5 arguments, while the second version can support up to 6 arguments.
-The limit is set low due to inefficiencies in the current implementation of the \CFA expression resolver.
-There is ongoing work to improve the performance of the resolver, and with noticeable gains, the limit can be relaxed to allow longer argument lists to @ttype@ functions.
-
-C variadic syntax and @ttype@ polymorphism probably should not be mixed, since it is not clear where to draw the line to decide which arguments belong where.
-Furthermore, it might be desirable to disallow polymorphic functions to use C variadic syntax to encourage a \CFA style.
-Aside from calling C variadic functions, it is not obvious that there is anything that can be done with C variadics that could not also be done with @ttype@ parameters.
-
-Variadic templates in \CC require an ellipsis token to express that a parameter is a parameter pack and to expand a parameter pack.
-\CFA does not need an ellipsis in either case, since the type class @ttype@ is only used for variadics.
-An alternative design is to use an ellipsis combined with an existing type class.
-This approach was not taken because the largest benefit of the ellipsis token in \CC is the ability to expand a parameter pack within an expression, \eg, in fold expressions, which requires compile-time knowledge of the structure of the parameter pack, which is not available in \CFA.
-\begin{cppcode}
-template<typename... Args>
-void f(Args &... args) {
-  g(&args...);  // expand to addresses of pack elements
-}
-\end{cppcode}
-As such, the addition of an ellipsis token would be purely an aesthetic change in \CFA today.
-
-It is possible to write a type-safe variadic print routine, which can replace @printf@
-\begin{cfacode}
-struct S { int x, y; };
-forall(otype T, ttype Params |
-  { void print(T); void print(Params); })
-void print(T arg, Params rest) {
-  print(arg);
-  print(rest);
-}
-void print(char * x) { printf("%s", x); }
-void print(int x) { printf("%d", x);  }
-void print(S s) { print("{ ", s.x, ",", s.y, " }"); }
-print("s = ", (S){ 1, 2 }, "\n");
-\end{cfacode}
-This example routine showcases a variadic-template-like decomposition of the provided argument list.
-The individual @print@ routines allow printing a single element of a type.
-The polymorphic @print@ allows printing any list of types, as long as each individual type has a @print@ function.
-The individual print functions can be used to build up more complicated @print@ routines, such as for @S@, which is something that cannot be done with @printf@ in C.
-
-It is also possible to use @ttype@ polymorphism to provide arbitrary argument forwarding functions.
-For example, it is possible to write @new@ as a library function.
-\begin{cfacode}
-struct Array;
-void ?{}(Array *, int, int, int);
-
-forall(dtype T, ttype Params | sized(T) | { void ?{}(T *, Params); })
-T * new(Params p) {
-  return malloc(){ p }; // construct result of malloc
-}
-Array * x = new(1, 2, 3);
-\end{cfacode}
-In the call to @new@, @Array@ is selected to match @T@, and @Params@ is expanded to match @[int, int, int, int]@. To satisfy the assertions, a constructor with an interface compatible with @void ?{}(Array *, int, int, int)@ must exist in the current scope.
-
-The @new@ function provides the combination of polymorphic @malloc@ with a constructor call, so that it becomes impossible to forget to construct dynamically-allocated objects.
-This approach provides the type-safety of @new@ in \CC, without the need to specify the allocated type, thanks to return-type inference.
-
-\section{Implementation}
-
-The definition of @new@
-\begin{cfacode}
-forall(dtype T | sized(T)) T * malloc();
-
-forall(dtype T, ttype Params | sized(T) | { void ?{}(T *, Params); })
-T * new(Params p) {
-  return malloc(){ p }; // construct result of malloc
-}
-\end{cfacode}
-generates the following
-\begin{cfacode}
-void *malloc(long unsigned int _sizeof_T, long unsigned int _alignof_T);
-
-void *new(
-  void (*_adapter_)(void (*)(), void *, void *),
-  long unsigned int _sizeof_T,
-  long unsigned int _alignof_T,
-  long unsigned int _sizeof_Params,
-  long unsigned int _alignof_Params,
-  void (* _ctor_T)(void *, void *),
-  void *p
-){
-  void *_retval_new;
-  void *_tmp_cp_ret0;
-  void *_tmp_ctor_expr0;
-  _retval_new=
-    (_adapter_(_ctor_T,
-      (_tmp_ctor_expr0=(_tmp_cp_ret0=malloc(_sizeof_2tT, _alignof_2tT),
-        _tmp_cp_ret0)),
-      p),
-    _tmp_ctor_expr0); // ?{}
-  *(void **)&_tmp_cp_ret0; // ^?{}
-  return _retval_new;
-}
-\end{cfacode}
-The constructor for @T@ is called indirectly through the adapter function on the result of @malloc@ and the parameter pack.
-The variable that is allocated and constructed is then returned from @new@.
-
-A call to @new@
-\begin{cfacode}
-struct S { int x, y; };
-void ?{}(S *, int, int);
-
-S * s = new(3, 4);
-\end{cfacode}
-Generates the following
-\begin{cfacode}
-struct _tuple2_ {  // _tuple2_(T0, T1)
-  void *field_0;
-  void *field_1;
-};
-struct _conc__tuple2_0 {  // _tuple2_(int, int)
-  int field_0;
-  int field_1;
-};
-struct _conc__tuple2_0 _tmp_cp1;  // tuple argument to new
-struct S *_tmp_cp_ret1;           // return value from new
-void _thunk0(  // ?{}(S *, [int, int])
-  struct S *_p0,
-  struct _conc__tuple2_0 _p1
-){
-  _ctor_S(_p0, _p1.field_0, _p1.field_1);  // restructure tuple parameter
-}
-void _adapter(void (*_adaptee)(), void *_p0, void *_p1){
-  // apply adaptee to arguments after casting to actual types
-  ((void (*)(struct S *, struct _conc__tuple2_0))_adaptee)(
-    _p0,
-    *(struct _conc__tuple2_0 *)_p1
-  );
-}
-struct S *s = (struct S *)(_tmp_cp_ret1=
-  new(
-    _adapter,
-    sizeof(struct S),
-    __alignof__(struct S),
-    sizeof(struct _conc__tuple2_0),
-    __alignof__(struct _conc__tuple2_0),
-    (void (*)(void *, void *))&_thunk0,
-    (({ // copy construct tuple argument to new
-      int *__multassign_L0 = (int *)&_tmp_cp1.field_0;
-      int *__multassign_L1 = (int *)&_tmp_cp1.field_1;
-      int __multassign_R0 = 3;
-      int __multassign_R1 = 4;
-      ((*__multassign_L0=__multassign_R0 /* ?{} */) ,
-       (*__multassign_L1=__multassign_R1 /* ?{} */));
-    }), &_tmp_cp1)
-  ), _tmp_cp_ret1);
-*(struct S **)&_tmp_cp_ret1; // ^?{}  // destroy return value from new
-({  // destroy argument temporary
-  int *__massassign_L0 = (int *)&_tmp_cp1.field_0;
-  int *__massassign_L1 = (int *)&_tmp_cp1.field_1;
-  ((*__massassign_L0 /* ^?{} */) , (*__massassign_L1 /* ^?{} */));
-});
-\end{cfacode}
-Of note, @_thunk0@ is generated to translate calls to @?{}(S *, [int, int])@ into calls to @?{}(S *, int, int)@.
-The call to @new@ constructs a tuple argument using the supplied arguments.
-
-The @print@ function
-\begin{cfacode}
-forall(otype T, ttype Params |
-  { void print(T); void print(Params); })
-void print(T arg, Params rest) {
-  print(arg);
-  print(rest);
-}
-\end{cfacode}
-generates the following
-\begin{cfacode}
-void print_variadic(
-  void (*_adapterF_7tParams__P)(void (*)(), void *),
-  void (*_adapterF_2tT__P)(void (*)(), void *),
-  void (*_adapterF_P2tT2tT__MP)(void (*)(), void *, void *),
-  void (*_adapterF2tT_P2tT2tT_P_MP)(void (*)(), void *, void *, void *),
-  long unsigned int _sizeof_T,
-  long unsigned int _alignof_T,
-  long unsigned int _sizeof_Params,
-  long unsigned int _alignof_Params,
-  void *(*_assign_TT)(void *, void *),
-  void (*_ctor_T)(void *),
-  void (*_ctor_TT)(void *, void *),
-  void (*_dtor_T)(void *),
-  void (*print_T)(void *),
-  void (*print_Params)(void *),
-  void *arg,
-  void *rest
-){
-  void *_tmp_cp0 = __builtin_alloca(_sizeof_T);
-  _adapterF_2tT__P(  // print(arg)
-    ((void (*)())print_T),
-    (_adapterF_P2tT2tT__MP( // copy construct argument
-      ((void (*)())_ctor_TT),
-      _tmp_cp0,
-      arg
-    ), _tmp_cp0)
-  );
-  _dtor_T(_tmp_cp0);  // destroy argument temporary
-  _adapterF_7tParams__P(  // print(rest)
-    ((void (*)())print_Params),
-    rest
-  );
-}
-\end{cfacode}
-The @print_T@ routine is called indirectly through an adapter function with a copy constructed argument, followed by an indirect call to @print_Params@.
-
-A call to print
-\begin{cfacode}
-void print(const char * x) { printf("%s", x); }
-void print(int x) { printf("%d", x);  }
-
-print("x = ", 123, ".\n");
-\end{cfacode}
-generates the following
-\begin{cfacode}
-void print_string(const char *x){
-  int _tmp_cp_ret0;
-  (_tmp_cp_ret0=printf("%s", x)) , _tmp_cp_ret0;
-  *(int *)&_tmp_cp_ret0; // ^?{}
-}
-void print_int(int x){
-  int _tmp_cp_ret1;
-  (_tmp_cp_ret1=printf("%d", x)) , _tmp_cp_ret1;
-  *(int *)&_tmp_cp_ret1; // ^?{}
-}
-
-struct _tuple2_ {  // _tuple2_(T0, T1)
-  void *field_0;
-  void *field_1;
-};
-struct _conc__tuple2_0 {  // _tuple2_(int, const char *)
-  int field_0;
-  const char *field_1;
-};
-struct _conc__tuple2_0 _tmp_cp6;  // _tuple2_(int, const char *)
-const char *_thunk0(const char **_p0, const char *_p1){
-        // const char * ?=?(const char **, const char *)
-  return *_p0=_p1;
-}
-void _thunk1(const char **_p0){ // void ?{}(const char **)
-  *_p0; // ?{}
-}
-void _thunk2(const char **_p0, const char *_p1){
-        // void ?{}(const char **, const char *)
-  *_p0=_p1; // ?{}
-}
-void _thunk3(const char **_p0){ // void ^?{}(const char **)
-  *_p0; // ^?{}
-}
-void _thunk4(struct _conc__tuple2_0 _p0){
-        // void print([int, const char *])
-  struct _tuple1_ { // _tuple1_(T0)
-    void *field_0;
-  };
-  struct _conc__tuple1_1 { // _tuple1_(const char *)
-    const char *field_0;
-  };
-  void _thunk5(struct _conc__tuple1_1 _pp0){ // void print([const char *])
-    print_string(_pp0.field_0);  // print(rest.0)
-  }
-  void _adapter_i_pii_(
-    void (*_adaptee)(),
-    void *_ret,
-    void *_p0,
-    void *_p1
-  ){
-    *(int *)_ret=((int (*)(int *, int))_adaptee)(_p0, *(int *)_p1);
-  }
-  void _adapter_pii_(void (*_adaptee)(), void *_p0, void *_p1){
-    ((void (*)(int *, int ))_adaptee)(_p0, *(int *)_p1);
-  }
-  void _adapter_i_(void (*_adaptee)(), void *_p0){
-    ((void (*)(int))_adaptee)(*(int *)_p0);
-  }
-  void _adapter_tuple1_5_(void (*_adaptee)(), void *_p0){
-    ((void (*)(struct _conc__tuple1_1 ))_adaptee)(
-      *(struct _conc__tuple1_1 *)_p0
-    );
-  }
-  print_variadic(
-    _adapter_tuple1_5,
-    _adapter_i_,
-    _adapter_pii_,
-    _adapter_i_pii_,
-    sizeof(int),
-    __alignof__(int),
-    sizeof(struct _conc__tuple1_1),
-    __alignof__(struct _conc__tuple1_1),
-    (void *(*)(void *, void *))_assign_i,    // int ?=?(int *, int)
-    (void (*)(void *))_ctor_i,               // void ?{}(int *)
-    (void (*)(void *, void *))_ctor_ii,      // void ?{}(int *, int)
-    (void (*)(void *))_dtor_ii,              // void ^?{}(int *)
-    (void (*)(void *))print_int,             // void print(int)
-    (void (*)(void *))&_thunk5,              // void print([const char *])
-    &_p0.field_0,                            // rest.0
-    &(struct _conc__tuple1_1 ){ _p0.field_1 }// [rest.1]
-  );
-}
-struct _tuple1_ {  // _tuple1_(T0)
-  void *field_0;
-};
-struct _conc__tuple1_6 {  // _tuple_1(const char *)
-  const char *field_0;
-};
-const char *_temp0;
-_temp0="x = ";
-void _adapter_pstring_pstring_string(
-  void (*_adaptee)(),
-  void *_ret,
-  void *_p0,
-  void *_p1
-){
-  *(const char **)_ret=
-    ((const char *(*)(const char **, const char *))_adaptee)(
-      _p0,
-      *(const char **)_p1
-    );
-}
-void _adapter_pstring_string(void (*_adaptee)(), void *_p0, void *_p1){
-  ((void (*)(const char **, const char *))_adaptee)(
-    _p0,
-    *(const char **)_p1
-  );
-}
-void _adapter_string_(void (*_adaptee)(), void *_p0){
-  ((void (*)(const char *))_adaptee)(*(const char **)_p0);
-}
-void _adapter_tuple2_0_(void (*_adaptee)(), void *_p0){
-  ((void (*)(struct _conc__tuple2_0 ))_adaptee)(
-    *(struct _conc__tuple2_0 *)_p0
-  );
-}
-print_variadic(
-  _adapter_tuple2_0_,
-  _adapter_string_,
-  _adapter_pstring_string_,
-  _adapter_pstring_pstring_string_,
-  sizeof(const char *),
-  __alignof__(const char *),
-  sizeof(struct _conc__tuple2_0 ),
-  __alignof__(struct _conc__tuple2_0 ),
-  &_thunk0,     // const char * ?=?(const char **, const char *)
-  &_thunk1,     // void ?{}(const char **)
-  &_thunk2,     // void ?{}(const char **, const char *)
-  &_thunk3,     // void ^?{}(const char **)
-  print_string, // void print(const char *)
-  &_thunk4,     // void print([int, const char *])
-  &_temp0,                             // "x = "
-  (({  // copy construct tuple argument to print
-    int *__multassign_L0 = (int *)&_tmp_cp6.field_0;
-    const char **__multassign_L1 = (const char **)&_tmp_cp6.field_1;
-    int __multassign_R0 = 123;
-    const char *__multassign_R1 = ".\n";
-    ((*__multassign_L0=__multassign_R0 /* ?{} */),
-     (*__multassign_L1=__multassign_R1 /* ?{} */));
-  }), &_tmp_cp6)                        // [123, ".\n"]
-);
-({  // destroy argument temporary
-  int *__massassign_L0 = (int *)&_tmp_cp6.field_0;
-  const char **__massassign_L1 = (const char **)&_tmp_cp6.field_1;
-  ((*__massassign_L0 /* ^?{} */) , (*__massassign_L1 /* ^?{} */));
-});
-\end{cfacode}
-The type @_tuple2_@ is generated to allow passing the @rest@ argument to @print_variadic@.
-Thunks 0 through 3 provide wrappers for the @otype@ parameters for @const char *@, while @_thunk4@ translates a call to @print([int, const char *])@ into a call to @print_variadic(int, [const char *])@.
-This all builds to a call to @print_variadic@, with the appropriate copy construction of the tuple argument.
Index: doc/theses/rob_schluntz/.gitignore
===================================================================
--- doc/theses/rob_schluntz/.gitignore	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/.gitignore	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,17 @@
+# generated by latex
+*.aux
+*.bbl
+*.blg
+*.brf
+*.dvi
+*.idx
+*.ilg
+*.ind
+*.log
+*.out
+*.pdf
+*.ps
+*.toc
+*.lof
+*.lot
+*.synctex.gz
Index: doc/theses/rob_schluntz/Makefile
===================================================================
--- doc/theses/rob_schluntz/Makefile	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/Makefile	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,25 @@
+## Search path shared by LaTeX and BibTeX; the trailing ':' appends the TeX system default directories.
+TeXLIB = .:../LaTeXmacros:../bibliography/:
+LaTeX  = TEXINPUTS=${TeXLIB} && export TEXINPUTS && pdflatex -halt-on-error
+BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex
+
+## These targets are commands, not files: a stray file named "all", "clean" or "spotless" must not satisfy them.
+.PHONY : all clean spotless
+
+all : thesis.pdf
+
+## Run LaTeX, then BibTeX, then LaTeX twice more so citations and cross-references resolve; also emit a PostScript copy.
+thesis.pdf : Makefile ../LaTeXmacros/common.tex cfa-format.tex thesis.tex intro.tex ctordtor.tex tuples.tex variadic.tex conclusions.tex
+	${LaTeX} thesis
+	${BibTeX} thesis
+	${LaTeX} thesis
+	${LaTeX} thesis
+	pdf2ps thesis.pdf thesis.ps
+
+## Remove intermediate LaTeX/BibTeX files but keep the generated documents.
+clean :
+	rm -f *.aux *.bbl *.blg *.lof *.log *.lot *.out *.toc
+
+## Remove the generated documents as well.
+spotless : clean
+	rm -f thesis.pdf thesis.ps
Index: doc/theses/rob_schluntz/cfa-format.tex
===================================================================
--- doc/theses/rob_schluntz/cfa-format.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/cfa-format.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,227 @@
+% \usepackage{xcolor}
+% \usepackage{listings}
+% \usepackage{booktabs}
+% \usepackage{array}
+% \newcolumntype{?}{!{\vrule width 1pt}} % thick vertical line
+
+
+% like Mac Classic or iPlastic
+% \definecolor{basicCol}{HTML}{000000}
+% \definecolor{commentCol}{HTML}{0066FF}
+% \definecolor{stringCol}{HTML}{036A07}
+% \definecolor{keywordCol}{HTML}{0000FF}
+% \definecolor{identifierCol}{HTML}{318495}
+
+% like Visual Studio 2010
+% \definecolor{basicCol}{HTML}{000000}
+% \definecolor{commentCol}{HTML}{006400}
+% \definecolor{stringCol}{HTML}{A31515}
+% \definecolor{keywordCol}{HTML}{0000FF}
+% \definecolor{identifierCol}{HTML}{000000}
+
+\definecolor{basicCol}{HTML}{000000} % active palette: every token class below is plain black (000000)
+\definecolor{commentCol}{HTML}{000000} % comments
+\definecolor{stringCol}{HTML}{000000} % string literals
+\definecolor{keywordCol}{HTML}{000000} % keywords
+\definecolor{identifierCol}{HTML}{000000} % identifiers
+
+% from https://gist.github.com/nikolajquorning/92bbbeef32e1dd80105c9bf2daceb89a
+\lstdefinelanguage{sml} {
+  morekeywords= {
+    EQUAL, GREATER, LESS, NONE, SOME, abstraction, abstype, and, andalso, array, as, before, bool, case, char, datatype, do, else, end, eqtype, exception, exn, false, fn, fun, functor, handle, if, in, include, infix, infixr, int, let, list, local, nil, nonfix, not, o, of, op, open, option, orelse, overload, print, raise, real, rec, ref, sharing, sig, signature, string, struct, structure, substring, then, true, type, unit, val, vector, where, while, with, withtype, word
+  },
+  morestring=[b]", % double-quoted strings; [b] = delimiter can be backslash-escaped
+  morecomment=[s]{(*}{*)}, % comments delimited by (* and *)
+}
+
+\lstdefinelanguage{D}{ % listings language definition for D
+  % Keywords
+  morekeywords=[1]{
+    abstract, alias, align, auto, body, break, cast, catch, class, const,
+    continue, debug, delegate, delete, deprecated, do, else, enum, export,
+    false, final, finally, for, foreach, foreach_reverse, function, goto, if,
+    immutable, import, in, inout, interface, invariant, is, lazy, macro, mixin,
+    module, new, nothrow, null, out, override, package, pragma, private,
+    protected, public, pure, ref, return, shared, static, struct, super,
+    switch, synchronized, template, this, throw, true, try, typedef, typeid,
+    typeof, union, unittest, volatile, while, with
+  },
+  % Special identifiers, common functions
+  morekeywords=[2]{enforce},
+  % Ugly identifiers
+  morekeywords=[3]{
+    __DATE__, __EOF__, __FILE__, __LINE__, __TIMESTAMP__, __TIME__, __VENDOR__,
+    __VERSION__, __ctfe, __gshared, __monitor, __thread, __vptr, _argptr,
+    _arguments, _ctor, _dtor
+  },
+  % Basic types (each name listed once)
+  morekeywords=[4]{
+     byte, ubyte, short, ushort, int, uint, long, ulong, cent, ucent, void,
+     bool, bit, float, double, real, char, wchar, dchar, string, wstring,
+     dstring, ireal, ifloat, idouble, creal, cfloat, cdouble, size_t,
+     ptrdiff_t, sizediff_t, equals_t, hash_t
+  },
+  % Strings
+  morestring=[b]{"},
+  morestring=[b]{'},
+  morestring=[b]{`},
+  % Comments
+  comment=[l]{//},
+  morecomment=[s]{/*}{*/},
+  morecomment=[s][\color{blue}]{/**}{*/},
+  morecomment=[n]{/+}{+/},
+  morecomment=[n][\color{blue}]{/++}{+/},
+  % Options
+  sensitive=true
+}
+
+\lstdefinelanguage{rust}{ % listings language definition for Rust
+  % Keywords
+  morekeywords=[1]{
+    abstract, alignof, as, become, box,
+    break, const, continue, crate, do,
+    else, enum, extern, false, final,
+    fn, for, if, impl, in,
+    let, loop, macro, match, mod,
+    move, mut, offsetof, override, priv,
+    proc, pub, pure, ref, return,
+    Self, self, sizeof, static, struct,
+    super, trait, true,  type, typeof,
+    unsafe, unsized, use, virtual, where,
+    while, yield
+  },
+  % Strings
+  morestring=[b]{"},
+  % Comments
+  comment=[l]{//},
+  morecomment=[s]{/*}{*/},
+  % Options
+  sensitive=true % keywords are matched case-sensitively
+}
+
+\newcommand{\KWC}{K-W C\xspace} % shorthand that typesets the name "K-W C"
+
+\renewcommand{\ttdefault}{pcr} % typewriter family set to pcr (the PSNFSS family name for Courier)
+
+\newcommand{\basicstylesmall}{\scriptsize\ttfamily\color{basicCol}} % \scriptsize variant of the listing base font
+
+\lstdefinestyle{defaultStyle}{
+  escapeinside={@@}, % escape to LaTeX between @...@; NOTE(review): escapechar below may supersede this - confirm
+  basicstyle=\footnotesize\ttfamily\color{basicCol},
+  keywordstyle=\bfseries\color{keywordCol},
+  commentstyle=\itshape\color{commentCol},
+  identifierstyle=\color{identifierCol},
+  stringstyle=\color{stringCol},
+  mathescape=true, % dollar-delimited text inside a listing is typeset as math
+  columns=fixed,
+  aboveskip=4pt,                                  % spacing above/below code block
+  belowskip=3pt,
+  keepspaces=true,
+  frame=lines, % horizontal rule above and below each listing
+  literate=, % empty value: no literate replacements
+  showlines=true,                                 % show blank lines at end of code
+  showspaces=false,
+  showstringspaces=false,
+  escapechar=\$, % NOTE(review): the dollar sign is also the mathescape delimiter - confirm the intended interaction
+  xleftmargin=\parindentlnth,                     % indent code to paragraph indentation
+  moredelim=[is][\color{red}\bfseries]{**R**}{**R**},    % red highlighting
+  % moredelim=* detects keywords, comments, strings, and other delimiters and applies their formatting
+  % moredelim=** allows cumulative application
+}
+\lstset{
+  language = CFA, % CFA is not defined in this file; presumably supplied by the shared LaTeX macros - verify
+  style=defaultStyle
+}
+\lstMakeShortInline[basewidth=0.5em,breaklines=true,breakatwhitespace,basicstyle=\normalsize\ttfamily\color{basicCol}]@  % single-character for \lstinline
+
+\lstnewenvironment{cfacode}[1][]{
+  \lstset{
+    language = CFA, % every *code environment here selects a language, applies defaultStyle, then appends the optional-argument keys
+    style=defaultStyle,
+    #1
+    % belowcaptionskip=1\baselineskip,
+    % breaklines=true,
+    % frame=L,
+  }
+}{}
+
+\lstnewenvironment{cppcode}[1][]{
+  \lstset{
+    language = c++, % C++ listings
+    style=defaultStyle,
+    #1
+  }
+}{}
+
+\lstnewenvironment{javacode}[1][]{
+  \lstset{
+    language = java, % Java listings
+    style=defaultStyle,
+    #1
+  }
+}{}
+
+\lstnewenvironment{scalacode}[1][]{
+  \lstset{
+    language = scala, % Scala listings
+    style=defaultStyle,
+    #1
+  }
+}{}
+
+\lstnewenvironment{smlcode}[1][]{
+  \lstset{
+    language = sml, % uses the sml language defined earlier in this file
+    style=defaultStyle,
+    #1
+  }
+}{}
+
+\lstnewenvironment{dcode}[1][]{
+  \lstset{
+    language = D, % uses the D language defined earlier in this file
+    style=defaultStyle,
+    #1
+  }
+}{}
+
+\lstnewenvironment{rustcode}[1][]{
+  \lstset{
+    language = rust, % uses the rust language defined earlier in this file
+    style=defaultStyle,
+    #1
+  }
+}{}
+
+\newcommand{\zero}{\lstinline{zero_t}\xspace} % inline-code shorthand for zero_t
+\newcommand{\one}{\lstinline{one_t}\xspace} % inline-code shorthand for one_t
+\newcommand{\ateq}{\lstinline{\@=}\xspace} % inline-code shorthand; presumably renders as @= - confirm output
+
+\newenvironment{newtext}{\color{red}}{\ignorespacesafterend} % sets the enclosed text in red
+
+% \lstset{ %
+%   backgroundcolor=\color{white},
+%   basicstyle=\footnotesize,
+%   breakatwhitespace=false,
+%   breaklines=true,
+%   captionpos=b,
+%   commentstyle=\color{mygreen},
+%   escapeinside={\%*}{*)},
+%   extendedchars=true,
+%   frame=single,
+%   keywordstyle=\color{blue},
+%   language=Prolog,
+%   numbers=left,
+%   numbersep=5pt,
+%   numberstyle=\tiny\color{mygray},
+%   rulecolor=\color{black},
+%   showspaces=false,
+%   showstringspaces=false,
+%   showtabs=false,
+%   stepnumber=2,
+%   stringstyle=\color{mymauve},
+%   tabsize=2,
+%   title=\lstname,
+%   morekeywords={not,\},\{,preconditions,effects },
+%   deletekeywords={time}
+% }
Index: doc/theses/rob_schluntz/conclusions.tex
===================================================================
--- doc/theses/rob_schluntz/conclusions.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/conclusions.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,285 @@
+%======================================================================
+\chapter{Conclusions}
+%======================================================================
+
+Adding resource management and tuples to \CFA has been a challenging design, engineering, and implementation exercise.
+On the surface, the work may appear as a rehash of similar mechanisms in \CC.
+However, every added feature is different from its \CC counterpart, often with extended functionality and better integration with C and its programmers, and always with support for separate compilation.
+All of these new features are being used extensively by the \CFA development-team to build the \CFA runtime system.
+In particular, the concurrency system is built on top of RAII, library functions @new@ and @delete@ are used to manage dynamically allocated objects, and tuples are used to provide uniform interfaces to C library routines such as @div@ and @remquo@.
+
+\section{Constructors and Destructors}
+\CFA supports the RAII idiom using constructors and destructors.
+There are many engineering challenges in introducing constructors and destructors, partially since \CFA is not an object-oriented language.
+By making use of managed types, \CFA programmers are afforded an extra layer of safety and ease of use in comparison to C programmers.
+While constructors and destructors provide a sensible default behaviour, \CFA allows experienced programmers to declare unmanaged objects to take control of object management for performance reasons.
+Constructors and destructors as named functions fit the \CFA polymorphism model perfectly, allowing polymorphic code to use managed types seamlessly.
+
+\section{Tuples}
+\CFA can express functions with multiple return values in a way that is simple, concise, and safe.
+The addition of multiple-return-value functions naturally requires a way to use multiple return values, which begets tuple types.
+Tuples provide two useful notions of assignment: multiple assignment, allowing simple, yet expressive assignment between multiple variables, and mass assignment, allowing a lossless assignment of a single value across multiple variables.
+Tuples have a flexible structure that allows the \CFA type-system to decide how to restructure tuples, making it syntactically simple to pass tuples between functions.
+Tuple types can be combined with polymorphism and tuple conversions can apply during assertion inference to produce a cohesive feel.
+
+\section{Variadic Functions}
+Type-safe variadic functions, with a similar feel to variadic templates, are added to \CFA.
+The new variadic functions can express complicated recursive algorithms.
+Unlike variadic templates, it is possible to write @new@ as a library routine and to separately compile @ttype@ polymorphic functions.
+Variadic functions are statically type checked and provide a user experience that is consistent with that of tuples and polymorphic functions.
+
+\section{Future Work}
+\subsection{Constructors and Destructors}
+Both \CC and Rust support move semantics, which expand the user's control of memory management by providing the ability to transfer ownership of large data, rather than forcing potentially expensive copy semantics.
+\CFA currently does not support move semantics, partially due to the complexity of the model.
+The design space is currently being explored with the goal of finding an alternative to move semantics that provides necessary performance benefits, while reducing the amount of repetition required to create a new type, along with the cognitive burden placed on the user.
+
+% One technique being evaluated is whether named return-values can be used to eliminate unnecessary temporaries \cite{Buhr94a}.
+% For example,
+% \begin{cfacode}
+% struct A { ... };
+% [A x] f(A x);
+% [A y] g(A y);
+% [A z] h(A z);
+
+% struct A a1, a2;
+% a2 = h(g(f(a1)));
+% \end{cfacode}
+% Here, since both @f@'s argument and return value have the same name and type, the compiler can infer that @f@ returns its argument.
+% With this knowledge, the compiler can reuse the storage for the argument to @f@ as the argument to @g@.  % TODO: cite Till thesis?
+
+Exception handling is among the features expected to be added to \CFA in the near future.
+For exception handling to properly interact with the rest of the language, it must ensure all RAII guarantees continue to be met.
+That is, when an exception is raised, it must properly unwind the stack by calling the destructors for any objects that live between the raise and the handler.
+This can be accomplished either by augmenting the translator to properly emit code that executes the destructors, or by switching destructors to hook into the GCC @cleanup@ attribute \cite[6.32.1]{GCCExtensions}.
+
+The @cleanup@ attribute, which is attached to a variable declaration, takes a function name as an argument and schedules that routine to be executed when the variable goes out of scope.
+\begin{cfacode}
+struct S { int x; };
+void __dtor_S(struct S *);
+{
+  __attribute__((cleanup(__dtor_S))) struct S s;
+} // calls __dtor_S(&s)
+\end{cfacode}
+This mechanism is known and understood by GCC, so that the destructor is properly called in any situation where a variable goes out of scope, including function returns, branches, and built-in GCC exception handling mechanisms using libunwind.
+
+A caveat of this approach is that the @cleanup@ attribute only permits a function that consumes a single argument of type @T *@ for a variable of type @T@.
+This restriction means that any destructor that consumes multiple arguments (\eg, because it is polymorphic) or any destructor that is a function pointer (\eg, because it is an assertion parameter) must be called through a local thunk.
+For example,
+\begin{cfacode}
+forall(otype T)
+struct Box {
+  T x;
+};
+forall(otype T) void ^?{}(Box(T) * x); // has implicit parameters
+
+forall(otype T)
+void f(T x) {
+  T y = x;  // destructor is a function-pointer parameter
+  Box(T) z = { x }; // destructor has multiple parameters
+}
+\end{cfacode}
+currently generates the following
+\begin{cfacode}
+void _dtor_BoxT(  // consumes more than 1 parameter due to assertions
+  void (*_adapter_PTT)(void (*)(), void *, void *),
+  void (*_adapter_T_PTT)(void (*)(), void *, void *, void *),
+  long unsigned int _sizeof_T,
+  long unsigned int _alignof_T,
+  void *(*_assign_T_PTT)(void *, void *),
+  void (*_ctor_PT)(void *),
+  void (*_ctor_PTT)(void *, void *),
+  void (*_dtor_PT)(void *),
+  void *x
+);
+
+void f(
+  void (*_adapter_PTT)(void (*)(), void *, void *),
+  void (*_adapter_T_PTT)(void (*)(), void *, void *, void *),
+  long unsigned int _sizeof_T,
+  long unsigned int _alignof_T,
+  void *(*_assign_TT)(void *, void *),
+  void (*_ctor_T)(void *),
+  void (*_ctor_TT)(void *, void *),
+  void (*_dtor_T)(void *),
+  void *x
+){
+  void *y = __builtin_alloca(_sizeof_T);
+  // constructor call elided
+
+  // generic layout computation elided
+  long unsigned int _sizeof_BoxT = ...;
+  void *z = __builtin_alloca(_sizeof_BoxT);
+  // constructor call elided
+
+  _dtor_BoxT(  // ^?{}(&z); -- _dtor_BoxT has > 1 arguments
+    _adapter_PTT,
+    _adapter_T_PTT,
+    _sizeof_T,
+    _alignof_T,
+    _assign_TT,
+    _ctor_T,
+    _ctor_TT,
+    _dtor_T,
+    z
+  );
+  _dtor_T(y);  // ^?{}(&y); -- _dtor_T is a function pointer
+}
+\end{cfacode}
+Further to this point, every distinct array type will require a thunk for its destructor, where array destructor code is currently inlined, since array destructors hard code the length of the array.
+
+For function call temporaries, new scopes have to be added for destructor ordering to remain consistent.
+In particular, the translator currently destroys argument and return value temporary objects as soon as the statement they were created for ends.
+In order for this behaviour to be maintained, new scopes have to be added around every statement that contains a function call.
+Since a nested expression can raise an exception, care must be taken when destroying temporary objects.
+One way to achieve this is to split statements at every function call, to provide the correct scoping to destroy objects as necessary.
+For example,
+\begin{cfacode}
+struct S { ... };
+void ?{}(S *, S);
+void ^?{}(S *);
+
+S f();
+S g(S);
+
+g(f());
+\end{cfacode}
+would generate
+\begin{cfacode}
+struct S { ... };
+void _ctor_S(struct S *, struct S);
+void _dtor_S(struct S *);
+
+{
+  __attribute__((cleanup(_dtor_S))) struct S _tmp1 = f();
+  __attribute__((cleanup(_dtor_S))) struct S _tmp2 =
+    (_ctor_S(&_tmp2, _tmp1), _tmp2);
+  __attribute__((cleanup(_dtor_S))) struct S _tmp3 = g(_tmp2);
+} // destroy _tmp3, _tmp2, _tmp1
+\end{cfacode}
+Note that destructors must be registered after the temporary is fully initialized, since it is possible for initialization expressions to raise exceptions, and a destructor should never be called on an uninitialized object.
+This requires a slightly strange looking initializer for constructor calls, where a comma expression is used to produce the value of the object being initialized, after the constructor call, conceptually bitwise copying the initialized data into itself.
+Since this copy is wholly unnecessary, it is easily optimized away.
+
+A second approach is to attach an accompanying boolean to every temporary that records whether the object contains valid data, and thus whether the value should be destructed.
+\begin{cfacode}
+struct S { ... };
+void _ctor_S(struct S *, struct S);
+void _dtor_S(struct S *);
+
+struct _tmp_bundle_S {
+  bool valid;
+  struct S value;
+};
+
+void _dtor_tmpS(struct _tmp_bundle_S * ret) {
+  if (ret->valid) {
+    _dtor_S(&ret->value);
+  }
+}
+
+{
+  __attribute__((cleanup(_dtor_tmpS))) struct _tmp_bundle_S _tmp1 = { 0 };
+  __attribute__((cleanup(_dtor_tmpS))) struct _tmp_bundle_S _tmp2 = { 0 };
+  __attribute__((cleanup(_dtor_tmpS))) struct _tmp_bundle_S _tmp3 = { 0 };
+  _tmp3.value = g(
+    (_ctor_S(
+      &_tmp2.value,
+      (_tmp1.value = f(), _tmp1.valid = 1, _tmp1.value)
+    ), _tmp2.valid = 1, _tmp2.value)
+  ), _tmp3.valid = 1, _tmp3.value;
+} // destroy _tmp3, _tmp2, _tmp1
+\end{cfacode}
+In particular, the boolean is set immediately after argument construction and immediately after return value copy.
+The boolean is checked as a part of the @cleanup@ routine, forwarding to the object's destructor if the object is valid.
+One such type and @cleanup@ routine needs to be generated for every type used in a function parameter or return value.
+
+The former approach generates much simpler code; however, splitting expressions requires care to ensure that expression evaluation order does not change.
+Expression ordering has to be performed by a full compiler, so it is possible that the latter approach would be more suited to the \CFA prototype, whereas the former approach is clearly the better option in a full compiler.
+More investigation is needed to determine whether the translator's current design can easily handle proper expression ordering.
+
+As discussed in Section \ref{s:implicit_copy_construction}, return values are destructed with a different @this@ pointer than they are constructed with.
+This problem can be easily fixed once a full \CFA compiler is built, since it would have full control over the call/return mechanism.
+In particular, since the callee is aware of where it needs to place the return value, it can construct the return value directly, rather than bitwise copy the internal data.
+
+Currently, the special functions are always auto-generated, except for generic types where the type parameter does not have assertions for the corresponding operation.
+For example,
+\begin{cfacode}
+forall(dtype T | sized(T) | { void ?{}(T *); })
+struct S { T x; };
+\end{cfacode}
+only auto-generates the default constructor for @S@, since the member @x@ is missing the other 3 special functions.
+Once deleted functions have been added, function generation can make use of this information to disable generation of special functions when a member has a deleted function.
+For example,
+\begin{cfacode}
+struct A {};
+void ?{}(A *) = delete;
+struct S { A x; };  // does not generate void ?{}(S *);
+\end{cfacode}
+
+Unmanaged objects and their interactions with the managed \CFA environment are an open problem that deserves greater attention.
+In particular, the interactions between unmanaged objects and copy semantics are subtle and can easily lead to errors.
+It is possible that the compiler should mark some of these situations as errors by default, and possibly conditionally emit warnings for some situations.
+Another possibility is to construct, destruct, and assign unmanaged objects using the intrinsic and auto-generated functions.
+A more thorough examination of the design space for this problem is required.
+
+Currently, the \CFA translator does not support any warnings.
+Ideally, the translator should support optional warnings in the case where it can detect that an object has been constructed twice.
+For example, forwarding constructor calls are guaranteed to initialize the entire object, so redundant constructor calls can cause problems such as memory leaks, while looking innocuous to a novice user.
+\begin{cfacode}
+struct B { ... };
+struct A {
+  B x, y, z;
+};
+void ?{}(A * a, B x) {
+  // y, z implicitly default constructed
+  (&a->x){ ... }; // explicitly construct x
+} // constructs an entire A
+void ?{}(A * a) {
+  (&a->y){}; // initialize y
+  a{ (B){ ... } }; // forwarding constructor call
+                   // initializes entire object, including y
+}
+\end{cfacode}
+
+Finally, while constructors provide a mechanism for establishing invariants, there is currently no mechanism for maintaining invariants without resorting to opaque types.
+That is, structure fields can be accessed and modified by any block of code without restriction, so while it is possible to ensure that an object is initially set to a valid state, it is not possible to ensure that it remains in a consistent state throughout its lifetime.
+A popular technique for ensuring consistency in object-oriented programming languages is to provide access modifiers such as @private@, which provides compile-time checks that only privileged code accesses private data.
+This approach could be added to \CFA, but it requires an idiomatic way of specifying what code is privileged and what data is protected.
+One possibility is to tie access control into an eventual module system.
+
+\begin{sloppypar}
+The implementation of implicit subobject-construction is currently an all-or-nothing check.
+That is, if a subobject is conditionally constructed, \eg within an if-statement, no implicit constructors for that object are added.
+\begin{cfacode}
+struct A { ... };
+void ?{}(A * a) { ... }
+
+struct B {
+  A a;
+};
+void ?{}(B * b) {
+  if (...) {
+    (&b->a){};  // explicitly constructed
+  } // does not construct in else case
+}
+\end{cfacode}
+This behaviour is unsafe and breaks the guarantee that constructors fully initialize objects.
+This situation should be properly handled, either by examining all paths and inserting implicit constructor calls only in the paths missing construction, or by emitting an error or warning.
+\end{sloppypar}
+
+\subsection{Tuples}
+Named result values are planned, but not yet implemented.
+This feature ties nicely into named tuples, as seen in D and Swift.
+
+Currently, tuple flattening and structuring conversions are 0-cost conversions in the resolution algorithm.
+This makes tuples conceptually very simple to work with, but easily causes unnecessary ambiguity in situations where the type system should be able to differentiate between alternatives.
+Adding an appropriate cost function to tuple conversions will allow tuples to interact with the rest of the programming language more cohesively.
+
+\subsection{Variadic Functions}
+Use of @ttype@ functions currently relies heavily on recursion.
+\CC has opened variadic templates up so that recursion is not strictly necessary in some cases, and it would be interesting to see if any such cases can be applied to \CFA.
+
+\CC supports variadic templated data-types, making it possible to express arbitrary length tuples, arbitrary parameter function objects, and more with generic types.
+Currently, \CFA does not support @ttype@-parameter generic-types, though there does not appear to be a technical reason that it cannot.
+Notably, opening up support for this makes it possible to implement the exit form of scope guard (see section \ref{s:ResMgmt}), making it possible to call arbitrary functions at scope exit in idiomatic \CFA.
Index: doc/theses/rob_schluntz/ctordtor.tex
===================================================================
--- doc/theses/rob_schluntz/ctordtor.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/ctordtor.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,1259 @@
+%======================================================================
+\chapter{Constructors and Destructors}
+%======================================================================
+
+% TODO now: as an experiment, implement Andrei Alexandrescu's ScopeGuard http://www.drdobbs.com/cpp/generic-change-the-way-you-write-excepti/184403758?pgno=2
+% doesn't seem possible to do this without allowing ttype on generic structs?
+
+Since \CFA is a true systems language, it does not require a garbage collector.
+As well, \CFA is not an object-oriented programming language, \ie, structures cannot have methods.
+While structures can have function pointer members, this is different from methods, since methods have implicit access to structure members and methods cannot be reassigned.
+Nevertheless, one important goal is to reduce programming complexity and increase safety.
+To that end, \CFA provides support for implicit pre/post-execution of routines for objects, via constructors and destructors.
+
+This chapter details the design of constructors and destructors in \CFA, along with their current implementation in the translator.
+Generated code samples have been edited for clarity and brevity.
+
+\section{Design Criteria}
+\label{s:Design}
+In designing constructors and destructors for \CFA, the primary goals were ease of use and maintaining backwards compatibility.
+
+In C, when a variable is defined, its value is initially undefined unless it is explicitly initialized or allocated in the static area.
+\begin{cfacode}
+int main() {
+  int x;        // uninitialized
+  int y = 5;    // initialized to 5
+  x = y;        // assigned 5
+  static int z; // initialized to 0
+}
+\end{cfacode}
+In the example above, @x@ is defined and left uninitialized, while @y@ is defined and initialized to 5.
+Next, @x@ is assigned the value of @y@.
+In the last line, @z@ is implicitly initialized to 0 since it is marked @static@.
+The key difference between assignment and initialization is that assignment occurs on a live object (\ie, an object that contains data).
+It is important to note that this means @x@ could have been used uninitialized prior to being assigned, while @y@ could not be used uninitialized.
+Use of uninitialized variables yields undefined behaviour \cite[p.~558]{C11}, which is a common source of errors in C programs.
+
+Initialization of a declaration is strictly optional, permitting uninitialized variables to exist.
+Furthermore, declaration initialization is limited to expressions, so there is no way to insert arbitrary code before a variable is live, without delaying the declaration.
+Many C compilers give good warnings for uninitialized variables most of the time, but they cannot in all cases.
+\begin{cfacode}
+int f(int *);  // output parameter: never reads, only writes
+int g(int *);  // input parameter: never writes, only reads,
+               // so requires initialized variable
+
+int x, y;
+f(&x);  // okay - only writes to x
+g(&y);  // uses y uninitialized
+\end{cfacode}
+Other languages are able to give errors in the case of uninitialized variable use, but due to backwards compatibility concerns, this is not the case in \CFA.
+
+In C, constructors and destructors are often mimicked by providing routines that create and tear down objects, where the tear down function is typically only necessary if the type modifies the execution environment.
+\begin{cfacode}
+struct array_int {
+  int * x;
+};
+struct array_int create_array(int sz) {
+  return (struct array_int) { calloc(sz, sizeof(int)) };
+}
+void destroy_array(struct array_int * arr) {
+  free(arr->x);
+}
+\end{cfacode}
+This idiom does not provide any guarantees unless the structure is opaque, which then requires that all objects are heap allocated.
+\begin{cfacode}
+struct opaque_array_int;
+struct opaque_array_int * create_opaque_array(int sz);
+void destroy_opaque_array(opaque_array_int *);
+int opaque_get(opaque_array_int *, int);  // subscript
+
+opaque_array_int * x = create_opaque_array(10);
+int x2 = opaque_get(x, 2);
+\end{cfacode}
+This pattern is cumbersome to use since every access becomes a function call, requiring awkward syntax and a performance cost.
+While useful in some situations, this compromise is too restrictive.
+Furthermore, even with this idiom it is easy to make mistakes, such as forgetting to destroy an object or destroying it multiple times.
+
+A constructor provides a way of ensuring that the necessary aspects of object initialization are performed, from setting up invariants to providing compile- and run-time checks for appropriate initialization parameters.
+This goal is achieved through a \emph{guarantee} that a constructor is called \emph{implicitly} after every object is allocated from a type with associated constructors, as part of an object's \emph{definition}.
+Since a constructor is called on every object of a managed type, it is \emph{impossible} to forget to initialize such objects, as long as all constructors perform some sensible form of initialization.
+
+In \CFA, a constructor is a function with the name @?{}@.
+Like other operators in \CFA, the name represents the syntax used to call the constructor, \eg, @struct S = { ... };@.
+Every constructor must have a return type of @void@ and at least one parameter, the first of which is colloquially referred to as the \emph{this} parameter, as in many object-oriented programming-languages (however, a programmer can give it an arbitrary name).
+The @this@ parameter must have a pointer type, whose base type is the type of object that the function constructs.
+There is precedent for enforcing the first parameter to be the @this@ parameter in other operators, such as the assignment operator, where in both cases, the left-hand side of the equals is the first parameter.
+There is currently a proposal to add reference types to \CFA.
+Once this proposal has been implemented, the @this@ parameter will become a reference type with the same restrictions.
+
+Consider the definition of a simple type encapsulating a dynamic array of @int@s.
+
+\begin{cfacode}
+struct Array {
+  int * data;
+  int len;
+};
+\end{cfacode}
+
+In C, if the user creates an @Array@ object, the fields @data@ and @len@ are uninitialized, unless an explicit initializer list is present.
+It is the user's responsibility to remember to initialize both of the fields to sensible values, since there are no implicit checks for invalid values or reasonable defaults.
+In \CFA, the user can define a constructor to handle initialization of @Array@ objects.
+
+\begin{cfacode}
+void ?{}(Array * arr){
+  arr->len = 10;    // default size
+  arr->data = malloc(sizeof(int)*arr->len);
+  for (int i = 0; i < arr->len; ++i) {
+    arr->data[i] = 0;
+  }
+}
+Array x;  // allocates storage for Array and calls ?{}(&x)
+\end{cfacode}
+
+This constructor initializes @x@ so that its @len@ field has the value 10, and its @data@ field holds a pointer to a block of memory large enough to hold 10 @int@s, and sets the value of each element of the array to 0.
+This particular form of constructor is called the \emph{default constructor}, because it is called on an object defined without an initializer.
+In other words, a default constructor is a constructor that takes a single argument: the @this@ parameter.
+
+In \CFA, a destructor is a function much like a constructor, except that its name is \lstinline!^?{}! \footnote{Originally, the name @~?{}@ was chosen for destructors, to provide familiarity to \CC programmers. Unfortunately, this name causes parsing conflicts with the bitwise-not operator when used with operator syntax (see section \ref{sub:syntax}).} and it takes only one argument.
+A destructor for the @Array@ type can be defined as:
+\begin{cfacode}
+void ^?{}(Array * arr) {
+  free(arr->data);
+}
+\end{cfacode}
+The destructor is automatically called at deallocation for all objects of type @Array@.
+Hence, the memory associated with an @Array@ is automatically freed when the object's lifetime ends.
+The exact guarantees made by \CFA with respect to the calling of destructors are discussed in section \ref{sub:implicit_dtor}.
+
+As discussed previously, the distinction between initialization and assignment is important.
+Consider the following example.
+\begin{cfacode}[numbers=left]
+Array x, y;
+Array z = x;  // initialization
+y = x;        // assignment
+\end{cfacode}
+By the previous definition of the default constructor for @Array@, @x@ and @y@ are initialized to valid arrays of length 10 after their respective definitions.
+On line 2, @z@ is initialized with the value of @x@, while on line 3, @y@ is assigned the value of @x@.
+The key distinction between initialization and assignment is that a value to be initialized does not hold any meaningful values, whereas an object to be assigned might.
+In particular, these cases cannot be handled the same way because in the former case @z@ has no array, while @y@ does.
+A \emph{copy constructor} is used to perform initialization using another object of the same type.
+
+\begin{cfacode}[emph={other}, emphstyle=\color{red}]
+void ?{}(Array * arr, Array other) {  // copy constructor
+  arr->len = other.len;               // initialization
+  arr->data = malloc(sizeof(int)*arr->len);
+  for (int i = 0; i < arr->len; ++i) {
+    arr->data[i] = other.data[i];     // copy from other object
+  }
+}
+Array ?=?(Array * arr, Array other) { // assignment
+  ^?{}(arr);                          // explicitly call destructor
+  ?{}(arr, other);                    // explicitly call constructor
+  return *arr;
+}
+\end{cfacode}
+The two functions above handle the cases of initialization and assignment.
+The first function is called a copy constructor, because it constructs its argument by copying the values from another object of the same type.
+The second function is the standard copy-assignment operator.
+\CFA does not currently have the concept of reference types, so the most appropriate type for the source object in copy constructors and assignment operators is a value type.
+Appropriate care is taken in the implementation to avoid recursive calls to the copy constructor.
+The four functions (default constructor, destructor, copy constructor, and assignment operator) are special in that they safely control the state of most objects.
+
+It is possible to define a constructor that takes any combination of parameters to provide additional initialization options.
+For example, a reasonable extension to the array type would be a constructor that allocates the array to a given initial capacity and initializes the elements of the array to a given @fill@ value.
+\begin{cfacode}
+void ?{}(Array * arr, int capacity, int fill) {
+  arr->len = capacity;
+  arr->data = malloc(sizeof(int)*arr->len);
+  for (int i = 0; i < arr->len; ++i) {
+    arr->data[i] = fill;
+  }
+}
+\end{cfacode}
+
+In \CFA, constructors are called implicitly in initialization contexts.
+\begin{cfacode}
+Array x, y = { 20, 0xdeadbeef }, z = y;
+\end{cfacode}
+Constructor calls look just like C initializers, which allows them to be inserted into legacy C code with minimal code changes, and also provides a very simple syntax that veteran C programmers are familiar with.
+One downside of reusing C initialization syntax is that it is not possible to determine whether an object is constructed just by looking at its declaration, since that requires knowledge of whether the type is managed at that point in the program.
+
+This example generates the following code
+\begin{cfacode}
+Array x;
+?{}(&x);                  // implicit default construct
+Array y;
+?{}(&y, 20, 0xdeadbeef);  // explicit fill construct
+Array z;
+?{}(&z, y);               // copy construct
+^?{}(&z);                 // implicit destruct
+^?{}(&y);                 // implicit destruct
+^?{}(&x);                 // implicit destruct
+\end{cfacode}
+Due to the way that constructor calls are interleaved, it is impossible for @y@ to be referenced before it is initialized, except in its own constructor.
+This loophole is minor and exists in \CC as well.
+Destructors are implicitly called in reverse declaration-order so that objects with dependencies are destructed before the objects they are dependent on.
+
+\subsection{Calling Syntax}
+\label{sub:syntax}
+There are several ways to construct an object in \CFA.
+As previously introduced, every variable is automatically constructed at its definition, which is the most natural way to construct an object.
+\begin{cfacode}
+struct A { ... };
+void ?{}(A *);
+void ?{}(A *, A);
+void ?{}(A *, int, int);
+
+A a1;             // default constructed
+A a2 = { 0, 0 };  // constructed with 2 ints
+A a3 = a1;        // copy constructed
+// implicitly destruct a3, a2, a1, in that order
+\end{cfacode}
+Since constructors and destructors are just functions, the second way is to call the function directly.
+\begin{cfacode}
+struct A { int a; };
+void ?{}(A *);
+void ?{}(A *, A);
+void ^?{}(A *);
+
+A x;               // implicitly default constructed: ?{}(&x)
+A * y = malloc();  // copy construct: ?{}(&y, malloc())
+
+^?{}(&x);   // explicit destroy x, in different order
+?{}(&x);    // explicit construct x, second construction
+^?{}(y);    // explicit destroy y
+?{}(y, x);  // explicit construct y from x, second construction
+
+// implicit ^?{}(&y);
+// implicit ^?{}(&x);
+\end{cfacode}
+Calling a constructor or destructor directly is a flexible feature that allows complete control over the management of storage.
+In particular, constructors double as a placement syntax.
+\begin{cfacode}
+struct A { ... };
+struct memory_pool { ... };
+void ?{}(memory_pool *, size_t);
+
+memory_pool pool = { 1024 };  // create an arena of size 1024
+
+A * a = allocate(&pool);      // allocate from memory pool
+?{}(a);                       // construct an A in place
+
+for (int i = 0; i < 10; i++) {
+  // reuse storage rather than reallocating
+  ^?{}(a);
+  ?{}(a);
+  // use a ...
+}
+^?{}(a);
+deallocate(&pool, a);         // return to memory pool
+\end{cfacode}
+Finally, constructors and destructors support \emph{operator syntax}.
+Like other operators in \CFA, the function name mirrors the use-case, in that the question marks are placeholders for the first $N$ arguments.
+This syntactic form is similar to the new initialization syntax in \CCeleven, except that it is used in expression contexts, rather than declaration contexts.
+\begin{cfacode}
+struct A { ... };
+struct B { A a; };
+
+A x, y, * z = &x;
+(&x){}          // default construct
+(&x){ y }       // copy construct
+(&x){ 1, 2, 3 } // construct with 3 arguments
+z{ y };         // copy construct x through a pointer
+^(&x){}         // destruct
+
+void ?{}(B * b) {
+  (&b->a){ 11, 17, 13 };  // construct a member
+}
+\end{cfacode}
+Constructor operator syntax has relatively high precedence, requiring parentheses around an address-of expression.
+Destructor operator syntax is actually a statement, and requires parentheses for symmetry with constructor syntax.
+
+One of these three syntactic forms should appeal to either C or \CC programmers using \CFA.
+
+\subsection{Constructor Expressions}
+In \CFA, it is possible to use a constructor as an expression.
+Like other operators, the function name @?{}@ matches its operator syntax.
+For example, @(&x){}@ calls the default constructor on the variable @x@, and produces @&x@ as a result.
+A key example for this capability is the use of constructor expressions to initialize the result of a call to @malloc@.
+\begin{cfacode}
+struct X { ... };
+void ?{}(X *, double);
+X * x = malloc(){ 1.5 };
+\end{cfacode}
+In this example, @malloc@ dynamically allocates storage and initializes it using a constructor, all before assigning it into the variable @x@.
+Intuitively, the expression-resolver determines that @malloc@ returns some type @T *@, as does the constructor expression since it returns the type of its argument.
+This type flows outwards to the declaration site where the expected type is known to be @X *@, thus the first argument to the constructor must be @X *@, narrowing the search space.
+
+If this extension is not present, constructing dynamically allocated objects is much more cumbersome, requiring separate initialization of the pointer and initialization of the pointed-to memory.
+\begin{cfacode}
+X * x = malloc();
+x{ 1.5 };
+\end{cfacode}
+Not only is this verbose, but it is also more error prone, since this form allows maintenance code to easily sneak in between the initialization of @x@ and the initialization of the memory that @x@ points to.
+This feature is implemented via a transformation producing the value of the first argument of the constructor, since constructors do not themselves have a return value.
+Since this transformation results in two instances of the subexpression, care is taken to allocate a temporary variable to hold the result of the subexpression in the case where the subexpression may contain side effects.
+The previous example generates the following code.
+\begin{cfacode}
+struct X *_tmp_ctor;
+struct X *x = ?{}(  // construct result of malloc
+  _tmp_ctor=malloc_T( // store result of malloc
+    sizeof(struct X),
+    _Alignof(struct X)
+  ),
+  1.5
+), _tmp_ctor; // produce constructed result of malloc
+\end{cfacode}
+It should be noted that this technique is not exclusive to @malloc@, and allows a user to write a custom allocator that can be idiomatically used in much the same way as a constructed @malloc@ call.
+
+While it is possible to use operator syntax with destructors, destructors invalidate their argument, thus operator syntax with destructors is a void-typed expression.
+
+\subsection{Function Generation}
+In \CFA, every type is defined to have the core set of four special functions described previously.
+Having these functions exist for every type greatly simplifies the semantics of the language, since most operations can simply be defined directly in terms of function calls.
+In addition to simplifying the definition of the language, it also simplifies the analysis that the translator must perform.
+If the translator can expect these functions to exist, then it can unconditionally attempt to resolve them.
+Moreover, the existence of a standard interface allows polymorphic code to interoperate with new types seamlessly.
+While automatic generation of assignment functions is present in previous versions of \CFA, the implementation has been largely rewritten to accommodate constructors and destructors.
+
+To mimic the behaviour of standard C, the default constructor and destructor for all of the basic types and for all pointer types are defined to do nothing, while the copy constructor and assignment operator perform a bitwise copy of the source parameter (as in \CC).
+This default is intended to maintain backwards compatibility and performance, by not imposing unexpected operations for a C programmer, as a zero-default behaviour would.
+However, it is possible for a user to define such constructors so that variables are safely zeroed by default, if desired.
+%%%%%%%%%%%%%%%%%%%%%%%%%% line width %%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{cfacode}
+void ?{}(int * i) { *i = 0; }
+forall(dtype T) void ?{}(T ** p) { *p = 0; }  // any pointer type
+void f() {
+  int x;    // initialized to 0
+  int * p;  // initialized to 0
+}
+\end{cfacode}
+%%%%%%%%%%%%%%%%%%%%%%%%%% line width %%%%%%%%%%%%%%%%%%%%%%%%%%
+
+There are several options for user-defined types: structures, unions, and enumerations.
+To aid in ease of use, the standard set of four functions is automatically generated for a user-defined type after its definition is completed.
+By auto-generating these functions, it is ensured that legacy C code continues to work correctly in every context where \CFA expects these functions to exist, since they are generated for every complete type.
+As well, these functions are always generated, since they may be needed by polymorphic functions.
+With that said, the generated functions are not called implicitly unless they are non-trivial, and are never exported, making it simple for the optimizer to strip them away when they are not used.
+
+The generated functions for enumerations are the simplest.
+Since enumerations in C are essentially just another integral type, the generated functions behave in the same way that the built-in functions for the basic types work.
+For example, given the enumeration
+\begin{cfacode}
+enum Colour {
+  R, G, B
+};
+\end{cfacode}
+The following functions are automatically generated.
+\begin{cfacode}
+void ?{}(enum Colour *_dst){
+  // default constructor does nothing
+}
+void ?{}(enum Colour *_dst, enum Colour _src){
+  *_dst=_src;  // bitwise copy
+}
+void ^?{}(enum Colour *_dst){
+  // destructor does nothing
+}
+enum Colour ?=?(enum Colour *_dst, enum Colour _src){
+  return *_dst=_src; // bitwise copy
+}
+\end{cfacode}
+In the future, \CFA will introduce strongly-typed enumerations, like those in \CC, wherein enumerations create a new type distinct from @int@ so that integral values require an explicit cast to be stored in an enumeration variable.
+The existing generated routines are sufficient to express this restriction, since they are currently set up to take in values of that enumeration type.
+Changes related to this feature only need to affect the expression resolution phase, where more strict rules will be applied to prevent implicit conversions from integral types to enumeration types, but should continue to permit conversions from enumeration types to @int@.
+In this way, it is still possible to add an @int@ to an enumeration, but the resulting value is an @int@, meaning it cannot be reassigned to an enumeration without a cast.
+
+For structures, the situation is more complicated.
+Given a structure @S@ with members @M$_0$@, @M$_1$@, ... @M$_{N-1}$@, each function @f@ in the standard set calls \lstinline{f(s->M$_i$, ...)} for each @$i$@.
+That is, a default constructor for @S@ default constructs the members of @S@, the copy constructor copy constructs them, and so on.
+For example, given the structure definition
+\begin{cfacode}
+struct A {
+  B b;
+  C c;
+};
+\end{cfacode}
+The following functions are implicitly generated.
+\begin{cfacode}
+void ?{}(A * this) {
+  ?{}(&this->b);  // default construct each field
+  ?{}(&this->c);
+}
+void ?{}(A * this, A other) {
+  ?{}(&this->b, other.b);  // copy construct each field
+  ?{}(&this->c, other.c);
+}
+A ?=?(A * this, A other) {
+  ?=?(&this->b, other.b);  // assign each field
+  ?=?(&this->c, other.c);
+  return *this;
+}
+void ^?{}(A * this) {
+  ^?{}(&this->c);  // destruct each field
+  ^?{}(&this->b);
+}
+\end{cfacode}
+It is important to note that the destructors are called in reverse declaration order to prevent conflicts in the event there are dependencies among members.
+
+In addition to the standard set, a set of \emph{field constructors} is also generated for structures.
+The field constructors are constructors that consume a prefix of the structure's member-list.
+That is, $N$ constructors are built of the form @void ?{}(S *, T$_{\text{M}_0}$)@, @void ?{}(S *, T$_{\text{M}_0}$, T$_{\text{M}_1}$)@, ..., @void ?{}(S *, T$_{\text{M}_0}$, T$_{\text{M}_1}$, ..., T$_{\text{M}_{N-1}}$)@, where members are copy constructed if they have a corresponding positional argument and are default constructed otherwise.
+The addition of field constructors allows structures in \CFA to be used naturally in the same ways as used in C (\ie, to initialize any prefix of the structure), \eg, @A a0 = { b }, a1 = { b, c }@.
+Extending the previous example, the following constructors are implicitly generated for @A@.
+\begin{cfacode}
+void ?{}(A * this, B b) {
+  ?{}(&this->b, b);
+  ?{}(&this->c);
+}
+void ?{}(A * this, B b, C c) {
+  ?{}(&this->b, b);
+  ?{}(&this->c, c);
+}
+\end{cfacode}
+
+For unions, the default constructor and destructor do nothing, as it is not obvious which member, if any, should be constructed.
+For copy constructor and assignment operations, a bitwise @memcpy@ is applied.
+In standard C, a union can also be initialized using a value of the same type as its first member, and so a corresponding field constructor is generated to perform a bitwise @memcpy@ of the object.
+An alternative to this design is to always construct and destruct the first member of a union, to match with the C semantics of initializing the first member of the union.
+This approach ultimately feels subtle and unsafe.
+Another option is to, like \CC, disallow unions from containing members that are themselves managed types.
+This restriction is a reasonable approach from a safety standpoint, but is not very C-like.
+Since the primary purpose of a union is to provide low-level memory optimization, it is assumed that the user has a certain level of maturity.
+It is therefore the responsibility of the user to define the special functions explicitly if they are appropriate, since it is impossible to accurately predict the ways that a union is intended to be used at compile-time.
+
+For example, given the union
+\begin{cfacode}
+union X {
+  Y y;
+  Z z;
+};
+\end{cfacode}
+The following functions are automatically generated.
+\begin{cfacode}
+void ?{}(union X *_dst){  // default constructor
+}
+void ?{}(union X *_dst, union X _src){  // copy constructor
+  __builtin_memcpy(_dst, &_src, sizeof(union X ));
+}
+void ^?{}(union X *_dst){  // destructor
+}
+union X ?=?(union X *_dst, union X _src){  // assignment
+  __builtin_memcpy(_dst, &_src, sizeof(union X));
+  return _src;
+}
+void ?{}(union X *_dst, struct Y src){  // construct first field
+  __builtin_memcpy(_dst, &src, sizeof(struct Y));
+}
+\end{cfacode}
+
+% This feature works in the \CFA model, since constructors are simply special functions and can be called explicitly, unlike in \CC. % this sentence isn't really true => placement new
+In \CCeleven, unions may have managed members, with the caveat that if there are any members with a user-defined operation, then that operation is not implicitly defined, forcing the user to define the operation if necessary.
+This restriction could easily be added into \CFA once \emph{deleted} functions are added.
+
+\subsection{Using Constructors and Destructors}
+Implicitly generated constructor and destructor calls ignore the outermost type qualifiers, \eg @const@ and @volatile@, on a type by way of a cast on the first argument to the function.
+For example,
+\begin{cfacode}
+struct S { int i; };
+void ?{}(S *, int);
+void ?{}(S *, S);
+
+const S s = { 11 };
+volatile S s2 = s;
+\end{cfacode}
+Generates the following code
+\begin{cfacode}
+const struct S s;
+?{}((struct S *)&s, 11);
+volatile struct S s2;
+?{}((struct S *)&s2, s);
+\end{cfacode}
+Here, @&s@ and @&s2@ are cast to unqualified pointer types.
+This mechanism allows the same constructors and destructors to be used for qualified objects as for unqualified objects.
+This rule applies only to implicitly generated constructor calls.
+Hence, explicitly re-initializing qualified objects with a constructor requires an explicit cast.
+
+As discussed in Section \ref{sub:c_background}, compound literals create unnamed objects.
+This mechanism can continue to be used seamlessly in \CFA with managed types to create temporary objects.
+The object created by a compound literal is constructed using the provided brace-enclosed initializer-list, and is destructed at the end of the scope it is used in.
+For example,
+\begin{cfacode}
+struct A { int x, y; };
+void ?{}(A *, int, int);
+{
+  int x = (A){ 10, 20 }.x;
+}
+\end{cfacode}
+is equivalent to
+\begin{cfacode}
+struct A { int x, y; };
+void ?{}(A *, int, int);
+{
+  A _tmp;
+  ?{}(&_tmp, 10, 20);
+  int x = _tmp.x;
+  ^?{}(&_tmp);
+}
+\end{cfacode}
+
+Unlike \CC, \CFA provides an escape hatch that allows a user to decide at an object's definition whether it should be managed or not.
+An object initialized with \ateq is guaranteed to be initialized like a C object, and has no implicit destructor call.
+This feature provides all of the freedom that C programmers are used to having to optimize a program, while maintaining safety as a sensible default.
+\begin{cfacode}
+struct A { int * x; };
+// RAII
+void ?{}(A * a) { a->x = malloc(sizeof(int)); }
+void ^?{}(A * a) { free(a->x); }
+
+A a1;           // managed
+A a2 @= { 0 };  // unmanaged
+\end{cfacode}
+In this example, @a1@ is a managed object, and thus is default constructed and destructed at the start/end of @a1@'s lifetime, while @a2@ is an unmanaged object and is not implicitly constructed or destructed.
+Instead, @a2.x@ is initialized to @0@ as if it were a C object, because of the explicit initializer.
+
+In addition to freedom, \ateq provides a simple path for migrating legacy C code to \CFA, in that objects can be moved from C-style initialization to \CFA gradually and individually.
+It is worth noting that the use of unmanaged objects can be tricky to get right, since there is no guarantee that the proper invariants are established on an unmanaged object.
+It is recommended that most objects be managed by sensible constructors and destructors, except where absolutely necessary, such as memory-mapped devices, trigger devices, I/O controllers, etc.
+
+When a user declares any constructor or destructor, the corresponding intrinsic/generated function and all field constructors for that type are hidden, so that they are not found during expression resolution until the user-defined function goes out of scope.
+Furthermore, if the user declares any constructor, then the intrinsic/generated default constructor is also hidden, precluding default construction.
+These semantics closely mirror the rule for implicit declaration of constructors in \CC, wherein the default constructor is implicitly declared if there is no user-declared constructor \cite[p.~186]{ANSI98:C++}.
+\begin{cfacode}
+struct S { int x, y; };
+
+void f() {
+  S s0, s1 = { 0 }, s2 = { 0, 2 }, s3 = s2;  // okay
+  {
+    void ?{}(S * s, int i) { s->x = i*2; } // locally hide autogen ctors
+    S s4;  // error, no default constructor
+    S s5 = { 3 };  // okay, local constructor
+    S s6 = { 4, 5 };  // error, no field constructor
+    S s7 = s5; // okay
+  }
+  S s8, s9 = { 6 }, s10 = { 7, 8 }, s11 = s10;  // okay
+}
+\end{cfacode}
+In this example, the inner scope declares a constructor from @int@ to @S@, which hides the default constructor and field constructors until the end of the scope.
+
+When defining a constructor or destructor for a structure @S@, any members that are not explicitly constructed or destructed are implicitly constructed or destructed automatically.
+If an explicit call is present, then that call is taken in preference to any implicitly generated call.
+A consequence of this rule is that it is possible, unlike \CC, to precisely control the order of construction and destruction of sub-objects on a per-constructor basis, whereas in \CC sub-object initialization and destruction is always performed based on the declaration order.
+\begin{cfacode}
+struct A {
+  B w, x, y, z;
+};
+void ?{}(A * a, int i) {
+  (&a->x){ i };
+  (&a->z){ a->y };
+}
+\end{cfacode}
+Generates the following
+\begin{cfacode}
+void ?{}(A * a, int i) {
+  (&a->w){};   // implicit default ctor
+  (&a->y){};   // implicit default ctor
+  (&a->x){ i };
+  (&a->z){ a->y };
+}
+\end{cfacode}
+Finally, it is illegal for a sub-object to be explicitly constructed for the first time after it is used for the first time.
+If the translator cannot be reasonably sure that an object is constructed prior to its first use, but is constructed afterward, an error is emitted.
+More specifically, the translator searches the body of a constructor to ensure that every sub-object is initialized.
+\begin{cfacode}
+void ?{}(A * a, double x) {
+  f(a->x);
+  (&a->x){ (int)x }; // error, used uninitialized on previous line
+}
+\end{cfacode}
+However, if the translator sees a sub-object used within the body of a constructor, but does not see a constructor call that uses the sub-object as the target of a constructor, then the translator assumes the object is to be implicitly constructed (copy constructed in a copy constructor and default constructed in any other constructor).
+To override this rule, \ateq can be used to force the translator to trust the programmer's discretion.
+This form of \ateq is not yet implemented.
+\begin{cfacode}
+void ?{}(A * a) {
+  // default constructs all members
+  f(a->x);
+}
+
+void ?{}(A * a, A other) {
+  // copy constructs all members
+  f(a->y);
+}
+
+void ?{}(A * a, int x) {
+  // object forwarded to another constructor,
+  // does not implicitly construct any members
+  (&a){};
+}
+
+void ^?{}(A * a) {
+  ^(&a->x){}; // explicit destructor call
+} // z, y, w implicitly destructed, in this order
+\end{cfacode}
+If at any point, the @this@ parameter is passed directly as the target of another constructor, then it is assumed the other constructor handles the initialization of all of the object's members and no implicit constructor calls are added to the current constructor.
+
+Despite great effort, some forms of C syntax do not work well with constructors in \CFA.
+In particular, constructor calls cannot contain designations (see Section \ref{sub:c_background}), since this is equivalent to allowing designations on the arguments to arbitrary function calls.
+\begin{cfacode}
+// all legal forward declarations in C
+void f(int, int, int);
+void f(int a, int b, int c);
+void f(int b, int c, int a);
+void f(int c, int a, int b);
+void f(int x, int y, int z);
+
+f(b:10, a:20, c:30);  // which parameter is which?
+\end{cfacode}
+In C, function prototypes are permitted to have arbitrary parameter names, including no names at all, which may have no connection to the actual names used at function definition.
+Furthermore, a function prototype can be repeated an arbitrary number of times, each time using different names.
+As a result, it was decided that any attempt to resolve designated function calls with C's function prototype rules would be brittle, and thus it is not sensible to allow designations in constructor calls.
+
+\begin{sloppypar}
+In addition, constructor calls do not support unnamed nesting.
+\begin{cfacode}
+struct B { int x; };
+struct C { int y; };
+struct A { B b; C c; };
+void ?{}(A *, B);
+void ?{}(A *, C);
+
+A a = {
+  { 10 },  // construct B? - invalid
+};
+\end{cfacode}
+In C, nesting initializers means that the programmer intends to initialize sub-objects with the nested initializers.
+The reason for this omission is to both simplify the mental model for using constructors, and to make initialization simpler for the expression resolver.
+If this were allowed, it would be necessary for the expression resolver to decide whether each argument to the constructor call could initialize to some argument in one of the available constructors, making the problem highly recursive and potentially much more expensive.
+That is, in the previous example the line marked as invalid could mean construct using @?{}(A *, B)@ or with @?{}(A *, C)@, since the inner initializer @{ 10 }@ could be taken as an intermediate object of type @B@ or @C@.
+In practice, however, there could be many objects that can be constructed from a given @int@ (or, indeed, any arbitrary parameter list), and thus a complete solution to this problem would require fully exploring all possibilities.
+\end{sloppypar}
+
+More precisely, constructor calls cannot have a nesting depth greater than the number of array dimensions in the type of the initialized object, plus one.
+For example,
+\begin{cfacode}
+struct A;
+void ?{}(A *, int);
+void ?{}(A *, A, A);
+
+A a1[3] = { { 3 }, { 4 }, { 5 } };
+A a2[2][2] = {
+  { { 9 }, { 10 } },  // a2[0]
+  { {14 }, { 15 } }   // a2[1]
+};
+A a3[4] = { // 1 dimension => max depth 2
+  { { 11 }, { 12 } },  // error, three levels deep
+  { 80 }, { 90 }, { 100 }
+};
+\end{cfacode}
+The body of @A@ has been omitted, since only the constructor interfaces are important.
+
+It should be noted that unmanaged objects, \ie, objects that have only trivial constructors, can still make use of designations and nested initializers in \CFA.
+It is simple to overcome this limitation for managed objects by making use of compound literals, so that the arguments to the constructor call are explicitly typed.
+%%%%%%%%%%%%%%%%%%%%%%%%%% line width %%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{cfacode}
+struct B { int x; };
+struct C { int y; };
+struct A { B b; C c; };
+void ?{}(A *, B);
+void ?{}(A *, C);
+
+A a = {
+  (C){ 10 } // disambiguate with compound literal
+};
+\end{cfacode}
+%%%%%%%%%%%%%%%%%%%%%%%%%% line width %%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\subsection{Implicit Destructors}
+\label{sub:implicit_dtor}
+Destructors are automatically called at the end of the block in which the object is declared.
+In addition to this, destructors are automatically called when statements manipulate control flow to leave a block in which the object is declared, \eg, with return, break, continue, and goto statements.
+The example below demonstrates a simple routine with multiple return statements.
+\begin{cfacode}
+struct A;
+void ^?{}(A *);
+
+void f(int i) {
+  A x;  // construct x
+  {
+    A y; // construct y
+    {
+      A z; // construct z
+      {
+        if (i == 0) return; // destruct x, y, z
+      }
+      if (i == 1) return; // destruct x, y, z
+    } // destruct z
+    if (i == 2) return; // destruct x, y
+  } // destruct y
+} // destruct x
+\end{cfacode}
+
+The next example illustrates the use of simple continue and break statements and the manner that they interact with implicit destructors.
+\begin{cfacode}
+for (int i = 0; i < 10; i++) {
+  A x;
+  if (i == 2) {
+    continue;  // destruct x
+  } else if (i == 3) {
+    break;     // destruct x
+  }
+} // destruct x
+\end{cfacode}
+Since a destructor call is automatically inserted at the end of the block, nothing special needs to happen to destruct @x@ in the case where control reaches the end of the loop.
+In the case where @i@ is @2@, the continue statement runs the loop update expression and attempts to begin the next iteration of the loop.
+Since continue is a C statement, which does not understand destructors, it is transformed into a @goto@ statement that branches to the end of the loop, just before the block's destructors, to ensure that @x@ is destructed.
+When @i@ is @3@, the break statement moves control to just past the end of the loop.
+Unlike the previous case, the destructor for @x@ cannot be reused, so a destructor call for @x@ is inserted just before the break statement.
+
+\CFA also supports labeled break and continue statements, which allow more precise manipulation of control flow.
+Labeled break and continue allow the programmer to specify which control structure to target by using a label attached to a control structure.
+\begin{cfacode}[emph={L1,L2}, emphstyle=\color{red}]
+L1: for (int i = 0; i < 10; i++) {
+  A x;
+  for (int j = 0; j < 10; j++) {
+    A y;
+    if (i == 1) {
+      continue L1; // destruct y
+    } else if (i == 2) {
+      break L1;    // destruct x,y
+    }
+  } // destruct y
+} // destruct x
+\end{cfacode}
+The statement @continue L1@ begins the next iteration of the outer for-loop.
+Since the semantics of continue require the loop update expression to execute, control branches to the end of the outer for loop, meaning that the block destructor for @x@ can be reused, and it is only necessary to generate the destructor for @y@.
+Break, on the other hand, requires jumping out of both loops, so the destructors for both @x@ and @y@ are generated and inserted before the @break L1@ statement.
+
+Finally, an example which demonstrates goto.
+Since goto is a general mechanism for jumping to different locations in the program, a more comprehensive approach is required.
+For each goto statement $G$ and each target label $L$, let $S_G$ be the set of all managed variables alive at $G$, and let $S_L$ be the set of all managed variables alive at $L$.
+If at any $G$, $S_L \setminus S_G \neq \emptyset$, then the translator emits an error, because control flow branches from a point where the object is not yet live to a point where it is live, skipping the object's constructor.
+Then, for every $G$, the destructors for each variable in the set $S_G \setminus S_L$ are inserted directly before $G$, which ensures each object that is currently live at $G$, but not at $L$, is destructed before control branches.
+\begin{cfacode}
+int i = 0;
+{
+  L0: ;     // S_L0 = {}
+    A y;
+  L1: ;     // S_L1 = { y }
+    A x;
+  L2: ;     // S_L2 = { y, x }
+    if (i == 0) {
+      ++i;
+      goto L1;    // S_G = { y, x }
+      // S_G-S_L1 = { x } => destruct x
+    } else if (i == 1) {
+      ++i;
+      goto L2;    // S_G = { y, x }
+      // S_G-S_L2 = {} => destruct nothing
+    } else if (i == 2) {
+      ++i;
+      goto L3;    // S_G = { y, x }
+      // S_G-S_L3 = {}
+    } else if (false) {
+      ++i;
+      A z;
+      goto L3;    // S_G = { z, y, x }
+      // S_G-S_L3 = { z } => destruct z
+    } else {
+      ++i;
+      goto L4;    // S_G = { y, x }
+      // S_G-S_L4 = { y, x } => destruct y, x
+    }
+  L3: ;    // S_L3 = { y, x }
+    goto L2;      // S_G = { y, x }
+    // S_G-S_L2 = {}
+}
+L4: ;  // S_L4 = {}
+if (i == 4) {
+  goto L0;        // S_G = {}
+  // S_G-S_L0 = {}
+}
+\end{cfacode}
+All break and continue statements are implemented in \CFA in terms of goto statements, so the more constrained forms are precisely governed by these rules.
+
+The next example demonstrates the error case.
+\begin{cfacode}
+{
+    goto L1; // S_G = {}
+    // S_L1-S_G = { y } => error
+    A y;
+  L1: ; // S_L1 = { y }
+    A x;
+  L2: ; // S_L2 = { y, x }
+}
+goto L2; // S_G = {}
+// S_L2-S_G = { y, x } => error
+\end{cfacode}
+
+While \CFA supports the GCC computed-goto extension, the behaviour of managed objects in combination with computed-goto is undefined.
+\begin{cfacode}
+void f(int val) {
+  void * l = val == 0 ? &&L1 : &&L2;
+  {
+      A x;
+    L1: ;
+      goto *l;  // branches differently depending on argument
+  }
+  L2: ;
+}
+\end{cfacode}
+Likewise, destructors are not executed at scope-exit due to a computed-goto in \CC, as of g++ version 6.2.
+
+\subsection{Implicit Copy Construction}
+\label{s:implicit_copy_construction}
+When a function is called, the arguments supplied to the call are subject to implicit copy construction (and destruction of the generated temporary), and the return value is subject to destruction.
+When a value is returned from a function, the copy constructor is called to pass the value back to the call site.
+Exempt from these rules are intrinsic and built-in functions.
+It should be noted that unmanaged objects are subject to copy constructor calls when passed as arguments to a function or when returned from a function, since they are not the \emph{target} of the copy constructor call.
+That is, since the parameter is not marked as an unmanaged object using \ateq, it is copy constructed if it is returned by value or passed as an argument to another function, so to guarantee consistent behaviour, unmanaged objects must be copy constructed when passed as arguments.
+These semantics are important to bear in mind when using unmanaged objects, and could produce unexpected results when mixed with objects that are explicitly constructed.
+\begin{cfacode}
+struct A { ... };
+void ?{}(A *);
+void ?{}(A *, A);
+void ^?{}(A *);
+
+A identity(A x) { // pass by value => need local copy
+  return x;       // return by value => make call-site copy
+}
+
+A y, z @= {};
+identity(y);  // copy construct y into x
+identity(z);  // copy construct z into x
+\end{cfacode}
+Note that unmanaged argument @z@ is logically copy constructed into managed parameter @x@; however, the translator must copy construct into a temporary variable to be passed as an argument, which is also destructed after the call.
+A compiler could by-pass the argument temporaries since it is in control of the calling conventions and knows exactly where the called-function's parameters live.
+
+This generates the following
+\begin{cfacode}
+struct A f(struct A x){
+  struct A _retval_f;    // return value
+  ?{}((&_retval_f), x);  // copy construct return value
+  return _retval_f;
+}
+
+struct A y;
+?{}(&y);                 // default construct
+struct A z = { 0 };      // C default
+
+struct A _tmp_cp1;       // argument 1
+struct A _tmp_cp_ret0;   // return value
+_tmp_cp_ret0=f(
+  (?{}(&_tmp_cp1, y) , _tmp_cp1)  // argument is a comma expression
+), _tmp_cp_ret0;         // return value for cascading
+^?{}(&_tmp_cp_ret0);     // destruct return value
+^?{}(&_tmp_cp1);         // destruct argument 1
+
+struct A _tmp_cp2;       // argument 1
+struct A _tmp_cp_ret1;   // return value
+_tmp_cp_ret1=f(
+  (?{}(&_tmp_cp2, z), _tmp_cp2)  // argument is a comma expression
+), _tmp_cp_ret1;         // return value for cascading
+^?{}(&_tmp_cp_ret1);     // destruct return value
+^?{}(&_tmp_cp2);         // destruct argument 1
+^?{}(&y);
+\end{cfacode}
+
+A special syntactic form, such as a variant of \ateq, can be implemented to specify at the call site that an argument should not be copy constructed, to regain some control for the C programmer.
+\begin{cfacode}
+identity(z@);  // do not copy construct argument
+               // - will copy construct/destruct return value
+A@ identity_nocopy(A @ x) {  // argument not copy constructed or destructed
+  return x;  // not copy constructed
+             // return type marked @ => not destructed
+}
+\end{cfacode}
+It should be noted that reference types will allow specifying that a value does not need to be copied, however reference types do not provide a means of preventing implicit copy construction from uses of the reference, so the problem is still present when passing or returning the reference by value.
+
+Adding implicit copy construction imposes the additional runtime cost of the copy constructor for every argument and return value in a function call.
+This cost is necessary to maintain appropriate value semantics when calling a function.
+In the future, return-value-optimization (RVO) can be implemented for \CFA to elide unnecessary copy construction and destruction of temporary objects.
+This cost is not present for types with trivial copy constructors and destructors.
+
+A known issue with this implementation is that the argument and return value temporaries are not guaranteed to have the same address for their entire lifetimes.
+In the previous example, since @_retval_f@ is allocated and constructed in @f@, then returned by value, the internal data is bitwise copied into the caller's stack frame.
+This approach works out most of the time, because typically destructors need to only access the fields of the object and recursively destroy.
+It is currently the case that constructors and destructors that use the @this@ pointer as a unique identifier to store data externally do not work correctly for return value objects.
+Thus, it is currently not safe to rely on an object's @this@ pointer to remain constant throughout execution of the program.
+\begin{cfacode}
+A * external_data[32];
+int ext_count;
+struct A;
+void ?{}(A * a) {
+  // ...
+  external_data[ext_count++] = a;
+}
+void ^?{}(A * a) {
+  for (int i = 0; i < ext_count; i++) {
+    if (a == external_data[i]) { // may never be true
+      // ...
+    }
+  }
+}
+
+A makeA() {
+  A x;  // stores &x in external_data
+  return x;
+}
+makeA();  // return temporary has a different address than x
+// equivalent to:
+//   A _tmp;
+//   _tmp = makeA(), _tmp;
+//   ^?{}(&_tmp);
+\end{cfacode}
+In the above example, a global array of pointers is used to keep track of all of the allocated @A@ objects.
+Due to copying on return, the current object being destructed does not exist in the array if an @A@ object is ever returned by value from a function, such as in @makeA@.
+
+This problem could be solved in the translator by changing the function signatures so that the return value is moved into the parameter list.
+For example, the translator could restructure the code like so
+\begin{cfacode}
+void f(struct A x, struct A * _retval_f){
+  ?{}(_retval_f, x);  // construct directly into caller's stack frame
+}
+
+struct A y;
+?{}(&y);
+struct A z = { 0 };
+
+struct A _tmp_cp1;     // argument 1
+struct A _tmp_cp_ret0; // return value
+f((?{}(&_tmp_cp1, y) , _tmp_cp1), &_tmp_cp_ret0), _tmp_cp_ret0;
+^?{}(&_tmp_cp_ret0);   // return value
+^?{}(&_tmp_cp1);       // argument 1
+\end{cfacode}
+This transformation provides @f@ with the address of the return variable so that it can be constructed into directly.
+It is worth pointing out that this kind of signature rewriting already occurs in polymorphic functions that return by value, as discussed in \cite{Bilson03}.
+A key difference in this case is that every function would need to be rewritten like this, since types can switch between managed and unmanaged at different scope levels, \eg
+\begin{cfacode}
+struct A { int v; };
+A x; // unmanaged, since only trivial constructors are available
+{
+  void ?{}(A * a) { ... }
+  void ^?{}(A * a) { ... }
+  A y; // managed
+}
+A z; // unmanaged
+\end{cfacode}
+Hence there is not enough information to determine at function declaration whether a type is managed or not, and thus it is the case that all signatures have to be rewritten to account for possible copy constructor and destructor calls.
+Even with this change, it would still be possible to declare backwards compatible function prototypes with an @extern "C"@ block, which allows for the definition of C-compatible functions within \CFA code, however this would require actual changes to the way code inside of an @extern "C"@ function is generated as compared with normal code generation.
+Furthermore, it is not possible to overload C functions, so using @extern "C"@ to declare functions is of limited use.
+
+It would be possible to regain some control by adding an attribute to structures that specifies whether they can be managed or not (perhaps \emph{manageable} or \emph{unmanageable}), and to emit an error in the case that a constructor or destructor is declared for an unmanageable type.
+Ideally, structures should be manageable by default, since otherwise the default case becomes more verbose.
+This means that in general, function signatures would have to be rewritten, and in a select few cases the signatures would not be rewritten.
+\begin{cfacode}
+__attribute__((manageable)) struct A { ... };   // can declare ctors
+__attribute__((unmanageable)) struct B { ... }; // cannot declare ctors
+struct C { ... };                               // can declare ctors
+
+A f();  // rewritten void f(A *);
+B g();  // not rewritten
+C h();  // rewritten void h(C *);
+\end{cfacode}
+An alternative is to make the attribute \emph{identifiable}, which states that objects of this type use the @this@ parameter as an identity.
+This strikes more closely to the visible problem, in that only types marked as identifiable would need to have the return value moved into the parameter list, and every other type could remain the same.
+Furthermore, no restrictions would need to be placed on whether objects can be constructed.
+\begin{cfacode}
+__attribute__((identifiable)) struct A { ... };  // can declare ctors
+struct B { ... };                                // can declare ctors
+
+A f();  // rewritten void f(A *);
+B g();  // not rewritten
+\end{cfacode}
+
+Ultimately, both of these are patchwork solutions.
+Since a real compiler has full control over its calling conventions, it can seamlessly allow passing the return parameter without outwardly changing the signature of a routine.
+As such, it has been decided that this issue is not currently a priority and will be fixed when a full \CFA compiler is implemented.
+
+\section{Implementation}
+\subsection{Array Initialization}
+Arrays are a special case in the C type-system.
+Type checking largely ignores size information for C arrays, making it impossible to write a standalone \CFA function that constructs or destructs an array, while maintaining the standard interface for constructors and destructors.
+Instead, \CFA defines the initialization and destruction of an array recursively.
+That is, when an array is defined, each of its elements is constructed in order from element 0 up to element $n-1$.
+When an array is to be implicitly destructed, each of its elements is destructed in reverse order from element $n-1$ down to element 0.
+As in C, it is possible to explicitly provide different initializers for each element of the array through array initialization syntax.
+In this case, each of the initializers is taken in turn to construct a subsequent element of the array.
+If too many initializers are provided, only the first $n$ initializers are actually used.
+If too few initializers are provided, then the remaining elements are default constructed.
+
+For example, given the following code.
+\begin{cfacode}
+struct X {
+  int x, y, z;
+};
+void f() {
+  X x[10] = { { 1, 2, 3 }, { 4 }, { 7, 8 } };
+}
+\end{cfacode}
+The following code is generated for @f@.
+\begin{cfacode}
+void f(){
+  struct X x[((long unsigned int )10)];
+  // construct x
+  {
+    int _index0 = 0;
+    // construct with explicit initializers
+    {
+      if (_index0<10) ?{}(&x[_index0], 1, 2, 3);
+      ++_index0;
+      if (_index0<10) ?{}(&x[_index0], 4);
+      ++_index0;
+      if (_index0<10) ?{}(&x[_index0], 7, 8);
+      ++_index0;
+    }
+
+    // default construct remaining elements
+    for (;_index0<10;++_index0) {
+      ?{}(&x[_index0]);
+    }
+  }
+  // destruct x
+  {
+    int _index1 = 10-1;
+    for (;_index1>=0;--_index1) {
+      ^?{}(&x[_index1]);
+    }
+  }
+}
+\end{cfacode}
+Multidimensional arrays require more complexity.
+For example, a two dimensional array
+\begin{cfacode}
+void g() {
+  X x[10][10] = {
+    { { 1, 2, 3 }, { 4 } }, // x[0]
+    { { 7, 8 } }            // x[1]
+  };
+}\end{cfacode}
+generates the following
+\begin{cfacode}
+void g(){
+  struct X x[10][10];
+  // construct x
+  {
+    int _index0 = 0;
+    for (;_index0<10;++_index0) {
+      {
+        int _index1 = 0;
+        // construct with explicit initializers
+        {
+          switch ( _index0 ) {
+            case 0:
+              // construct first array
+              if ( _index1<10 ) ?{}(&x[_index0][_index1], 1, 2, 3);
+              ++_index1;
+              if ( _index1<10 ) ?{}(&x[_index0][_index1], 4);
+              ++_index1;
+              break;
+            case 1:
+              // construct second array
+              if ( _index1<10 ) ?{}(&x[_index0][_index1], 7, 8);
+              ++_index1;
+              break;
+          }
+        }
+        // default construct remaining elements
+        for (;_index1<10;++_index1) {
+            ?{}(&x[_index0][_index1]);
+        }
+      }
+    }
+  }
+  // destruct x
+  {
+    int _index2 = 10-1;
+    for (;_index2>=0;--_index2) {
+      {
+        int _index3 = 10-1;
+        for (;_index3>=0;--_index3) {
+            ^?{}(&x[_index2][_index3]);
+        }
+      }
+    }
+  }
+}
+\end{cfacode}
+% It is possible to generate slightly simpler code for the switch cases, since the value of @_index1@ is known at compile-time within each case, however the procedure for generating constructor calls is complicated.
+% It is simple to remove the increment statements for @_index1@, but it is not simple to remove the
+%% technically, it's not hard either. I could easily downcast and change the second argument to ?[?], but is it really necessary/worth it??
+
+\subsection{Global Initialization}
+In standard C, global variables can only be initialized to compile-time constant expressions, which places strict limitations on the programmer's ability to control the default values of objects.
+In \CFA, constructors and destructors are guaranteed to be run on global objects, allowing arbitrary code to be run before and after the execution of the main routine.
+By default, objects within a translation unit are constructed in declaration order, and destructed in the reverse order.
+The default order of construction of objects amongst translation units is unspecified.
+It is, however, guaranteed that any global objects in the standard library are initialized prior to the initialization of any object in a user program.
+
+This feature is implemented in the \CFA translator by grouping every global constructor call into a function with the GCC attribute \emph{constructor}, which performs most of the heavy lifting \cite[6.31.1]{GCCExtensions}.
+A similar function is generated with the \emph{destructor} attribute, which handles all global destructor calls.
+At the time of writing, initialization routines in the library are specified with priority \emph{101}, which is the highest priority level that GCC allows, whereas initialization routines in the user's code are implicitly given the default priority level, which ensures they have a lower priority than any code with a specified priority level.
+This mechanism allows arbitrarily complicated initialization to occur before any user code runs, making it possible for library designers to initialize their modules without requiring the user to call specific startup or tear-down routines.
+
+For example, given the following global declarations.
+\begin{cfacode}
+struct X {
+  int y, z;
+};
+void ?{}(X *);
+void ?{}(X *, int, int);
+void ^?{}(X *);
+
+X a;
+X b = { 10, 3 };
+\end{cfacode}
+The following code is generated.
+\begin{cfacode}
+__attribute__ ((constructor)) static void _init_global_ctor(void){
+  ?{}(&a);
+  ?{}(&b, 10, 3);
+}
+__attribute__ ((destructor)) static void _destroy_global_ctor(void){
+  ^?{}(&b);
+  ^?{}(&a);
+}
+\end{cfacode}
+
+%   https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Attributes.html#C_002b_002b-Attributes
+% suggestion: implement this in CFA by picking objects with a specified priority and pulling them into their own init functions (could even group them by priority level -> map<int, list<ObjectDecl*>>) and pull init_priority forward into constructor and destructor attributes with the same priority level
+GCC provides an attribute @init_priority@ in \CC, which allows specifying the relative priority for initialization of global objects on a per-object basis.
+A similar attribute can be implemented in \CFA by pulling marked objects into global constructor/destructor-attribute functions with the specified priority.
+For example,
+\begin{cfacode}
+struct A { ... };
+void ?{}(A *, int);
+void ^?{}(A *);
+__attribute__((init_priority(200))) A x = { 123 };
+\end{cfacode}
+would generate
+\begin{cfacode}
+A x;
+__attribute__((constructor(200))) void __init_x() {
+  ?{}(&x, 123);  // construct x with priority 200
+}
+__attribute__((destructor(200))) void __destroy_x() {
+  ^?{}(&x);      // destruct x with priority 200
+}
+\end{cfacode}
+
+\subsection{Static Local Variables}
+In standard C, it is possible to mark variables that are local to a function with the @static@ storage class.
+Unlike normal local variables, a @static@ local variable is defined to live for the entire duration of the program, so that each call to the function has access to the same variable with the same address and value as it had in the previous call to the function.
+Much like global variables, @static@ variables can only be initialized to a \emph{compile-time constant value} so that a compiler is able to create storage for the variable and initialize it at compile-time.
+
+Yet again, this rule is too restrictive for a language with constructors and destructors.
+Since the initializer expression is not necessarily a compile-time constant and can depend on the current execution state of the function, \CFA modifies the definition of a @static@ local variable so that objects are guaranteed to be live from the time control flow reaches their declaration, until the end of the program.
+Since standard C does not allow access to a @static@ local variable before the first time control flow reaches the declaration, this change does not preclude any valid C code.
+Local objects with @static@ storage class are only implicitly constructed and destructed once for the duration of the program.
+The object is constructed when its declaration is reached for the first time.
+The object is destructed once at the end of the program.
+
+Construction of @static@ local objects is implemented via an accompanying @static bool@ variable, which records whether the variable has already been constructed.
+A conditional branch checks the value of the companion @bool@, and if the variable has not yet been constructed then the object is constructed.
+The object's destructor is scheduled to be run when the program terminates using @atexit@ \footnote{When using the dynamic linker, it is possible to dynamically load and unload a shared library. Since glibc 2.2.3 \cite{atexit}, functions registered with @atexit@ within the shared library are called when unloading the shared library. As such, static local objects can be destructed using this mechanism even in shared libraries on Linux systems.}, and the companion @bool@'s value is set so that subsequent invocations of the function do not reconstruct the object.
+Since the parameter to @atexit@ is a parameter-less function, some additional tweaking is required.
+First, the @static@ variable must be hoisted up to global scope and uniquely renamed to prevent name clashes with other global objects.
+If necessary, a local structure may need to be hoisted, as well.
+Second, a function is built that calls the destructor for the newly hoisted variable.
+Finally, the newly generated function is registered with @atexit@, instead of registering the destructor directly.
+Since @atexit@ calls functions in the reverse order in which they are registered, @static@ local variables are guaranteed to be destructed in the reverse order that they are constructed, which may differ between multiple executions of the same program.
+Extending the previous example
+\begin{cfacode}
+int f(int x) {
+  static X a;
+  static X b = { x, x };  // depends on parameter value
+  static X c = b;         // depends on local variable
+}
+\end{cfacode}
+generates the following.
+\begin{cfacode}
+static struct X a_static_var0;
+static void __a_dtor_atexit0(void){
+  ((void)^?{}(((struct X *)(&a_static_var0))));
+}
+static struct X b_static_var1;
+static void __b_dtor_atexit1(void){
+  ((void)^?{}(((struct X *)(&b_static_var1))));
+}
+static struct X c_static_var2;
+static void __c_dtor_atexit2(void){
+  ((void)^?{}(((struct X *)(&c_static_var2))));
+}
+int f(int x){
+  int _retval_f;
+  __attribute__ ((unused)) static void *_dummy0;
+  static _Bool __a_uninitialized = 1;
+  if ( __a_uninitialized ) {
+    ((void)?{}(((struct X *)(&a_static_var0))));
+    ((void)(__a_uninitialized=0));
+    ((void)atexit(__a_dtor_atexit0));
+  }
+
+  __attribute__ ((unused)) static void *_dummy1;
+  static _Bool __b_uninitialized = 1;
+  if ( __b_uninitialized ) {
+    ((void)?{}(((struct X *)(&b_static_var1)), x, x));
+    ((void)(__b_uninitialized=0));
+    ((void)atexit(__b_dtor_atexit1));
+  }
+
+  __attribute__ ((unused)) static void *_dummy2;
+  static _Bool __c_uninitialized = 1;
+  if ( __c_uninitialized ) {
+    ((void)?{}(((struct X *)(&c_static_var2)), b_static_var1));
+    ((void)(__c_uninitialized=0));
+    ((void)atexit(__c_dtor_atexit2));
+  }
+}
+\end{cfacode}
+
+This implementation comes at the runtime cost of an additional branch for every @static@ local variable, each time the function is called.
+Since initializers are not required to be compile-time constant expressions, they can involve global variables, function arguments, function calls, etc.
+As a direct consequence, @static@ local variables cannot be initialized with attribute-constructor routines like global variables can.
+However, in the case where the variable is unmanaged and has a compile-time constant initializer, a C-compliant initializer is generated and the additional cost is not present.
+\CC shares the same semantics for its @static@ local variables.
+
+\subsection{Polymorphism}
+As mentioned in section \ref{sub:polymorphism}, \CFA currently has 3 type-classes that are used to designate polymorphic data types: @otype@, @dtype@, and @ftype@.
+In previous versions of \CFA, @otype@ was syntactic sugar for @dtype@ with known size/alignment information and an assignment function.
+That is,
+\begin{cfacode}
+forall(otype T)
+void f(T);
+\end{cfacode}
+was equivalent to
+\begin{cfacode}
+forall(dtype T | sized(T) | { T ?=?(T *, T); })
+void f(T);
+\end{cfacode}
+This allows constraints that are common to all complete object-types to be specified very simply.
+
+Now that \CFA has constructors and destructors, more of a complete object's behaviour can be specified than was previously possible.
+As such, @otype@ has been augmented to include assertions for a default constructor, copy constructor, and destructor.
+That is, the previous example is now equivalent to
+\begin{cfacode}
+forall(dtype T | sized(T) |
+  { T ?=?(T *, T); void ?{}(T *); void ?{}(T *, T); void ^?{}(T *); })
+void f(T);
+\end{cfacode}
+These additions allow @f@'s body to create and destroy objects of type @T@, and pass objects of type @T@ as arguments to other functions, following the normal \CFA rules.
+A point of note here is that objects can be missing default constructors (and eventually other functions through deleted functions), so it is important for \CFA programmers to think carefully about the operations needed by their function, so as not to over-constrain the acceptable parameter types and prevent potential reuse.
+
+These additional assertion parameters impose a runtime cost on all managed temporary objects created in polymorphic code, even those with trivial constructors and destructors.
+This cost is necessary because polymorphic code does not know the actual type at compile-time, due to separate compilation.
+Since trivial constructors and destructors either do not perform operations or are simply bit-wise copy operations, the imposed cost is essentially the cost of the function calls.
+
+\section{Summary}
+
+When creating a new object of a managed type, it is guaranteed that a constructor is called to initialize the object at its definition point, and a destructor is called when the object's lifetime ends.
+Destructors are called in the reverse order of construction.
+
+Every argument passed to a function is copy constructed into a temporary object that is passed by value to the function and destructed at the end of the statement.
+Function return values are copy constructed inside the function at the return statement, passed by value to the call-site, and destructed at the call-site at the end of the statement.
+
+Every complete object type has a default constructor, copy constructor, assignment operator, and destructor.
+To accomplish this, these functions are generated as appropriate for new types.
+User-defined functions shadow built-in and automatically generated functions, so it is possible to specialize the behaviour of a type.
+Furthermore, default constructors and aggregate field constructors are hidden when \emph{any} constructor is defined.
+
+Objects dynamically allocated with @malloc@, \ateq objects, and objects with only trivial constructors and destructors are unmanaged.
+Unmanaged objects are never the target of an implicit constructor or destructor call.
Index: doc/theses/rob_schluntz/examples/conclusions/dtor.c
===================================================================
--- doc/theses/rob_schluntz/examples/conclusions/dtor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/conclusions/dtor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,11 @@
+forall(otype T)
+struct Box {
+  T x;
+};
+forall(otype T) void ^?{}(Box(T) * x);
+
+forall(otype T)
+void f(T x) {
+  T y = x;
+  Box(T) z = { x };
+}
Index: doc/theses/rob_schluntz/examples/conclusions/except.c
===================================================================
--- doc/theses/rob_schluntz/examples/conclusions/except.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/conclusions/except.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,20 @@
+#include <stdio.h>
+typedef struct S {
+  int x;
+} S;
+
+void _dtor_S(S * s);
+//  {
+//   printf("called destructor!\n");
+// }
+
+void _ctor_S(struct S *s);
+//  {
+//   s->x = 123;
+// }
+
+int main() {
+  struct S _tmp3;
+  __attribute__((cleanup(_dtor_S))) struct S _tmp2 = (_ctor_S(&_tmp2), _tmp2);
+  printf("%d\n", _tmp2.x);
+}
Index: doc/theses/rob_schluntz/examples/conclusions/except.cc
===================================================================
--- doc/theses/rob_schluntz/examples/conclusions/except.cc	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/conclusions/except.cc	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,31 @@
+#include <iostream>
+using namespace std;
+
+struct S {
+  int x;
+};
+
+void _dtor_S(S * s) {
+  cout << "called destructor!" << endl;
+}
+
+S f() {
+  throw 3;
+  return (S) { 0 };
+}
+
+void _ctor_S(struct S *s, struct S) {
+  s->x = 123;
+}
+
+int main() {
+  try {
+//    __attribute__((cleanup(_dtor_S))) S s = f();
+  struct S _tmp1;
+  struct S _tmp2 = (_ctor_S(&_tmp2, _tmp1), _tmp2);
+  cout << _tmp2.x << endl;
+
+  } catch(...) {
+
+  }
+}
Index: doc/theses/rob_schluntz/examples/ctor/array_ctor.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/array_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/array_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,16 @@
+struct X { int x, y, z; };
+void ?{}(X *);
+void ?{}(X *, int);
+void ?{}(X *, int, int);
+void ?{}(X *, int, int, int);
+void ^?{}(X *);
+void f() {
+  X x[10] = { { 1, 2, 3 }, { 4 }, { 7, 8 } };
+}
+
+void g() {
+  X x[10][10] = {
+    { { 1, 2, 3 }, { 4 } },
+    { { 7, 8 } }
+  };
+}
Index: doc/theses/rob_schluntz/examples/ctor/copy_ctor.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/copy_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/copy_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,14 @@
+struct A;
+void ?{}(A *);
+void ?{}(A *, A);
+void ^?{}(A *);
+
+A f(A x) {
+  return x;
+}
+
+int main() {
+	A y, z @= {};
+	f(y);
+	f(z);
+}
Index: doc/theses/rob_schluntz/examples/ctor/cv_ctor.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/cv_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/cv_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,10 @@
+struct S { int i; };
+void ?{}(S *, int);
+void ?{}(S *, S);
+
+int main() {
+  const int i = 5;
+  volatile int j = i;
+  const S s = { 11 };
+  volatile S s2 = s;
+}
Index: doc/theses/rob_schluntz/examples/ctor/enum_ctor.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/enum_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/enum_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,3 @@
+enum Colour {
+  R, G, B
+};
Index: doc/theses/rob_schluntz/examples/ctor/expr_ctor.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/expr_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/expr_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,6 @@
+struct X {};
+void ?{}(X *, double);
+
+int f() {
+  X * x = malloc(sizeof(X)){ 1.5 };
+}
Index: doc/theses/rob_schluntz/examples/ctor/global_ctor.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/global_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/global_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,9 @@
+struct X {
+  int y, z;
+};
+void ?{}(X *);
+void ?{}(X *, int, int);
+void ^?{}(X *);
+
+X a;
+X b = { 10, 3 };
Index: doc/theses/rob_schluntz/examples/ctor/hide_ctor.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/hide_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/hide_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,12 @@
+struct S { int x; };
+
+int main() {
+  S s0; // okay
+  {
+    void ?{}(S * s, int i) { s->x = i*2; }
+    void ?{}(S *s) { }
+//    void ^?{}(S *s ) { }
+    S s1; // error
+  }
+  S s2; // okay
+}
Index: doc/theses/rob_schluntz/examples/ctor/member.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/member.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/member.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,26 @@
+struct T {
+  int x;
+};
+const int val = 12223344;
+void ?{}(T * t) {
+  if (t->x == val) printf("uh-oh, constructed twice!\n");
+  t->x = val;
+}
+
+struct S {
+  T t1, t2;
+};
+
+void ?{}(S * this) {
+  // construct both members
+}
+
+void ?{}(S * this, int x) {
+  // forward
+  ?{}(this);
+  ?{}(&this->t1);
+}
+
+int main() {
+  S s = 5;
+}
Index: doc/theses/rob_schluntz/examples/ctor/placement_ctor.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/placement_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/placement_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,51 @@
+struct memory_pool {
+  char * start;
+  char * cur;
+  size_t size;
+  char * free;
+};
+
+void ?{}(memory_pool * pool, size_t size) {
+  pool->[start, cur] = malloc(size);
+  pool->size = size;
+  printf("initializing memory pool with size %lu at location %p\n", pool->size, pool->start);
+}
+
+void ^?{}(memory_pool * pool) {
+  free(pool->start);
+}
+
+forall(dtype T | sized(T))
+T * allocate(memory_pool * pool, unsigned int array_size = 1) {
+  size_t size = sizeof(T) * array_size;
+  printf("allocating block of size %lu...", size);
+  if (pool->cur + size < pool->start + pool->size) {
+    T * x = (T*)pool->cur;
+    pool->cur += size;
+    printf("success!\n");
+    printf("next address is %p\n", pool->cur);
+    return x;
+  } else {
+    printf("failed!\n");
+    // fail to allocate
+    return 0;
+  }
+}
+
+struct A {
+  int x, y, z;
+};
+void ?{}(A * a) {
+  a->[x,y,z] = [123, 456, 789];
+}
+
+int main() {
+  memory_pool pool = { 1024 };
+
+  int * x = allocate(&pool);
+  A * a = allocate(&pool);
+  A * b = allocate(&pool, 1000);
+  a{};
+  printf("%p\n", x);
+  printf("%p %d %d %d\n", a, a->[x,y,z]);
+}
Index: doc/theses/rob_schluntz/examples/ctor/return_dtor.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/return_dtor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/return_dtor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,20 @@
+struct A;
+void ?{}(A *);
+void ^?{}(A *);
+
+void f(int i) {
+  A x;  // construct x
+  {
+    A y; // construct y
+    {
+      A z; // construct z
+      {
+        if (i == 0) return; // destruct x, y, z
+      }
+      if (i == 1) return; // destruct x, y, z
+      // destruct z
+    }
+    if (i == 2) return; // destruct x, y
+    // destruct y
+  }
+}
Index: doc/theses/rob_schluntz/examples/ctor/static_ctor.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/static_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/static_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,12 @@
+struct X {
+  int y, z;
+};
+void ?{}(X *);
+void ?{}(X *, int, int);
+void ^?{}(X *);
+
+int f(int x) {
+  static X a;
+  static X b = { x, x };
+  static X c = b;
+}
Index: doc/theses/rob_schluntz/examples/ctor/union_ctor.c
===================================================================
--- doc/theses/rob_schluntz/examples/ctor/union_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/ctor/union_ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,6 @@
+struct Y { int a; };
+struct Z { double z; };
+union X {
+  Y y;
+  Z z;
+};
Index: doc/theses/rob_schluntz/examples/intro/FileOutputStream.java
===================================================================
--- doc/theses/rob_schluntz/examples/intro/FileOutputStream.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/intro/FileOutputStream.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,38 @@
+import java.io.IOException;
+import java.io.FileNotFoundException;
+
+public class FileOutputStream implements AutoCloseable {
+	public static int throwOnWrite;
+	public static int throwOnClose;
+	public static int throwOnOpen;
+
+	public static int numWrites;
+	public static int numCloses;
+	public static int numOpens;
+
+	private String filename;
+	private <EX extends Throwable> void doexcept(EX ex, boolean pred) throws EX {
+		if (pred) {
+			System.out.println("Stream: " + filename + " threw exception: " + ex);
+			throw ex;
+		}
+	}
+
+	public FileOutputStream(String filename) throws FileNotFoundException {
+		doexcept(new FileNotFoundException(), throwOnOpen == ++numOpens);
+		System.out.println("Opened file: " + filename);
+		this.filename = filename;
+	}
+	public void write(byte[] bytes) throws IOException {
+		doexcept(new IOException(), throwOnWrite == ++numWrites);
+		System.out.println("wrote message: " + new String(bytes) + " to file: " + filename);
+	}
+	public void close() throws IOException {
+		System.out.println("Closing file: " + filename);
+		filename = null;
+		doexcept(new IOException(), throwOnClose == ++numCloses);
+	}
+	protected void finalize() {
+		if (filename != null) System.out.println("Finalize closing file: " + filename);
+	}
+}
Index: doc/theses/rob_schluntz/examples/intro/compound_lit.c
===================================================================
--- doc/theses/rob_schluntz/examples/intro/compound_lit.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/intro/compound_lit.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,16 @@
+int printf(const char *, ...);
+
+struct A { int x, y; };
+int f(struct A a, int z) {
+	printf("%d %d %d\n", a.x, a.y, z);
+}
+int g(int * x) {
+	if (x == 0) printf("NULL\n");
+	else printf("%d\n", *x);
+}
+
+int main() {
+	f((struct A){ 3, 4 }, (int){ 5 } = 10);
+	g((int[]){ 1, 2, 3 });
+	g(&(int){ 0 });
+}
Index: doc/theses/rob_schluntz/examples/intro/designation.c
===================================================================
--- doc/theses/rob_schluntz/examples/intro/designation.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/intro/designation.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,24 @@
+int printf(const char *, ...);
+
+struct A {
+  int w, x, y, z;
+};
+
+void print(struct A a) {
+	printf("{ %d, %d, %d, %d }\n", a.w, a.x, a.y, a.z);
+}
+
+int main() {
+	struct A a0 = { .x=4, .z=1, .x=8 };
+	struct A a1 = { 1, .y=7, 6 };
+	struct A a2[3] = { [2]=a0, [0]=a1, { .z=3 } };
+
+	print(a0);
+	print(a1);
+	printf("{\n");
+	for (int i = 0; i < 3; i++) {
+		printf("  ");
+		print(a2[i]);
+	}
+	printf("}\n");
+}
Index: doc/theses/rob_schluntz/examples/intro/ignore.c
===================================================================
--- doc/theses/rob_schluntz/examples/intro/ignore.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/intro/ignore.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,22 @@
+struct __ignore_t__ {
+};
+__ignore_t__ __ignore__;
+
+forall(dtype T | sized(T))
+__ignore_t__ ?=?(__ignore_t__ * dst, T src) {
+	return *dst;
+}
+
+forall(dtype T | sized(T) | { void ?{}(T *, T); })
+T ?=?(T * dst, __ignore_t__ src) {
+	return *dst;
+}
+
+int main() {
+	int x = 123, y = 456, z = 789;
+	double j = 3.14, i = 8.77;
+	[x, __ignore__, z] = [y, z, x];
+	[i, j, __ignore__] = [0, i, j];
+	printf("%d %d %d\n", x, y, z);
+	printf("%g %g\n", i, j);
+}
Index: doc/theses/rob_schluntz/examples/intro/ires.java
===================================================================
--- doc/theses/rob_schluntz/examples/intro/ires.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/intro/ires.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,3 @@
+public interface ires {
+	public void write(String filename, String msg) throws Exception;
+}
Index: doc/theses/rob_schluntz/examples/intro/res.java
===================================================================
--- doc/theses/rob_schluntz/examples/intro/res.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/intro/res.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,34 @@
+public class res {
+	private ires res;
+	public res(ires res) {
+		this.res = res;
+	}
+
+	public void dotest(String msg, int open, int write, int close) {
+		try {
+			System.out.println(msg);
+			FileOutputStream.throwOnOpen = open;
+			FileOutputStream.throwOnWrite = write;
+			FileOutputStream.throwOnClose = close;
+			res.write("foo.txt", "output message");
+		} catch (Exception ex) {
+		}
+		FileOutputStream.numOpens = 0;
+		FileOutputStream.numWrites = 0;
+		FileOutputStream.numCloses = 0;
+		System.gc();
+		System.runFinalization();
+		System.out.println();
+		System.out.flush();
+	}
+
+	public static void dotest(ires res) {
+		res r = new res(res);
+		r.dotest("Exception on open 1",  1, 0, 0);
+		r.dotest("Exception on open 2",  2, 0, 0);
+		r.dotest("Exception on write 1", 0, 1, 0);
+		r.dotest("Exception on write 2", 0, 2, 0);
+		r.dotest("Exception on close 1", 0, 0, 1);
+		r.dotest("Exception on close 2", 0, 0, 2);
+	}
+}
Index: doc/theses/rob_schluntz/examples/intro/res1.java
===================================================================
--- doc/theses/rob_schluntz/examples/intro/res1.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/intro/res1.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,16 @@
+import java.io.IOException;
+
+public class res1 implements ires {
+	public void write(String filename, String msg) throws IOException {
+	  FileOutputStream out = new FileOutputStream(filename);  // may throw FileNotFoundException
+	  FileOutputStream log = new FileOutputStream("log.txt"); //  or SecurityException
+	  out.write(msg.getBytes()); // may throw an IOException
+	  log.write(msg.getBytes()); // may throw an IOException
+	  log.close(); // may throw an IOException
+	  out.close(); // may throw an IOException
+	}
+
+	public static void main(String[] args) {
+		res.dotest(new res1());
+	}
+}
Index: doc/theses/rob_schluntz/examples/intro/res2.java
===================================================================
--- doc/theses/rob_schluntz/examples/intro/res2.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/intro/res2.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,22 @@
+import java.io.IOException;
+
+public class res2 implements ires {
+  public void write(String filename, String msg) throws Exception {
+    FileOutputStream out = new FileOutputStream(filename); // may throw FileNotFoundException
+    try {
+      FileOutputStream log = new FileOutputStream("log.txt"); //  or SecurityException
+      try {
+        out.write(msg.getBytes()); // may throw an IOException
+        log.write(msg.getBytes()); // may throw an IOException
+      } finally {
+        log.close(); // may throw an IOException
+      }
+    } finally {
+      out.close(); // may throw an IOException
+    }
+  }
+
+  public static void main(String[] args) {
+    res.dotest(new res2());
+  }
+}
Index: doc/theses/rob_schluntz/examples/intro/res3.java
===================================================================
--- doc/theses/rob_schluntz/examples/intro/res3.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/intro/res3.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,17 @@
+import java.io.IOException;
+
+public class res3 implements ires {
+  public void write(String filename, String msg) throws Exception {
+    try (
+      FileOutputStream out = new FileOutputStream(filename); // may throw FileNotFoundException
+      FileOutputStream log = new FileOutputStream("log.txt"); //  or SecurityException
+    ) {
+      out.write(msg.getBytes()); // may throw an IOException
+      log.write(msg.getBytes()); // may throw an IOException
+    }
+  }
+
+  public static void main(String[] args) {
+    res.dotest(new res3());
+  }
+}
Index: doc/theses/rob_schluntz/examples/intro/tuple.cc
===================================================================
--- doc/theses/rob_schluntz/examples/intro/tuple.cc	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/intro/tuple.cc	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,10 @@
+#include <iostream>
+#include <tuple>
+using namespace std;
+
+int main() {
+	tuple<int, int, int> triple(10, 20, 30);
+	cout << get<1>(triple) << endl;
+	tuple_element<2, tuple<int, float, double>>::type x = 3.14;
+	cout << tuple_size<decltype(triple)>::value << endl;
+}
Index: doc/theses/rob_schluntz/examples/intro/variadic.java
===================================================================
--- doc/theses/rob_schluntz/examples/intro/variadic.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/intro/variadic.java	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,25 @@
+class variadic {
+  int sum(int... args) {
+    int s = 0;
+    for (int x : args) {
+      s += x;
+    }
+    print(args.length, " ", args[0], " ", args[args.length-1], "\n");
+    return s;
+  }
+
+  void print(Object... objs) {
+    for (Object obj : objs) {
+      System.out.print(obj);
+    }
+  }
+
+  public void run() {
+    print("The sum from 1 to 10 is ", sum(1,2,3,4,5,6,7,8,9,10), ".\n");
+    print(sum(new int[]{1, 2,3}), "\n");
+  }
+
+  public static void main(String args[]) {
+    new variadic().run();
+  }
+}
Index: doc/theses/rob_schluntz/examples/malloc.cc
===================================================================
--- doc/theses/rob_schluntz/examples/malloc.cc	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/malloc.cc	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,20 @@
+#include <cstdlib>
+#include <iostream>
+using namespace std;
+
+class A {
+public:
+  A() {
+    cout << "A()" << endl;  
+  }
+  ~A(){
+    cout << "~A()" << endl;
+  }
+};
+
+int main() {
+  A * x = (A*)malloc(sizeof(A));
+  A * y = new A;
+  delete y;
+  free(x);
+}
Index: doc/theses/rob_schluntz/examples/nested.c
===================================================================
--- doc/theses/rob_schluntz/examples/nested.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/nested.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,8 @@
+struct S {
+  int x;
+};
+void ^?{}(S * s) { }
+
+int main() {
+  [S, [S, S]] x;
+}
Index: doc/theses/rob_schluntz/examples/poly.c
===================================================================
--- doc/theses/rob_schluntz/examples/poly.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/poly.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,14 @@
+forall(dtype T)
+void foo(T x) {
+
+}
+
+forall(dtype T)
+void bar(T * y) { }
+
+int main() {
+  foo(5);
+  foo("baz");
+  foo(foo);
+  bar(foo);
+}
Index: doc/theses/rob_schluntz/examples/scope_guard.h
===================================================================
--- doc/theses/rob_schluntz/examples/scope_guard.h	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/scope_guard.h	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,29 @@
+#ifndef SCOPE_GUARD_H
+#define SCOPE_GUARD_H
+
+struct ScopeGuard {
+  void (*fn)(void *);
+  // Args args;
+};
+
+// forall(ttype Args, ttype Ret)
+// void ?{}(ScopeGuard(Args, Ret) * this) {
+void ?{}(ScopeGuard * this) {
+
+}
+
+// // inline
+// forall(ttype Args, ttype Ret)
+// void ?{}(ScopeGuard(Args, Ret) * this, Ret (*fn)(Args), Args args) {
+//   this->fn = fn;
+//   // this->args = args;
+// }
+
+// inline
+// forall(ttype Args, ttype Ret)
+// void ^?{}(ScopeGuard(Args, Ret) * this) {
+void ^?{}(ScopeGuard * this) {
+  this->fn(0);
+}
+
+#endif
Index: doc/theses/rob_schluntz/examples/test_scoped_guard.c
===================================================================
--- doc/theses/rob_schluntz/examples/test_scoped_guard.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/test_scoped_guard.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,12 @@
+#include "scope_guard.h"
+
+extern "C" {
+  void free(void *);
+}
+
+int main() {
+  int * x = malloc(sizeof(10));
+  // ScopeGuard(int*, void) foo;
+  ScopeGuard foo;
+  foo.fn = free;
+}
Index: doc/theses/rob_schluntz/examples/tuples/assign.c
===================================================================
--- doc/theses/rob_schluntz/examples/tuples/assign.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/tuples/assign.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,9 @@
+int x, z;
+double y;
+[double, double] f();
+
+int main () {
+  [x, y, z] = [f(), 3];       // multiple assignment
+  // [x, y, z] = 1.5;            // mass assignment
+}
+
Index: doc/theses/rob_schluntz/examples/tuples/cast.c
===================================================================
--- doc/theses/rob_schluntz/examples/tuples/cast.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/tuples/cast.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,10 @@
+[int, int, int] f();
+[int, [int, int], int] g();
+
+int main() {
+  ([int, double])f();           // (1)
+  ([int, [int], int])g();         // (2)
+  printf("%d %d\n", ([void, [int, int]])g());      // (3) -- should work and doesn't -- tries to construct void object, but should ignore that component in terms of the type of the tuple
+  // ([int, int, int, int])g();    // (4) -- should not work and doesn't
+  // ([int, [int, int, int]])g();  // (5) -- should not work and doesn't
+}
Index: doc/theses/rob_schluntz/examples/tuples/ctor.c
===================================================================
--- doc/theses/rob_schluntz/examples/tuples/ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/tuples/ctor.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,10 @@
+struct S { int x; double y; };
+[void] ?{}(* [int, double] this, S s) {
+  this->0 = s.x;
+  this->1 = s.y;
+}
+int main() {
+  S s = { 123, 345 };
+  [int, double] x = s;
+  printf("%d %g\n", x);
+}
Index: doc/theses/rob_schluntz/examples/tuples/mrv.c
===================================================================
--- doc/theses/rob_schluntz/examples/tuples/mrv.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/tuples/mrv.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,2 @@
+[int, int] foo();
+[double, int] bar();
Index: doc/theses/rob_schluntz/examples/tuples/mrv_1.c
===================================================================
--- doc/theses/rob_schluntz/examples/tuples/mrv_1.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/tuples/mrv_1.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <ctype.h>
+struct mf_ret {
+  int freq;
+  char ch;
+};
+
+struct mf_ret most_frequent(const char * str) {
+  char freqs [26] = { 0 };
+  struct mf_ret ret = { 0, 'a' };
+  for (int i = 0; str[i] != '\0'; ++i) {
+    if (isalpha(str[i])) {        // only count letters
+      int ch = tolower(str[i]);   // convert to lower case
+      int idx = ch-'a';
+      if (++freqs[idx] > ret.freq) {  // update on new max
+        ret.freq = freqs[idx];
+        ret.ch = ch;
+      }
+    }
+  }
+  return ret;
+}
+
+void dothing(const char * str) {
+  struct mf_ret ret = most_frequent(str);
+  printf("%s -- %d %c\n", str, ret.freq, ret.ch);
+}
+
+int main() {
+  dothing("hello");
+  dothing("hello, world!");
+  dothing("aaabbbba");
+  dothing("");
+}
Index: doc/theses/rob_schluntz/examples/tuples/mrv_2.c
===================================================================
--- doc/theses/rob_schluntz/examples/tuples/mrv_2.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/tuples/mrv_2.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <ctype.h>
+
+int most_frequent(const char * str, char * ret_ch) {
+  char freqs [26] = { 0 };
+  int ret_freq = 0;
+  for (int i = 0; str[i] != '\0'; ++i) {
+    if (isalpha(str[i])) {        // only count letters
+      int ch = tolower(str[i]);   // convert to lower case
+      int idx = ch-'a';
+      if (++freqs[idx] > ret_freq) {  // update on new max
+        ret_freq = freqs[idx];
+        *ret_ch = ch;
+      }
+    }
+  }
+  return ret_freq;
+}
+
+void dothing(const char * str) {  // print str, its highest letter count, and that letter
+  char ch = 'a';  // most_frequent never writes *ret_ch when str has no letters
+  int freq = most_frequent(str, &ch);
+  printf("%s -- %d %c\n", str, freq, ch);
+}
+
+int main() {
+  dothing("hello");
+  dothing("hello, world!");
+  dothing("aaabbbba");
+  dothing("");
+}
Index: doc/theses/rob_schluntz/examples/tuples/mrv_3.c
===================================================================
--- doc/theses/rob_schluntz/examples/tuples/mrv_3.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/tuples/mrv_3.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,33 @@
+#include <stdio.h>
+#include <ctype.h>
+
+[int, char] most_frequent(const char * str) {
+  char freqs [26] = { 0 };
+  int ret_freq = 0;
+  char ret_ch = 'a';
+  for (int i = 0; str[i] != '\0'; ++i) {
+    if (isalpha(str[i])) {        // only count letters
+      int ch = tolower(str[i]);   // convert to lower case
+      int idx = ch-'a';
+      if (++freqs[idx] > ret_freq) {  // update on new max
+        ret_freq = freqs[idx];
+        ret_ch = ch;
+      }
+    }
+  }
+  return [ret_freq, ret_ch];
+}
+
+void dothing(const char * str) {  // print str, its highest letter count, and that letter
+  int freq;
+  char ch;
+  [freq, ch] = most_frequent(str);  // destructure the returned tuple into the locals
+  printf("%s -- %d %c\n", str, freq, ch);
+}
+
+int main() {
+  dothing("hello");
+  dothing("hello, world!");
+  dothing("aaabbbba");
+  dothing("");
+}
Index: doc/theses/rob_schluntz/examples/tuples/named.c
===================================================================
--- doc/theses/rob_schluntz/examples/tuples/named.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/tuples/named.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,6 @@
+typedef [int x, int y] Point2D;
+Point2D p1, p2;
+int main() {
+  p1.x + p1.y + p2.x + p2.y;
+  p1.0 + p1.1 + p2.0 + p2.1;  // equivalent
+}
Index: doc/theses/rob_schluntz/examples/variadic/new.c
===================================================================
--- doc/theses/rob_schluntz/examples/variadic/new.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/variadic/new.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,13 @@
+forall(dtype T | sized(T)) T * malloc(void);
+
+forall(dtype T, ttype Params | sized(T) | { void ?{}(T *, Params); })
+T * new(Params p) {
+  return ((T*)malloc()){ p }; // construct result of malloc
+}
+
+struct S { int x, y; }; 
+void ?{}(S *, int, int);
+
+int main() {
+  S * s = new(3, 4);
+}
Index: doc/theses/rob_schluntz/examples/variadic/print.c
===================================================================
--- doc/theses/rob_schluntz/examples/variadic/print.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/variadic/print.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,11 @@
+forall(otype T, ttype Params |
+  { void print(T); void print(Params); })
+void print(T arg, Params rest) {
+  print(arg);
+  print(rest);
+}
+void print(const char * x) { printf("%s", x); }
+void print(int x) { printf("%d", x);  }
+int main() {
+  print("x = ", 123, ".");
+}
Index: doc/theses/rob_schluntz/examples/variadic/sum1.c
===================================================================
--- doc/theses/rob_schluntz/examples/variadic/sum1.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/variadic/sum1.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,8 @@
+int sum(void){ return 0; }        // (0)
+forall(ttype Params | { int sum(Params); })
+int sum(int x, Params rest) { // (1)
+  return x+sum(rest);
+}
+int main() {
+  printf("%d\n", sum(10, 20, 30, 40, 50, 60));
+}
Index: doc/theses/rob_schluntz/examples/variadic/sum2.c
===================================================================
--- doc/theses/rob_schluntz/examples/variadic/sum2.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/examples/variadic/sum2.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,10 @@
+int sum(int x, int y){
+  return x+y;
+}
+forall(ttype Params | { int sum(int, Params); })
+int sum(int x, int y, Params rest) {
+  return sum(x+y, rest);
+}
+int main() {
+  printf("%d\n", sum(10, 20, 30, 40, 50, 60));
+}
Index: doc/theses/rob_schluntz/intro.tex
===================================================================
--- doc/theses/rob_schluntz/intro.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/intro.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,910 @@
+%======================================================================
+\chapter{Introduction}
+%======================================================================
+
+\section{\protect\CFA Background}
+\label{s:background}
+\CFA \footnote{Pronounced ``C-for-all'', and written \CFA or Cforall.} is a modern non-object-oriented extension to the C programming language.
+As it is an extension of C, there is already a wealth of existing C code and principles that govern the design of the language.
+Among the goals set out in the original design of \CFA, four points stand out \cite{Bilson03}.
+\begin{enumerate}
+\item The behaviour of standard C code must remain the same when translated by a \CFA compiler as when translated by a C compiler.
+\item Standard C code must be as fast and as small when translated by a \CFA compiler as when translated by a C compiler.
+\item \CFA code must be at least as portable as standard C code.
+\item Extensions introduced by \CFA must be translated in the most efficient way possible.
+\end{enumerate}
+Therefore, these design principles must be kept in mind throughout the design and development of new language features.
+In order to appeal to existing C programmers, great care must be taken to ensure that new features naturally feel like C.
+These goals ensure existing C code-bases can be converted to \CFA incrementally with minimal effort, and C programmers can productively generate \CFA code without training beyond the features being used.
+Unfortunately, \CC is actively diverging from C, so incremental additions require significant effort and training, coupled with multiple legacy design-choices that cannot be updated.
+
+The current implementation of \CFA is a source-to-source translator from \CFA to GNU C \cite{GCCExtensions}.
+
+The remainder of this section describes some of the important features that currently exist in \CFA, to give the reader the necessary context in which the new features presented in this thesis must dovetail.
+
+\subsection{C Background}
+\label{sub:c_background}
+In the context of this work, the term \emph{object} refers to a region of data storage in the execution environment, the contents of which can represent values \cite[p.~6]{C11}.
+
+One of the lesser-known features of standard C is \emph{designations}.
+Designations are similar to named parameters in languages such as Python and Scala, except that they only apply to aggregate initializers.
+Note that in \CFA, designations use a colon separator, rather than an equals sign as in C, because this syntax is one of the few places that conflicts with the new language features.
+\begin{cfacode}
+struct A {
+  int w, x, y, z;
+};
+A a0 = { .x:4, .z:1, .x:8 };
+A a1 = { 1, .y:7, 6 };
+A a2[4] = { [2]:a0, [0]:a1, { .z:3 } };
+// equivalent to
+// A a0 = { 0, 8, 0, 1 };
+// A a1 = { 1, 0, 7, 6 };
+// A a2[4] = { a1, { 0, 0, 0, 3 }, a0, { 0, 0, 0, 0 } };
+\end{cfacode}
+Designations allow specifying the field to initialize by name, rather than by position.
+Any field not explicitly initialized is initialized as if it had static storage duration \cite[p.~141]{C11}.
+A designator specifies the current object for initialization, and as such any undesignated sub-objects pick up where the last initialization left off.
+For example, in the initialization of @a1@, the initializer of @y@ is @7@, and the unnamed initializer @6@ initializes the next sub-object, @z@.
+Later initializers override earlier initializers, so a sub-object for which there is more than one initializer is only initialized by its last initializer.
+These semantics can be seen in the initialization of @a0@, where @x@ is designated twice, and thus initialized to @8@.
+
+C also provides \emph{compound literal} expressions, which provide a first-class mechanism for creating unnamed objects.
+\begin{cfacode}
+struct A { int x, y; };
+int f(A, int);
+int g(int *);
+
+f((A){ 3, 4 }, (int){ 5 } = 10);
+g((int[]){ 1, 2, 3 });
+g(&(int){ 0 });
+\end{cfacode}
+Compound literals create an unnamed object, and result in an lvalue, so it is legal to assign a value into a compound literal or to take its address \cite[p.~86]{C11}.
+Syntactically, compound literals look like a cast operator followed by a brace-enclosed initializer, but semantically are different from a C cast, which only applies basic conversions and coercions and is never an lvalue.
+
+The \CFA translator makes use of several GNU C extensions, including \emph{nested functions} and \emph{attributes}.
+Nested functions make it possible to access data that is lexically in scope in the nested function's body.
+\begin{cfacode}
+int f() {
+  int x = 0;
+  void g() {
+    x++;
+  }
+  g();  // changes x
+}
+\end{cfacode}
+Nested functions come with the usual C caveat that they should not leak into the containing environment, since they are only valid as long as the containing function's stack frame is active.
+
+Attributes make it possible to inform the compiler of certain properties of the code.
+For example, a function can be marked as deprecated, so that legacy APIs can be identified and slowly removed, or as \emph{hot}, so that the compiler knows the function is called frequently and should be aggressively optimized.
+\begin{cfacode}
+__attribute__((deprecated("foo is deprecated, use bar instead")))
+void foo();
+__attribute__((hot)) void bar(); // heavily optimized
+
+foo();  // warning
+bar();
+\end{cfacode}
+
+\subsection{Overloading}
+\label{sub:overloading}
+Overloading is the ability to specify multiple entities with the same name.
+The most common form of overloading is function overloading, wherein multiple functions can be defined with the same name, but with different signatures.
+C provides a small amount of built-in overloading, \eg + is overloaded for the basic types.
+Like in \CC, \CFA allows user-defined overloading based both on the number of parameters and on the types of parameters.
+\begin{cfacode}
+void f(void);  // (1)
+void f(int);   // (2)
+void f(char);  // (3)
+
+f('A');        // selects (3)
+\end{cfacode}
+In this case, there are three @f@ procedures, where @f@ takes either 0 or 1 arguments, and if an argument is provided then it may be of type @int@ or of type @char@.
+Exactly which procedure is executed depends on the number and types of arguments passed.
+If there is no exact match available, \CFA attempts to find a suitable match by examining the C built-in conversion heuristics.
+The \CFA expression resolution algorithm uses a cost function to determine the interpretation that uses the fewest conversions and polymorphic type bindings.
+\begin{cfacode}
+void g(long long);
+
+g(12345);
+\end{cfacode}
+In the above example, there is only one instance of @g@, which expects a single parameter of type @long long@.
+Here, the argument provided has type @int@, but since all possible values of type @int@ can be represented by a value of type @long long@, there is a safe conversion from @int@ to @long long@, and so \CFA calls the provided @g@ routine.
+
+Overloading solves the problem present in C where there can only be one function with a given name, requiring multiple names for functions that perform the same operation but take in different types.
+This can be seen in the example of the absolute value functions in C:
+\begin{cfacode}
+// stdlib.h
+int abs(int);
+long int labs(long int);
+long long int llabs(long long int);
+\end{cfacode}
+In \CFA, the functions @labs@ and @llabs@ are replaced by appropriate overloads of @abs@.
+
+In addition to this form of overloading, \CFA also allows overloading based on the number and types of \emph{return} values.
+This extension is a feature that is not available in \CC, but is available in other programming languages such as Ada \cite{Ada95}.
+\begin{cfacode}
+int g();         // (1)
+double g();      // (2)
+
+int x = g();     // selects (1)
+\end{cfacode}
+Here, the only difference between the signatures of the different versions of @g@ is in the return values.
+The result context is used to select an appropriate routine definition.
+In this case, the result of @g@ is assigned into a variable of type @int@, so \CFA prefers the routine that returns a single @int@, because it is an exact match.
+
+Return-type overloading solves similar problems to parameter-list overloading, in that multiple functions that perform similar operations can have the same name, but produce different values.
+One use case for this feature is to provide two versions of the @bsearch@ routine:
+\begin{cfacode}
+forall(otype T | { int ?<?( T, T ); })
+T * bsearch(T key, const T * arr, size_t dimension) {
+  int comp(const void * t1, const void * t2) {
+    return *(T *)t1 < *(T *)t2 ? -1 : *(T *)t2 < *(T *)t1 ? 1 : 0;
+  }
+  return (T *)bsearch(&key, arr, dimension, sizeof(T), comp);
+}
+forall(otype T | { int ?<?( T, T ); })
+unsigned int bsearch(T key, const T * arr, size_t dimension) {
+  T *result = bsearch(key, arr, dimension);
+  // pointer subtraction includes sizeof(T)
+  return result ? result - arr : dimension;
+}
+double key = 5.0;
+double vals[10] = { /* 10 floating-point values */ };
+
+double * val = bsearch( 5.0, vals, 10 ); // selection based on return type
+int posn = bsearch( 5.0, vals, 10 );
+\end{cfacode}
+The first version provides a thin wrapper around the C @bsearch@ routine, converting untyped @void *@ to the polymorphic type @T *@, allowing the \CFA compiler to catch errors when the type of @key@, @arr@, and the target at the call-site do not agree.
+The second version provides an alternate return of the index in the array of the selected element, rather than its address.
+
+There are times when a function should logically return multiple values.
+Since a function in standard C can only return a single value, a programmer must either take in additional return values by address, or the function's designer must create a wrapper structure to package multiple return-values.
+For example, the first approach:
+\begin{cfacode}
+int f(int * ret) {        // returns a value through parameter ret
+  *ret = 37;
+  return 123;
+}
+
+int res1, res2;           // allocate return value
+res1 = f(&res2);          // explicitly pass storage
+\end{cfacode}
+is awkward because it requires the caller to explicitly allocate memory for $n$ result variables, even if they are only temporary values used as a subexpression, or even not used at all.
+The second approach:
+\begin{cfacode}
+struct A {
+  int x, y;
+};
+struct A g() {            // returns values through a structure
+  return (struct A) { 123, 37 };
+}
+struct A res3 = g();
+... res3.x ... res3.y ... // use result values
+\end{cfacode}
+is awkward because the caller has to either learn the field names of the structure or learn the names of helper routines to access the individual return values.
+Both approaches are syntactically unnatural.
+
+In \CFA, it is possible to directly declare a function returning multiple values.
+This extension provides important semantic information to the caller, since return values are only for output.
+\begin{cfacode}
+[int, int] f() {       // no new type
+  return [123, 37];
+}
+\end{cfacode}
+However, the ability to return multiple values is useless without a syntax for accepting the results from the function.
+
+In standard C, return values are most commonly assigned directly into local variables, or are used as the arguments to another function call.
+\CFA allows both of these contexts to accept multiple return values.
+\begin{cfacode}
+int res1, res2;
+[res1, res2] = f();    // assign return values into local variables
+
+void g(int, int);
+g(f());                // pass both return values of f to g
+\end{cfacode}
+As seen in the example, it is possible to assign the results from a return value directly into local variables.
+These local variables can be referenced naturally, without requiring any unpacking as in structured return values.
+Perhaps more interesting is the fact that multiple return values can be passed to multiple parameters seamlessly, as in the call @g(f())@.
+In this call, the return values from @f@ are linked to the parameters of @g@ so that each of the return values is passed directly to the corresponding parameter of @g@, without any explicit storing, unpacking, or additional naming.
+
+An extra quirk introduced by multiple return values is in the resolution of function calls.
+\begin{cfacode}
+int f();            // (1)
+[int, int] f();     // (2)
+
+void g(int, int);
+
+int x, y;
+[x, y] = f();       // selects (2)
+g(f());             // selects (2)
+\end{cfacode}
+In this example, the only possible call to @f@ that can produce the two @int@s required for assigning into the variables @x@ and @y@ is the second option.
+Similar reasoning holds for the call to the function @g@.
+
+This duality between aggregation and aliasing can be seen in the C standard library in the @div@ and @remquo@ functions, which return the quotient and remainder for a division of integer and floating-point values, respectively.
+\begin{cfacode}
+typedef struct { int quo, rem; } div_t; // from stdlib.h
+div_t div( int num, int den );
+double remquo( double num, double den, int * quo );
+div_t qr = div( 13, 5 );            // return quotient/remainder aggregate
+int q;
+double r = remquo( 13.5, 5.2, &q ); // return remainder, alias quotient
+\end{cfacode}
+@div@ aggregates the quotient/remainder in a structure, while @remquo@ aliases a parameter to an argument.
+Alternatively, a programming language can directly support returning multiple values, \eg in \CFA:
+\begin{lstlisting}
+[int, int] div(int num, int den);               // return two integers
+[double, double] div( double num, double den ); // return two doubles
+int q, r;                     // overloaded variable names
+double q, r;
+[q, r] = div(13, 5);          // select appropriate div and q, r
+[q, r] = div(13.5, 5.2);
+\end{lstlisting}
+
+In \CFA, overloading also applies to operator names, known as \emph{operator overloading}.
+Similar to function overloading, a single operator is given multiple meanings by defining new versions of the operator with different signatures.
+In \CC, this can be done as follows:
+\begin{cppcode}
+struct A { int i; };
+A operator+(A x, A y);
+bool operator<(A x, A y);
+\end{cppcode}
+
+In \CFA, the same example can be written as follows.
+\begin{cfacode}
+struct A { int i; };
+A ?+?(A x, A y);    // '?'s represent operands
+int ?<?(A x, A y);
+\end{cfacode}
+Notably, the only difference is syntax.
+Most of the operators supported by \CC for operator overloading are also supported in \CFA.
+Notable exceptions are the logical operators (\eg @||@), the sequence operator (\ie @,@), and the member-access operators (\eg @.@ and \lstinline{->}).
+
+Finally, \CFA also permits overloading variable identifiers.
+This feature is not available in \CC.
+\begin{cfacode}
+struct Rational { int numer, denom; };
+int x = 3;               // (1)
+double x = 1.27;         // (2)
+Rational x = { 4, 11 };  // (3)
+
+void g(double);
+
+x += 1;                  // chooses (1)
+g(x);                    // chooses (2)
+Rational y = x;          // chooses (3)
+\end{cfacode}
+In this example, there are three definitions of the variable @x@.
+Based on the context, \CFA attempts to choose the variable whose type best matches the expression context.
+When used judiciously, this feature allows names like @MAX@, @MIN@, and @PI@ to apply across many types.
+
+Finally, the values @0@ and @1@ have special status in standard C.
+In particular, the value @0@ is both an integer and a pointer literal, and thus its meaning depends on the context.
+In addition, several operations can be redefined in terms of other operations and the values @0@ and @1@.
+For example,
+\begin{cfacode}
+int x;
+if (x) {  // if (x != 0)
+  x++;    //   x += 1;
+}
+\end{cfacode}
+Every if- and iteration-statement in C compares the condition with @0@, and every increment and decrement operator is semantically equivalent to adding or subtracting the value @1@ and storing the result.
+Due to these rewrite rules, the values @0@ and @1@ have the types \zero and \one in \CFA, which allow for overloading various operations that connect to @0@ and @1@ \footnote{In the original design of \CFA, @0@ and @1@ were overloadable names \cite[p.~7]{cforall}.}.
+The types \zero and \one have special built-in implicit conversions to the various integral types, and a conversion to pointer types for @0@, which allows standard C code involving @0@ and @1@ to work as normal.
+\begin{cfacode}
+// lvalue is similar to returning a reference in C++
+lvalue Rational ?+=?(Rational *a, Rational b);
+Rational ?=?(Rational * dst, zero_t) {
+  return *dst = (Rational){ 0, 1 };
+}
+
+Rational sum(Rational *arr, int n) {
+  Rational r;
+  r = 0;     // use rational-zero_t assignment
+  for (; n > 0; n--) {
+    r += arr[n-1];
+  }
+  return r;
+}
+\end{cfacode}
+This function takes an array of @Rational@ objects and produces the @Rational@ representing the sum of the array.
+Note the use of an overloaded assignment operator to set an object of type @Rational@ to an appropriate @0@ value.
+
+\subsection{Polymorphism}
+\label{sub:polymorphism}
+In its most basic form, polymorphism grants the ability to write a single block of code that accepts different types.
+In particular, \CFA supports the notion of parametric polymorphism.
+Parametric polymorphism allows a function to be written generically, for all values of all types, without regard to the specifics of a particular type.
+For example, in \CC, the simple identity function for all types can be written as:
+\begin{cppcode}
+template<typename T>
+T identity(T x) { return x; }
+\end{cppcode}
+\CC uses the template mechanism to support parametric polymorphism. In \CFA, an equivalent function can be written as:
+\begin{cfacode}
+forall(otype T)
+T identity(T x) { return x; }
+\end{cfacode}
+Once again, the only visible difference in this example is syntactic.
+Fundamental differences can be seen by examining more interesting examples.
+In \CC, a generic sum function is written as follows:
+\begin{cppcode}
+template<typename T>
+T sum(T *arr, int n) {
+  T t;  // default construct => 0
+  for (; n > 0; n--) t += arr[n-1];
+  return t;
+}
+\end{cppcode}
+Here, the code assumes the existence of a default constructor, assignment operator, and an addition operator over the provided type @T@.
+If any of these required operators are not available, the \CC compiler produces an error message stating which operators could not be found.
+
+A similar sum function can be written in \CFA as follows:
+\begin{cfacode}
+forall(otype T | **R**{ T ?=?(T *, zero_t); T ?+=?(T *, T); }**R**)
+T sum(T *arr, int n) {
+  T t = 0;
+  for (; n > 0; n--) t = t += arr[n-1];
+  return t;
+}
+\end{cfacode}
+The first thing to note here is that immediately following the declaration of @otype T@ is a list of \emph{type assertions} that specify restrictions on acceptable choices of @T@.
+In particular, the assertions above specify that there must be an assignment from \zero to @T@ and an addition assignment operator from @T@ to @T@.
+The existence of an assignment operator from @T@ to @T@ and the ability to create an object of type @T@ are assumed implicitly by declaring @T@ with the @otype@ type-class.
+In addition to @otype@, there are currently two other type-classes.
+
+@dtype@, short for \emph{data type}, serves as the top type for object types; any object type, complete or incomplete, can be bound to a @dtype@ type variable.
+To contrast, @otype@, short for \emph{object type}, is a @dtype@ with known size, alignment, and an assignment operator, and thus binds only to complete object types.
+With this extra information, complete objects can be used in polymorphic code in the same way they are used in monomorphic code, providing familiarity and ease of use.
+The third type-class is @ftype@, short for \emph{function type}, matching only function types.
+The three type parameter kinds are summarized in \autoref{table:types}.
+
+\begin{table}[h!]
+  \begin{center}
+    \begin{tabular}{|c||c|c|c||c|c|c|}
+                                                                                                    \hline
+    name    & object type & incomplete type & function type & can assign & can create & has size \\ \hline
+    @otype@ & X           &                 &               & X                & X          & X        \\ \hline
+    @dtype@ & X           & X               &               &                  &            &          \\ \hline
+    @ftype@ &             &                 & X             &                  &            &          \\ \hline
+    \end{tabular}
+  \end{center}
+  \caption{\label{table:types} The different kinds of type parameters in \protect\CFA}
+\end{table}
+
+A major difference between the approaches of \CC and \CFA to polymorphism is that the set of assumed properties for a type is \emph{explicit} in \CFA.
+One of the major limiting factors of \CC's approach is that templates cannot be separately compiled.
+In contrast, the explicit nature of assertions allows \CFA's polymorphic functions to be separately compiled, as the function prototype states all necessary requirements separate from the implementation.
+For example, the prototype for the previous sum function is
+\begin{cfacode}
+forall(otype T | **R**{ T ?=?(T *, zero_t); T ?+=?(T *, T); }**R**)
+T sum(T *arr, int n);
+\end{cfacode}
+With this prototype, a caller in another translation unit knows all of the constraints on @T@, and thus knows all of the operations that need to be made available to @sum@.
+
+In \CFA, a set of assertions can be factored into a \emph{trait}.
+\begin{cfacode}
+trait Addable(otype T) {
+  T ?+?(T, T);
+  T ++?(T);
+  T ?++(T);
+}
+forall(otype T | Addable(T)) void f(T);
+forall(otype T | Addable(T) | { T --?(T); }) T g(T);
+forall(otype T, U | Addable(T) | { T ?/?(T, U); }) U h(T, U);
+\end{cfacode}
+This capability allows specifying the same set of assertions in multiple locations, without the repetition and likelihood of mistakes that come with manually writing them out for each function declaration.
+
+An interesting application of return-type resolution and polymorphism is a polymorphic version of @malloc@.
+\begin{cfacode}
+forall(dtype T | sized(T))
+T * malloc() {
+  return (T*)malloc(sizeof(T)); // call C malloc
+}
+int * x = malloc();     // malloc(sizeof(int))
+double * y = malloc();  // malloc(sizeof(double))
+
+struct S { ... };
+S * s = malloc();       // malloc(sizeof(S))
+\end{cfacode}
+The built-in trait @sized@ ensures that size and alignment information for @T@ is available in the body of @malloc@ through @sizeof@ and @_Alignof@ expressions respectively.
+In calls to @malloc@, the type @T@ is bound based on call-site information, allowing \CFA code to allocate memory without the potential for errors introduced by manually specifying the size of the allocated block.
+
+\subsection{Planned Features}
+
+One of the planned features of \CFA is \emph{reference types}.
+At a high level, the current proposal is to add references as a way to clean up pointer syntax.
+With references, it will be possible to store any address, as with a pointer, with the key difference being that references are automatically dereferenced.
+\begin{cfacode}
+int x = 0;
+int * p = &x;  // needs &
+int & ref = x; // no &
+
+printf("%d %d\n", *p, ref); // pointer needs *, ref does not
+\end{cfacode}
+
+It is possible to add new functions or shadow existing functions for the duration of a scope, using normal C scoping rules.
+One application of this feature is to reverse the order of @qsort@.
+\begin{cfacode}
+forall(otype T | { int ?<?( T, T ); })
+void qsort(const T * arr, size_t size) {
+  int comp(const void * t1, const void * t2) {
+    return *(T *)t1 < *(T *)t2 ? -1 : *(T *)t2 < *(T *)t1 ? 1 : 0;
+  }
+  qsort(arr, size, sizeof(T), comp);
+
+}
+double vals[10] = { ... };
+qsort(vals, 10);                // ascending order
+{
+  int ?<?(double x, double y) { // locally override behaviour
+    return x > y;
+  }
+  qsort(vals, 10);              // descending sort
+}
+\end{cfacode}
+Currently, there is no way to \emph{remove} a function from consideration for the duration of a scope.
+For example, it may be desirable to eliminate assignment from a scope, to reduce accidental mutation.
+To address this desire, \emph{deleted functions} are a planned feature for \CFA.
+\begin{cfacode}
+forall(otype T) void f(T *);
+
+int x = 0;
+f(&x);  // might modify x
+{
+  int ?=?(int *, int) = delete;
+  f(&x);   // error, no assignment for int
+}
+\end{cfacode}
+Now, if the deleted function is chosen as the best match, the expression resolver emits an error.
+
+\section{Invariants}
+An \emph{invariant} is a logical assertion that is true for some duration of a program's execution.
+Invariants help a programmer to reason about code correctness and prove properties of programs.
+
+\begin{sloppypar}
+In object-oriented programming languages, type invariants are typically established in a constructor and maintained throughout the object's lifetime.
+These assertions are typically achieved through a combination of access-control modifiers and a restricted interface.
+Typically, data which requires the maintenance of an invariant is hidden from external sources using the \emph{private} modifier, which restricts reads and writes to a select set of trusted routines, including member functions.
+It is these trusted routines that perform all modifications to internal data in a way that is consistent with the invariant, by ensuring that the invariant holds true at the end of the routine call.
+\end{sloppypar}
+
+In C, the @assert@ macro is often used to ensure invariants are true.
+Using @assert@, the programmer can check a condition and abort execution if the condition is not true.
+This powerful tool forces the programmer to deal with logical inconsistencies as they occur.
+For production, assertions can be removed by simply defining the preprocessor macro @NDEBUG@, making it simple to ensure that assertions are 0-cost for a performance intensive application.
+\begin{cfacode}
+struct Rational {
+  int n, d;
+};
+struct Rational create_rational(int n, int d) {
+  assert(d != 0);  // precondition
+  if (d < 0) {
+    n *= -1;
+    d *= -1;
+  }
+  assert(d > 0);  // postcondition
+  // rational invariant: d > 0
+  return (struct Rational) { n, d };
+}
+struct Rational rat_abs(struct Rational r) {
+  assert(r.d > 0); // check invariant, since no access control
+  r.n = abs(r.n);
+  assert(r.d > 0); // ensure function preserves invariant on return value
+  return r;
+}
+\end{cfacode}
+
+Some languages, such as D, provide language-level support for specifying program invariants.
+In addition to providing a C-like @assert@ expression, D allows specifying type invariants that are automatically checked at the end of a constructor, beginning of a destructor, and at the beginning and end of every public member function.
+\begin{dcode}
+import std.math;
+struct Rational {
+  invariant {
+    assert(d > 0, "d <= 0");
+  }
+  int n, d;
+  this(int n, int d) {  // constructor
+    assert(d != 0);
+    this.n = n;
+    this.d = d;
+    // implicitly check invariant
+  }
+  Rational abs() {
+    // implicitly check invariant
+    return Rational(std.math.abs(n), d);
+    // implicitly check invariant
+  }
+}
+\end{dcode}
+The D compiler is able to assume that assertions and invariants hold true and perform optimizations based on those assumptions.
+Note, these invariants are internal to the type's correct behaviour.
+
+Types also have external invariants with the state of the execution environment, including the heap, the open-file table, the state of global variables, etc.
+Since resources are finite and shared (concurrency), it is important to ensure that objects clean up properly when they are finished, restoring the execution environment to a stable state so that new objects can reuse resources.
+
+\section{Resource Management}
+\label{s:ResMgmt}
+
+Resource management is a problem that pervades every programming language.
+
+In standard C, resource management is largely a manual effort on the part of the programmer, with a notable exception to this rule being the program stack.
+The program stack grows and shrinks automatically with each function call, as needed for local variables.
+However, whenever a program needs a variable to outlive the block it is created in, the storage must be allocated dynamically with @malloc@ and later released with @free@.
+This pattern is extended to more complex objects, such as files and sockets, which can also outlive the block where they are created, and thus require their own resource management.
+Once allocated storage escapes\footnote{In garbage collected languages, such as Java, escape analysis \cite{Choi:1999:EAJ:320385.320386} is used to determine when dynamically allocated objects are strictly contained within a function, which allows the optimizer to allocate them on the stack.} a block, the responsibility for deallocating the storage is not specified in a function's type, that is, that the return value is owned by the caller.
+This implicit convention is provided only through documentation about the expectations of functions.
+
+In other languages, a hybrid situation exists where resources escape the allocation block, but ownership is precisely controlled by the language.
+This pattern requires a strict interface and protocol for a data structure, consisting of a pre-initialization and a post-termination call, and all intervening access is done via interface routines.
+This kind of encapsulation is popular in object-oriented programming languages, and like the stack, it takes care of a significant portion of resource-management cases.
+
+For example, \CC directly supports this pattern through class types and an idiom known as RAII \footnote{Resource Acquisition is Initialization} by means of constructors and destructors.
+Constructors and destructors are special routines that are automatically inserted into the appropriate locations to bookend the lifetime of an object.
+Constructors allow the designer of a type to establish invariants for objects of that type, since it is guaranteed that every object must be initialized through a constructor.
+In particular, constructors allow a programmer to ensure that all objects are initially set to a valid state.
+On the other hand, destructors provide a simple mechanism for tearing down an object and resetting the environment in which the object lived.
+RAII ensures that if all resources are acquired in a constructor and released in a destructor, there are no resource leaks, even in exceptional circumstances.
+A type with at least one non-trivial constructor or destructor is henceforth referred to as a \emph{managed type}.
+In the context of \CFA, a non-trivial constructor is either a user defined constructor or an auto-generated constructor that calls a non-trivial constructor.
+
+For the remaining resource ownership cases, a programmer must follow a brittle, explicit protocol for freeing resources or an implicit protocol enforced by the programming language.
+
+In garbage collected languages, such as Java, resources are largely managed by the garbage collector.
+Still, garbage collectors typically focus only on memory management.
+There are many kinds of resources that the garbage collector does not understand, such as sockets, open files, and database connections.
+In particular, Java supports \emph{finalizers}, which are similar to destructors.
+Unfortunately, finalizers are only guaranteed to be called before an object is reclaimed by the garbage collector \cite[p.~373]{Java8}, which may not happen if memory use is not contentious.
+Due to operating-system resource-limits, this is unacceptable for many long running programs.
+Instead, the paradigm in Java requires programmers to manually keep track of all resources \emph{except} memory, leading many novices and experts alike to forget to close files, etc.
+Complicating the picture, uncaught exceptions can cause control flow to change dramatically, leaking a resource that appears on first glance to be released.
+\begin{javacode}
+void write(String filename, String msg) throws Exception {
+  FileOutputStream out = new FileOutputStream(filename);
+  FileOutputStream log = new FileOutputStream("log.txt");
+  out.write(msg.getBytes());
+  log.write(msg.getBytes());
+  log.close();
+  out.close();
+}
+\end{javacode}
+Any line in this program can throw an exception, which leads to a profusion of finally blocks around many function bodies, since it is not always clear when an exception may be thrown.
+\begin{javacode}
+public void write(String filename, String msg) throws Exception {
+  FileOutputStream out = new FileOutputStream(filename);
+  try {
+    FileOutputStream log = new FileOutputStream("log.txt");
+    try {
+      out.write(msg.getBytes());
+      log.write(msg.getBytes());
+    } finally {
+      log.close();
+    }
+  } finally {
+    out.close();
+  }
+}
+\end{javacode}
+In Java 7, a new \emph{try-with-resources} construct was added to alleviate most of the pain of working with resources, but ultimately it still places the burden squarely on the user rather than on the library designer.
+Furthermore, for complete safety this pattern requires nested objects to be declared separately, otherwise resources that can throw an exception on close can leak nested resources \footnote{Since close is only guaranteed to be called on objects declared in the try-list and not objects passed as constructor parameters, the @B@ object may not be closed in @new A(new B())@ if @A@'s close raises an exception.} \cite{TryWithResources}.
+\begin{javacode}
+public void write(String filename, String msg) throws Exception {
+  try (  // try-with-resources
+    FileOutputStream out = new FileOutputStream(filename);
+    FileOutputStream log = new FileOutputStream("log.txt");
+  ) {
+    out.write(msg.getBytes());
+    log.write(msg.getBytes());
+  } // automatically closes out and log in every exceptional situation
+}
+\end{javacode}
+Variables declared as part of a try-with-resources statement must conform to the @AutoCloseable@ interface, and the compiler implicitly calls @close@ on each of the variables at the end of the block.
+Depending on when the exception is raised, both @out@ and @log@ are null, @log@ is null, or both are non-null, therefore, the cleanup for these variables at the end is automatically guarded and conditionally executed to prevent null-pointer exceptions.
+
+While Rust \cite{Rust} does not enforce the use of a garbage collector, it does provide a manual memory management environment, with a strict ownership model that automatically frees allocated memory and prevents common memory management errors.
+In particular, a variable has ownership over its associated value, which is freed automatically when the owner goes out of scope.
+Furthermore, values are \emph{moved} by default on assignment, rather than copied, which invalidates the previous variable binding.
+\begin{rustcode}
+struct S {
+  x: i32
+}
+let s = S { x: 123 };
+let z = s;           // move, invalidate s
+println!("{}", s.x); // error, s has been moved
+\end{rustcode}
+Types can be made copyable by implementing the @Copy@ trait.
+
+Rust allows multiple unowned views into an object through references, also known as borrows, provided that a reference does not outlive its referent.
+A mutable reference is allowed only if it is the only reference to its referent, preventing data race errors and iterator invalidation errors.
+\begin{rustcode}
+let mut x = 10;
+{
+  let y = &x;
+  let z = &x;
+  println!("{} {}", y, z); // prints 10 10
+}
+{
+  let y = &mut x;
+  // let z1 = &x;     // not allowed, have mutable reference
+  // let z2 = &mut x; // not allowed, have mutable reference
+  *y = 5;
+  println!("{}", y); // prints 5
+}
+println!("{}", x); // prints 5
+\end{rustcode}
+Since references are not owned, they do not release resources when they go out of scope.
+There is no runtime cost imposed on these restrictions, since they are enforced at compile-time.
+
+Rust provides RAII through the @Drop@ trait, allowing arbitrary code to execute when the object goes out of scope, providing automatic clean up of auxiliary resources, much like a \CC program.
+\begin{rustcode}
+struct S {
+  name: &'static str
+}
+
+impl Drop for S {  // RAII for S
+  fn drop(&mut self) {  // destructor
+    println!("dropped {}", self.name);
+  }
+}
+
+{
+  let x = S { name: "x" };
+  let y = S { name: "y" };
+} // prints "dropped y" "dropped x"
+\end{rustcode}
+
+% D has constructors and destructors that are worth a mention (under classes) https://dlang.org/spec/spec.html
+%  also https://dlang.org/spec/struct.html#struct-constructor
+% these are declared in the struct, so they're closer to C++ than to CFA, at least syntactically. Also do not allow for default constructors
+% D has a GC, which already makes the situation quite different from C/C++
+The programming language D also manages resources with constructors and destructors \cite{D}.
+In D, @struct@s are stack allocatable and managed via scoping like in \CC, whereas @class@es are managed automatically by the garbage collector.
+Like Java, using the garbage collector means that destructors are called indeterminately, requiring the use of finally statements to ensure dynamically allocated resources that are not managed by the garbage collector, such as open files, are cleaned up.
+Since D supports RAII, it is possible to use the same techniques as in \CC to ensure that resources are released in a timely manner.
+Finally, D provides a scope guard statement, which allows an arbitrary statement to be executed at normal scope exit with \emph{success}, at exceptional scope exit with \emph{failure}, or at normal and exceptional scope exit with \emph{exit}. % https://dlang.org/spec/statement.html#ScopeGuardStatement
+It has been shown that the \emph{exit} form of the scope guard statement can be implemented in a library in \CC \cite{ExceptSafe}.
+
+To provide managed types in \CFA, new kinds of constructors and destructors are added to \CFA and discussed in Chapter 2.
+
+\section{Tuples}
+\label{s:Tuples}
+In mathematics, tuples are finite-length sequences which, unlike sets, are ordered and allow duplicate elements.
+In programming languages, tuples provide fixed-sized heterogeneous lists of elements.
+Many programming languages have tuple constructs, such as SETL, \KWC, ML, and Scala.
+
+\KWC, a predecessor of \CFA, introduced tuples to C as an extension of the C syntax, rather than as a full-blown data type \cite{Till89}.
+In particular, Till noted that C already contains a tuple context in the form of function parameter lists.
+The main contributions of that work were in the form of adding tuple contexts to assignment in the form of multiple assignment and mass assignment (discussed in detail in section \ref{s:TupleAssignment}), function return values (see section \ref{s:MRV_Functions}), and record field access (see section \ref{s:MemberAccessTuple}).
+Adding tuples to \CFA has previously been explored by Esteves \cite{Esteves04}.
+
+The design of tuples in \KWC took much of its inspiration from SETL \cite{SETL}.
+SETL is a high-level mathematical programming language, with tuples being one of the primary data types.
+Tuples in SETL allow a number of operations, including subscripting, dynamic expansion, and multiple assignment.
+
+\CCeleven introduced @std::tuple@ as a library variadic template struct.
+Tuples are a generalization of @std::pair@, in that they allow for arbitrary length, fixed-size aggregation of heterogeneous values.
+\begin{cppcode}
+tuple<int, int, int> triple(10, 20, 30);
+get<1>(triple); // access component 1 => 20
+
+tuple<int, double> f();
+int i;
+double d;
+tie(i, d) = f(); // assign fields of return value into local variables
+
+tuple<int, int, int> greater(11, 0, 0);
+triple < greater; // true
+\end{cppcode}
+Tuples are simple data structures with few specific operations.
+In particular, it is possible to access a component of a tuple using @std::get<N>@.
+Another interesting feature is @std::tie@, which creates a tuple of references, allowing assignment of the results of a tuple-returning function into separate local variables, without requiring a temporary variable.
+Tuples also support lexicographic comparisons, making it simple to write aggregate comparators using @std::tie@.
+
+There is a proposal for \CCseventeen called \emph{structured bindings} \cite{StructuredBindings}, that introduces new syntax to eliminate the need to pre-declare variables and use @std::tie@ for binding the results from a function call.
+\begin{cppcode}
+tuple<int, double> f();
+auto [i, d] = f(); // unpacks into new variables i, d
+
+tuple<int, int, int> triple(10, 20, 30);
+auto & [t1, t2, t3] = triple;
+t2 = 0; // changes middle element of triple
+
+struct S { int x; double y; };
+S s = { 10, 22.5 };
+auto [x, y] = s; // unpack s
+\end{cppcode}
+Structured bindings allow unpacking any structure with all public non-static data members into fresh local variables.
+The use of @&@ allows declaring new variables as references, which is something that cannot be done with @std::tie@, since \CC references do not support rebinding.
+This extension requires the use of @auto@ to infer the types of the new variables, so complicated expressions with a non-obvious type must be documented with some other mechanism.
+Furthermore, structured bindings are not a full replacement for @std::tie@, as they always declare new variables.
+
+Like \CC, D provides tuples through a library variadic-template structure.
+In D, it is possible to name the fields of a tuple type, which creates a distinct type.
+% http://dlang.org/phobos/std_typecons.html
+\begin{dcode}
+Tuple!(float, "x", float, "y") point2D;
+Tuple!(float, float) float2;  // different type from point2D
+
+point2D[0]; // access first element
+point2D.x;  // access first element
+
+float f(float x, float y) {
+  return x+y;
+}
+
+f(point2D.expand);
+\end{dcode}
+Tuples are 0-indexed and can be subscripted using an integer or field name, if applicable.
+The @expand@ method produces the components of the tuple as a list of separate values, making it possible to call a function that takes $N$ arguments using a tuple with $N$ components.
+
+Tuples are a fundamental abstraction in most functional programming languages, such as Standard ML \cite{sml}.
+A function in SML always accepts exactly one argument.
+There are two ways to mimic multiple argument functions: the first through currying and the second by accepting tuple arguments.
+\begin{smlcode}
+fun fact (n : int) =
+  if (n = 0) then 1
+  else n*fact(n-1)
+
+fun binco (n: int, k: int) =
+  real (fact n) / real (fact k * fact (n-k))
+\end{smlcode}
+Here, the function @binco@ appears to take 2 arguments, but it actually takes a single argument which is implicitly decomposed via pattern matching.
+Tuples are a foundational tool in SML, allowing the creation of arbitrarily-complex structured data-types.
+
+Scala, like \CC, provides tuple types through the standard library \cite{Scala}.
+Scala provides tuples of size 1 through 22 inclusive through generic data structures.
+Tuples support named access and subscript access, among a few other operations.
+\begin{scalacode}
+val a = new Tuple3(0, "Text", 2.1) // explicit creation
+val b = (6, 'a', 1.1f)       // syntactic sugar: Tuple3[Int, Char, Float]
+val (i, _, d) = triple       // extractor syntax, ignore middle element
+
+println(a._2)                // named access => print "Text"
+println(b.productElement(0)) // subscript access => print 6
+\end{scalacode}
+In Scala, tuples are primarily used as simple data structures for carrying around multiple values or for returning multiple values from a function.
+The 22-element restriction is an odd and arbitrary choice, but in practice it does not cause problems since large tuples are uncommon.
+Subscript access is provided through the @productElement@ method, which returns a value of the top-type @Any@, since it is impossible to receive a more precise type from a general subscripting method due to type erasure.
+The disparity between named access beginning at @_1@ and subscript access starting at @0@ is likewise an oddity, but subscript access is typically avoided since it discards type information.
+Due to the language's pattern matching facilities, it is possible to extract the values from a tuple into named variables, which is a more idiomatic way of accessing the components of a tuple.
+
+
+\Csharp also has tuples, but has similarly strange limitations, allowing tuples of up to 7 components. % https://msdn.microsoft.com/en-us/library/system.tuple(v=vs.110).aspx
+The officially supported workaround for this shortcoming is to nest tuples in the 8th component.
+\Csharp allows accessing a component of a tuple by using the field @Item$N$@ for components 1 through 7, and @Rest@ for the nested tuple.
+
+In Python \cite{Python}, tuples are immutable sequences that provide packing and unpacking operations.
+While the tuple itself is immutable, and thus does not allow the assignment of components, there is nothing preventing a component from being internally mutable.
+The components of a tuple can be accessed by unpacking into multiple variables, indexing, or via field name, like D.
+Tuples support multiple assignment through a combination of packing and unpacking, in addition to the common sequence operations.
+
+Swift \cite{Swift}, like D, provides named tuples, with components accessed by name, index, or via extractors.
+Tuples are primarily used for returning multiple values from a function.
+In Swift, @Void@ is an alias for the empty tuple, and there are no single element tuples.
+
+Tuples comparable to those described above are added to \CFA and discussed in Chapter 3.
+
+\section{Variadic Functions}
+\label{sec:variadic_functions}
+In statically-typed programming languages, functions are typically defined to receive a fixed number of arguments of specified types.
+Variadic argument functions provide the ability to define a function that can receive a theoretically unbounded number of arguments.
+
+C provides a simple implementation of variadic functions.
+A function whose parameter list ends with @, ...@ is a variadic function.
+Among the most common variadic functions is @printf@.
+\begin{cfacode}
+int printf(const char * fmt, ...);
+printf("%d %g %c %s", 10, 3.5, 'X', "a string");
+\end{cfacode}
+Through the use of a format string, C programmers can communicate argument type information to @printf@, allowing them to print any of the standard C data types.
+Still, @printf@ is extremely limited, since the format codes are specified by the C standard, meaning users cannot define their own format codes to extend @printf@ for new data types or new formatting rules.
+
+\begin{sloppypar}
+C provides manipulation of variadic arguments through the @va_list@ data type, which abstracts details of the manipulation of variadic arguments.
+Since the variadic arguments are untyped, it is up to the function to interpret any data that is passed in.
+Additionally, the interface to manipulate @va_list@ objects is essentially limited to advancing to the next argument, without any built-in facility to determine when the last argument is read.
+This limitation requires the use of an \emph{argument descriptor} to pass information to the function about the structure of the argument list, including the number of arguments and their types.
+The format string in @printf@ is one such example of an argument descriptor.
+\begin{cfacode}
+int f(const char * fmt, ...) {
+  va_list args;
+  va_start(args, fmt);  // initialize va_list
+  for (const char * c = fmt; *c != '\0'; ++c) {
+    if (*c == '%') {
+      ++c;
+      switch (*c) {
+        case 'd': {
+          int i = va_arg(args, int);  // have to specify type
+          // ...
+          break;
+        }
+        case 'g': {
+          double d = va_arg(args, double);
+          // ...
+          break;
+        }
+        ...
+      }
+    }
+  }
+  va_end(args);
+  return ...;
+}
+\end{cfacode}
+Every case must be handled explicitly, since the @va_arg@ macro requires a type argument to determine how the next set of bytes is to be interpreted.
+Furthermore, if the user makes a mistake, compile-time checking is typically restricted to standard format codes and their corresponding types.
+In general, this means that C's variadic functions are not type-safe, making them difficult to use properly.
+\end{sloppypar}
+
+% When arguments are passed to a variadic function, they undergo \emph{default argument promotions}.
+% Specifically, this means that
+
+\CCeleven added support for \emph{variadic templates}, which add much-needed type-safety to C's variadic landscape.
+It is possible to use variadic templates to define variadic functions and variadic data types.
+\begin{cppcode}
+void print(int);
+void print(char);
+void print(double);
+...
+
+void f() {}    // base case
+
+template<typename T, typename... Args>
+void f(const T & arg, const Args &... rest) {
+  print(arg);  // print the current element
+  f(rest...);  // handle remaining arguments recursively
+}
+\end{cppcode}
+Variadic templates work largely through recursion on the \emph{parameter pack}, which is the argument with @...@ following its type.
+A parameter pack matches 0 or more elements, which can be types or expressions depending on the context.
+Like other templates, variadic template functions rely on an implicit set of constraints on a type, in this example a @print@ routine.
+That is, it is possible to use the @f@ routine on any type provided there is a corresponding @print@ routine, making variadic templates fully open to extension, unlike variadic functions in C.
+
+Recent \CC standards (\CCfourteen, \CCseventeen) expand on the basic premise by allowing variadic template variables and providing convenient expansion syntax to remove the need for recursion in some cases, amongst other things.
+
+% D has variadic templates that deserve a mention http://dlang.org/ctarguments.html
+
+In Java, a variadic function appears similar to a C variadic function in syntax.
+\begin{javacode}
+int sum(int... args) {
+  int s = 0;
+  for (int x : args) {
+    s += x;
+  }
+  return s;
+}
+
+void print(Object... objs) {
+  for (Object obj : objs) {
+    System.out.print(obj);
+  }
+}
+
+print("The sum from 1 to 10 is ", sum(1,2,3,4,5,6,7,8,9,10), ".\n");
+\end{javacode}
+The key difference is that Java variadic functions are type-safe, because they specify the type of the argument immediately prior to the ellipsis.
+In Java, variadic arguments are syntactic sugar for arrays, allowing access to length, subscripting operations, and for-each iteration on the variadic arguments, among other things.
+Since the argument type is specified explicitly, the top-type @Object@ can be used to accept arguments of any type, but doing anything interesting with an argument requires a down-cast to a more specific type, landing Java in a similar situation to C in that writing a function open to extension is difficult.
+
+The other option is to restrict the number of types that can be passed to the function by using a more specific type.
+Unfortunately, Java's use of nominal inheritance means that types must explicitly inherit from classes or interfaces in order to be considered a subclass.
+The combination of these two issues greatly restricts the usefulness of variadic functions in Java.
+
+Type-safe variadic functions are added to \CFA and discussed in Chapter 4.
+
+\section{Contributions}
+\label{s:contributions}
+
+No prior work on constructors or destructors had been done for \CFA.
+I did both the design and implementation work.
+While the overall design is based on constructors and destructors in object-oriented \CC, it had to be re-engineered into non-object-oriented \CFA.
+I also had to make changes to the \CFA expression-resolver to integrate constructors and destructors into the type system.
+
+Prior work on the design of tuples for \CFA was done by Till, and some initial implementation work by Esteves.
+I largely took the Till design but added tuple indexing, which exists in a number of programming languages with tuples, simplified the implicit tuple conversions, and integrated tuples with the \CFA polymorphism and assertion satisfaction model.
+I did a new implementation of tuples, and extensively
+augmented initial work by Bilson to incorporate tuples into the \CFA expression-resolver and type-unifier.
+
+No prior work on variadic functions had been done for \CFA.
+I did both the design and implementation work.
+While the overall design is based on variadic templates in \CC, my design is novel in the way it is incorporated into the \CFA polymorphism model, and is engineered into \CFA so it dovetails with tuples.
Index: doc/theses/rob_schluntz/thesis-frontpgs.tex
===================================================================
--- doc/theses/rob_schluntz/thesis-frontpgs.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/thesis-frontpgs.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,163 @@
+% T I T L E   P A G E
+% -------------------
+% Last updated May 24, 2011, by Stephen Carr, IST-Client Services
+% The title page is counted as page `i' but we need to suppress the
+% page number.  We also don't want any headers or footers.
+\pagestyle{empty}
+\pagenumbering{roman}
+
+% The contents of the title page are specified in the "titlepage"
+% environment.
+\begin{titlepage}
+        \begin{center}
+        \vspace*{1.0cm}
+
+        \Huge
+        {\bf Resource Management and Tuples in \CFA}
+
+        \vspace*{1.0cm}
+
+        \normalsize
+        by \\
+
+        \vspace*{1.0cm}
+
+        \Large
+        Robert Schluntz \\
+
+        \vspace*{3.0cm}
+
+        \normalsize
+        A thesis \\
+        presented to the University of Waterloo \\
+        in fulfillment of the \\
+        thesis requirement for the degree of \\
+        Master of Mathematics \\
+        in \\
+        Computer Science \\
+
+        \vspace*{2.0cm}
+
+        Waterloo, Ontario, Canada, 2017 \\
+
+        \vspace*{1.0cm}
+
+        \copyright\ Robert Schluntz 2017 \\
+        \end{center}
+\end{titlepage}
+
+% The rest of the front pages should contain no headers and be numbered using Roman numerals starting with `ii'
+\pagestyle{plain}
+\setcounter{page}{2}
+
+\cleardoublepage % Ends the current page and causes all figures and tables that have so far appeared in the input to be printed.
+% In a two-sided printing style, it also makes the next page a right-hand (odd-numbered) page, producing a blank page if necessary.
+
+
+
+% D E C L A R A T I O N   P A G E
+% -------------------------------
+  % The following is the sample Declaration Page as provided by the GSO
+  % December 13th, 2006.  It is designed for an electronic thesis.
+  \noindent
+I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
+
+  \bigskip
+
+  \noindent
+I understand that my thesis may be made electronically available to the public.
+
+\cleardoublepage
+%\newpage
+
+% A B S T R A C T
+% ---------------
+
+\begin{center}\textbf{Abstract}\end{center}
+
+\CFA is a modern, non-object-oriented extension of the C programming language.
+This thesis addresses several critical deficiencies of C, notably: resource management, a limited function-return mechanism, and unsafe variadic functions.
+To solve these problems, two fundamental language features are introduced: tuples and constructors/destructors.
+While these features exist in prior programming languages, the contribution of this work is engineering these features into a highly complex type system.
+C is an established language with a dedicated user-base.
+An important goal is to add new features in a way that naturally feels like C, to appeal to this core user-base; furthermore, due to huge amounts of legacy code, maintaining backwards compatibility is crucial.
+
+\cleardoublepage
+%\newpage
+
+% A C K N O W L E D G E M E N T S
+% -------------------------------
+
+\begin{center}\textbf{Acknowledgements}\end{center}
+
+I would like to thank my supervisor, Professor Peter Buhr, for all of his help, including reading the many drafts of this thesis and providing guidance throughout my degree.
+This work would not have been as enjoyable, nor would it have been as strong, without Peter's knowledge, help, and encouragement.
+
+I would like to thank my readers, Professors Gregor Richards and Patrick Lam, for all of their helpful feedback.
+
+Thanks to Aaron Moss and Thierry Delisle for many helpful discussions, both work-related and not, and for all of the work they have put into the \CFA project.
+This thesis would not have been the same without their efforts.
+
+I thank Glen Ditchfield and Richard Bilson for all of their help with both the design and implementation of \CFA.
+
+I thank my partner, Erin Blackmere, for all of her love and support.
+Without her, I would not be who I am today.
+
+Thanks to my parents, Bob and Jackie Schluntz, for their love and support throughout my life, and for always encouraging me to be my best.
+
+Thanks to my best friends, Travis Bartlett, Abraham Dubrisingh, and Kevin Wu, whose companionship is always appreciated.
+The time we've spent together over the past 4 years has always kept me entertained.
+An extra shout-out to Kaleb Alway, Max Bardakov, Ten Bradley, and Ed Lee, with whom I've shared many a great meal; thank you for being my friend.
+
+Finally, I would like to acknowledge financial support in the form of a David R. Cheriton Graduate Scholarship and a corporate partnership with Huawei Ltd.
+
+\cleardoublepage
+%\newpage
+
+% % D E D I C A T I O N
+% % -------------------
+
+% \begin{center}\textbf{Dedication}\end{center}
+
+% % This is dedicated to the one I love.
+% TODO
+% \cleardoublepage
+% %\newpage
+
+% T A B L E   O F   C O N T E N T S
+% ---------------------------------
+\renewcommand\contentsname{Table of Contents}
+\tableofcontents
+\cleardoublepage
+\phantomsection
+%\newpage
+
+% L I S T   O F   T A B L E S
+% ---------------------------
+\addcontentsline{toc}{chapter}{List of Tables}
+\listoftables
+\cleardoublepage
+\phantomsection		% allows hyperref to link to the correct page
+%\newpage
+
+% % L I S T   O F   F I G U R E S
+% % -----------------------------
+% \addcontentsline{toc}{chapter}{List of Figures}
+% \listoffigures
+% \cleardoublepage
+% \phantomsection		% allows hyperref to link to the correct page
+% %\newpage
+
+% L I S T   O F   S Y M B O L S
+% -----------------------------
+% To include a Nomenclature section
+% \addcontentsline{toc}{chapter}{\textbf{Nomenclature}}
+% \renewcommand{\nomname}{Nomenclature}
+% \printglossary
+% \cleardoublepage
+% \phantomsection % allows hyperref to link to the correct page
+% \newpage
+
+% Change page numbering back to Arabic numerals
+\pagenumbering{arabic}
+
Index: doc/theses/rob_schluntz/thesis.bib
===================================================================
--- doc/theses/rob_schluntz/thesis.bib	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/thesis.bib	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,67 @@
+@article{Choi:1999:EAJ:320385.320386,
+  author = {Choi, Jong-Deok and Gupta, Manish and Serrano, Mauricio and Sreedhar, Vugranam C. and Midkiff, Sam},
+  title = {Escape Analysis for Java},
+  journal = {SIGPLAN Not.},
+  issue_date = {Oct. 1999},
+  volume = {34},
+  number = {10},
+  month = oct,
+  year = {1999},
+  issn = {0362-1340},
+  pages = {1--19},
+  numpages = {19},
+  url = {http://doi.acm.org/10.1145/320385.320386},
+  doi = {10.1145/320385.320386},
+  acmid = {320386},
+  publisher = {ACM},
+  address = {New York, NY, USA},
+}
+
+@online{TryWithResources,
+  author = {Julien Ponge},
+  contributer = {rschlunt@uwaterloo.ca},
+  title = {Better Resource Management with Java SE 7: Beyond Syntactic Sugar},
+  year = 2011,
+  url = {http://www.oracle.com/technetwork/articles/java/trywithresources-401775.html},
+  note = {\url{http://www.oracle.com/technetwork/articles/java/trywithresources-401775.html}},
+  urldate = {2017-04-03}
+}
+
+@online{ExceptSafe,
+  author = {Andrei Alexandrescu and Petru Marginean},
+  contributer = {rschlunt@uwaterloo.ca},
+  title = {Generic: Change the Way You Write Exception-Safe Code - Forever},
+  year = 2000,
+  url = {http://www.drdobbs.com/cpp/generic-change-the-way-you-write-excepti/184403758},
+  note = {\url{http://www.drdobbs.com/cpp/generic-change-the-way-you-write-excepti/184403758}},
+  urldate = {2017-04-03}
+}
+
+@manual{Swift,
+  keywords  = {Swift programming language},
+  contributer = {pabuhr@plg},
+  title = {The {Swift} Programming Language (Swift 3.1)},
+  organization= {Apple Inc.},
+  year  = 2017,
+  note  = {\url{https://developer.apple.com/library/content/documentation/Swift/Conceptual/Swift_Programming_Language/AboutTheLanguageReference.html}},
+}
+
+@article{StructuredBindings,
+  author = {Herb Sutter and Bjarne Stroustrup and Gabriel Dos Reis},
+  title = {Structured bindings},
+  issue_date = {2015-10-14},
+  month = oct,
+  year = {2015},
+  pages = {1--6},
+  numpages = {6},
+  note = {\url{http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/p0144r0.pdf}},
+}
+
+@manual{atexit,
+  keywords  = {The Linux Programmer's Manual atexit},
+  contributer = {rschlunt@uwaterloo.ca},
+  title = {The Linux Programmer's Manual},
+  organization= {The GNU Project},
+  year  = 2017,
+  note  = {\url{http://man7.org/linux/man-pages/man3/atexit.3.html}},
+}
Index: doc/theses/rob_schluntz/thesis.tex
===================================================================
--- doc/theses/rob_schluntz/thesis.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/thesis.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,298 @@
+% uWaterloo Thesis Template for LaTeX
+% Last Updated May 24, 2011 by Stephen Carr, IST Client Services
+% FOR ASSISTANCE, please send mail to rt-IST-CSmathsci@ist.uwaterloo.ca
+
+% Effective October 2006, the University of Waterloo
+% requires electronic thesis submission. See the uWaterloo thesis regulations at
+% http://www.grad.uwaterloo.ca/Thesis_Regs/thesistofc.asp.
+
+% DON'T FORGET TO ADD YOUR OWN NAME AND TITLE in the "hyperref" package
+% configuration below. THIS INFORMATION GETS EMBEDDED IN THE FINAL PDF DOCUMENT.
+% You can view the information if you view Properties of the PDF document.
+
+% Many faculties/departments also require one or more printed
+% copies. This template attempts to satisfy both types of output.
+% It is based on the standard "book" document class which provides all necessary
+% sectioning structures and allows multi-part theses.
+
+% DISCLAIMER
+% To the best of our knowledge, this template satisfies the current uWaterloo requirements.
+% However, it is your responsibility to assure that you have met all
+% requirements of the University and your particular department.
+% Many thanks to the feedback from many graduates that assisted the development of this template.
+
+% -----------------------------------------------------------------------
+
+% By default, output is produced that is geared toward generating a PDF
+% version optimized for viewing on an electronic display, including
+% hyperlinks within the PDF.
+
+% E.g. to process a thesis called "mythesis.tex" based on this template, run:
+
+% pdflatex mythesis	-- first pass of the pdflatex processor
+% bibtex mythesis	-- generates bibliography from .bib data file(s)
+% pdflatex mythesis	-- fixes cross-references, bibliographic references, etc
+% pdflatex mythesis	-- fixes cross-references, bibliographic references, etc
+
+% If you use the recommended LaTeX editor, Texmaker, you would open the mythesis.tex
+% file, then click the pdflatex button. Then run BibTeX (under the Tools menu).
+% Then click the pdflatex button two more times. If you have an index as well,
+% you'll need to run MakeIndex from the Tools menu as well, before running pdflatex
+% the last two times.
+
+% N.B. The "pdftex" program allows graphics in the following formats to be
+% included with the "\includegraphics" command: PNG, PDF, JPEG, TIFF
+% Tip 1: Generate your figures and photos in the size you want them to appear
+% in your thesis, rather than scaling them with \includegraphics options.
+% Tip 2: Any drawings you do should be in scalable vector graphic formats:
+% SVG, PNG, WMF, EPS and then converted to PNG or PDF, so they are scalable in
+% the final PDF as well.
+% Tip 3: Photographs should be cropped and compressed so as not to be too large.
+
+% To create a PDF output that is optimized for double-sided printing:
+%
+% 1) comment-out the \documentclass statement in the preamble below, and
+% un-comment the second \documentclass line.
+%
+% 2) change the value assigned below to the boolean variable
+% "PrintVersion" from "false" to "true".
+
+% --------------------- Start of Document Preamble -----------------------
+
+% Specify the document class, default style attributes, and page dimensions
+% For hyperlinked PDF, suitable for viewing on a computer, use this:
+\PassOptionsToPackage{
+dvipsnames
+% ,monochrome % toggle black and white mode
+}{xcolor}
+\PassOptionsToPackage{pdftex}{graphicx}
+\documentclass[letterpaper,12pt,titlepage,oneside,final]{book}
+
+% For PDF, suitable for double-sided printing, change the PrintVersion variable below
+% to "true" and use this \documentclass line instead of the one above:
+% \documentclass[letterpaper,12pt,titlepage,openright,twoside,final]{book}
+
+\usepackage[T1]{fontenc}                                % allow Latin1 (extended ASCII) characters
+\usepackage{textcomp}
+% \usepackage[utf8]{inputenc}
+% \usepackage[latin1]{inputenc}
+\usepackage{fullpage,times,comment}
+% \usepackage{epic,eepic}
+\usepackage{upquote}                                    % switch curled `'" to straight
+% \usepackage{calc}
+\usepackage{xspace}
+% \usepackage{graphicx}
+\usepackage{varioref}                                   % extended references
+\usepackage{listings}                                   % format program code
+% \usepackage[flushmargin]{footmisc}                      % support label/reference in footnote
+% \usepackage{latexsym}                                   % \Box glyph
+% \usepackage{mathptmx}                                   % better math font with "times"
+% \usepackage[usenames]{color}
+% \usepackage[pagewise]{lineno}
+% \renewcommand{\linenumberfont}{\scriptsize\sffamily}
+\usepackage{courier}
+\input{common}                                          % bespoke macros used in the document
+
+\usepackage{bigfoot}
+
+\interfootnotelinepenalty=10000
+
+% Some LaTeX commands I define for my own nomenclature.
+% If you have to, it's better to change nomenclature once here than in a
+% million places throughout your thesis!
+\newcommand{\package}[1]{\textbf{#1}} % package names in bold text
+\newcommand{\cmmd}[1]{\textbackslash\texttt{#1}} % command name in tt font
+\newcommand{\href}[1]{#1} % does nothing, but defines the command so the
+    % print-optimized version will ignore \href tags (redefined by hyperref pkg).
+%\newcommand{\texorpdfstring}[2]{#1} % does nothing, but defines the command
+% Anything defined here may be redefined by packages added below...
+
+% This package allows if-then-else control structures.
+\usepackage{ifthen}
+\newboolean{PrintVersion}
+\setboolean{PrintVersion}{false}
+% CHANGE THIS VALUE TO "true" as necessary, to improve printed results for hard copies
+% by overriding some options of the hyperref package below.
+
+%\usepackage{nomencl} % For a nomenclature (optional; available from ctan.org)
+\usepackage{amsmath,amssymb,amstext} % Lots of math symbols and environments
+\usepackage[pdftex]{graphicx} % For including graphics N.B. pdftex graphics driver
+
+\usepackage{xcolor}
+\usepackage{listings}
+
+\input{cfa-format.tex}
+
+% Hyperlinks make it very easy to navigate an electronic document.
+% In addition, this is where you should specify the thesis title
+% and author as they appear in the properties of the PDF document.
+% Use the "hyperref" package
+% N.B. HYPERREF MUST BE THE LAST PACKAGE LOADED; ADD ADDITIONAL PKGS ABOVE
+\usepackage[pdftex,letterpaper=true,pagebackref=false]{hyperref} % with basic options
+		% N.B. pagebackref=true provides links back from the References to the body text. This can cause trouble for printing.
+\hypersetup{
+    plainpages=false,       % needed if Roman numbers in frontpages
+    pdfpagelabels=true,     % adds page number as label in Acrobat's page count
+    bookmarks=true,         % show bookmarks bar?
+    unicode=false,          % non-Latin characters in Acrobat's bookmarks
+    pdftoolbar=true,        % show Acrobat's toolbar?
+    pdfmenubar=true,        % show Acrobat's menu?
+    pdffitwindow=false,     % window fit to page when opened
+    pdfstartview={FitH},    % fits the width of the page to the window
+    pdftitle={Resource Management and Tuples in \CFA},    % title: CHANGE THIS TEXT!
+    pdfauthor={Rob Schluntz},    % author: CHANGE THIS TEXT! and uncomment this line
+    pdfsubject={Programming Languages},  % subject: CHANGE THIS TEXT! and uncomment this line
+%    pdfkeywords={keyword1} {key2} {key3}, % list of keywords, and uncomment this line if desired
+    pdfnewwindow=true,      % links in new window
+    colorlinks=true,        % false: boxed links; true: colored links
+    linkcolor=blue,         % color of internal links
+    citecolor=green,        % color of links to bibliography
+    filecolor=magenta,      % color of file links
+    urlcolor=cyan           % color of external links
+}
+\ifthenelse{\boolean{PrintVersion}}{   % for improved print quality, change some hyperref options
+\hypersetup{	% override some previously defined hyperref options
+%    colorlinks,%
+    citecolor=black,%
+    filecolor=black,%
+    linkcolor=black,%
+    urlcolor=black}
+}{} % end of ifthenelse (no else)
+
+% Setting up the page margins...
+% uWaterloo thesis requirements specify a minimum of 1 inch (72pt) margin at the
+% top, bottom, and outside page edges and a 1.125 in. (81pt) gutter
+% margin (on binding side). While this is not an issue for electronic
+% viewing, a PDF may be printed, and so we have the same page layout for
+% both printed and electronic versions, we leave the gutter margin in.
+% Set margins to minimum permitted by uWaterloo thesis regulations:
+\setlength{\marginparwidth}{0pt} % width of margin notes
+% N.B. If margin notes are used, you must adjust \textwidth, \marginparwidth
+% and \marginparsep so that the space left between the margin notes and page
+% edge is less than 15 mm (0.6 in.)
+\setlength{\marginparsep}{0pt} % width of space between body text and margin notes
+\setlength{\evensidemargin}{0.125in} % Adds 1/8 in. to binding side of all
+% even-numbered pages when the "twoside" printing option is selected
+\setlength{\oddsidemargin}{0.125in} % Adds 1/8 in. to the left of all pages
+% when "oneside" printing is selected, and to the left of all odd-numbered
+% pages when "twoside" printing is selected
+\setlength{\textwidth}{6.375in} % assuming US letter paper (8.5 in. x 11 in.) and
+% side margins as above
+\raggedbottom
+
+% The following statement specifies the amount of space between
+% paragraphs. Other reasonable specifications are \bigskipamount and \smallskipamount.
+\setlength{\parskip}{\medskipamount}
+
+% The following statement controls the line spacing.  The default
+% spacing corresponds to good typographic conventions and only slight
+% changes (e.g., perhaps "1.2"), if any, should be made.
+\renewcommand{\baselinestretch}{1} % this is the default line space setting
+
+% By default, each chapter will start on a recto (right-hand side)
+% page.  We also force each section of the front pages to start on
+% a recto page by inserting \cleardoublepage commands.
+% In many cases, this will require that the verso page be
+% blank and, while it should be counted, a page number should not be
+% printed.  The following statements ensure a page number is not
+% printed on an otherwise blank verso page.
+\let\origdoublepage\cleardoublepage
+\newcommand{\clearemptydoublepage}{%
+  \clearpage{\pagestyle{empty}\origdoublepage}}
+\let\cleardoublepage\clearemptydoublepage
+
+%======================================================================
+%   L O G I C A L    D O C U M E N T -- the content of your thesis
+%======================================================================
+\begin{document}
+
+% For a large document, it is a good idea to divide your thesis
+% into several files, each one containing one chapter.
+% To illustrate this idea, the "front pages" (i.e., title page,
+% declaration, borrowers' page, abstract, acknowledgements,
+% dedication, table of contents, list of tables, list of figures,
+% nomenclature) are contained within the file "thesis-frontpgs.tex" which is
+% included into the document by the following statement.
+%----------------------------------------------------------------------
+% FRONT MATERIAL
+%----------------------------------------------------------------------
+\input{thesis-frontpgs}
+
+%----------------------------------------------------------------------
+% MAIN BODY
+%----------------------------------------------------------------------
+
+\input{intro}
+
+\input{ctordtor}
+
+\input{tuples}
+
+\input{variadic}
+
+\input{conclusions}
+
+% The \appendix statement indicates the beginning of the appendices.
+% \appendix
+
+% % Add a title page before the appendices and a line in the Table of Contents
+% \chapter*{APPENDICES}
+% \addcontentsline{toc}{chapter}{APPENDICES}
+% %======================================================================
+% \chapter[PDF Plots From Matlab]{Matlab Code for Making a PDF Plot}
+% \label{AppendixA}
+% % Tip 4: Example of how to get a shorter chapter title for the Table of Contents
+% %======================================================================
+% \section{Using the GUI}
+% Properties of Matlab plots can be adjusted from the plot window via a graphical interface. Under the Desktop menu in the Figure window, select the Property Editor. You may also want to check the Plot Browser and Figure Palette for more tools. To adjust properties of the axes, look under the Edit menu and select Axes Properties.
+
+% To set the figure size and to save as PDF or other file formats, click the Export Setup button in the figure Property Editor.
+
+% \section{From the Command Line}
+% All figure properties can also be manipulated from the command line. Here's an example:
+% \begin{verbatim}
+% x=[0:0.1:pi];
+% hold on % Plot multiple traces on one figure
+% plot(x,sin(x))
+% plot(x,cos(x),'--r')
+% plot(x,tan(x),'.-g')
+% title('Some Trig Functions Over 0 to \pi') % Note LaTeX markup!
+% legend('{\it sin}(x)','{\it cos}(x)','{\it tan}(x)')
+% hold off
+% set(gca,'Ylim',[-3 3]) % Adjust Y limits of "current axes"
+% set(gcf,'Units','inches') % Set figure size units of "current figure"
+% set(gcf,'Position',[0,0,6,4]) % Set figure width (6 in.) and height (4 in.)
+% cd n:\thesis\plots % Select where to save
+% print -dpdf plot.pdf % Save as PDF
+% \end{verbatim}
+
+%----------------------------------------------------------------------
+% END MATERIAL
+%----------------------------------------------------------------------
+
+% B I B L I O G R A P H Y
+% -----------------------
+
+% The following statement selects the style to use for references.  It controls the sort order of the entries in the bibliography and also the formatting for the in-text labels.
+\bibliographystyle{plain}
+% This specifies the location of the file containing the bibliographic information.
+% It assumes you're using BibTeX (if not, why not?).
+\cleardoublepage % This is needed if the book class is used, to place the anchor in the correct page,
+                 % because the bibliography will start on its own page.
+                 % Use \clearpage instead if the document class uses the "oneside" argument
+\phantomsection  % With hyperref package, enables hyperlinking from the table of contents to bibliography
+% The following statement causes the title "References" to be used for the bibliography section:
+\renewcommand*{\bibname}{References}
+
+% Add the References to the Table of Contents
+\addcontentsline{toc}{chapter}{\textbf{References}}
+
+\bibliography{cfa,thesis}
+% Tip 5: You can create multiple .bib files to organize your references.
+% Just list them all in the \bibliography command, separated by commas (no spaces).
+
+% The following statement causes the specified references to be added to the bibliography even if they were not
+% cited in the text. The asterisk is a wildcard that causes all entries in the bibliographic database to be included (optional).
+% \nocite{*}
+
+\end{document}
Index: doc/theses/rob_schluntz/tuples.tex
===================================================================
--- doc/theses/rob_schluntz/tuples.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/tuples.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,801 @@
+%======================================================================
+\chapter{Tuples}
+%======================================================================
+
+\section{Multiple-Return-Value Functions}
+\label{s:MRV_Functions}
+In standard C, functions can return at most one value.
+This restriction results in code which emulates functions with multiple return values by \emph{aggregation} or by \emph{aliasing}.
+In the former situation, the function designer creates a record type that combines all of the return values into a single type.
+For example, consider a function returning the most frequently occurring letter in a string, and its frequency.
+This example is complex enough to illustrate that an array is insufficient, since arrays are homogeneous, and demonstrates a potential pitfall that exists with aliasing.
+\begin{cfacode}
+struct mf_ret {
+  int freq;
+  char ch;
+};
+
+struct mf_ret most_frequent(const char * str) {
+  char freqs [26] = { 0 };
+  struct mf_ret ret = { 0, 'a' };
+  for (int i = 0; str[i] != '\0'; ++i) {
+    if (isalpha(str[i])) {        // only count letters
+      int ch = tolower(str[i]);   // convert to lower case
+      int idx = ch-'a';
+      if (++freqs[idx] > ret.freq) {  // update on new max
+        ret.freq = freqs[idx];
+        ret.ch = ch;
+      }
+    }
+  }
+  return ret;
+}
+
+const char * str = "hello world";
+struct mf_ret ret = most_frequent(str);
+printf("%s -- %d %c\n", str, ret.freq, ret.ch);
+\end{cfacode}
+Of note, the designer must come up with a name for the return type and for each of its fields.
+Unnecessary naming is a common programming language issue, introducing verbosity and a complication of the user's mental model.
+That is, adding another named type creates another association in the programmer's mind that needs to be kept track of when reading and writing code.
+As such, this technique is effective when used sparingly, but can quickly get out of hand if many functions need to return different combinations of types.
+
+In the latter approach, the designer simulates multiple return values by passing the additional return values as pointer parameters.
+The pointer parameters are assigned inside of the routine body to emulate a return.
+Using the same example,
+\begin{cfacode}
+int most_frequent(const char * str, char * ret_ch) {
+  char freqs [26] = { 0 };
+  int ret_freq = 0;
+  for (int i = 0; str[i] != '\0'; ++i) {
+    if (isalpha(str[i])) {        // only count letters
+      int ch = tolower(str[i]);   // convert to lower case
+      int idx = ch-'a';
+      if (++freqs[idx] > ret_freq) {  // update on new max
+        ret_freq = freqs[idx];
+        *ret_ch = ch;   // assign to out parameter
+      }
+    }
+  }
+  return ret_freq;  // only one value returned directly
+}
+
+const char * str = "hello world";
+char ch;                            // pre-allocate return value
+int freq = most_frequent(str, &ch); // pass return value as out parameter
+printf("%s -- %d %c\n", str, freq, ch);
+\end{cfacode}
+Notably, using this approach, the caller is directly responsible for allocating storage for the additional temporary return values, which complicates the call site with a sequence of variable declarations leading up to the call.
+Also, while a disciplined use of @const@ can give clues about whether a pointer parameter is going to be used as an out parameter, it is not immediately obvious from only the routine signature whether the callee expects such a parameter to be initialized before the call.
+Furthermore, while many C routines that accept pointers are designed so that it is safe to pass @NULL@ as a parameter, there are many C routines that are not null-safe.
+On a related note, C does not provide a standard mechanism to state that a parameter is going to be used as an additional return value, which makes the job of ensuring that a value is returned more difficult for the compiler.
+Interestingly, there is a subtle bug in the previous example, in that @ret_ch@ is never assigned for a string that does not contain any letters, which can lead to undefined behaviour.
+In this particular case, it turns out that the frequency return value also doubles as an error code, where a frequency of 0 means the character return value should be ignored.
+Still, not every routine with multiple return values should be required to return an error code, and error codes are easily ignored, so this is not a satisfying solution.
+As with the previous approach, this technique can simulate multiple return values, but in practice it is verbose and error prone.
+
+In \CFA, functions can be declared to return multiple values with an extension to the function declaration syntax.
+Multiple return values are declared as a comma-separated list of types in square brackets in the same location that the return type appears in standard C function declarations.
+The ability to return multiple values from a function requires a new syntax for the return statement.
+For consistency, the return statement in \CFA accepts a comma-separated list of expressions in square brackets.
+The expression resolution phase of the \CFA translator ensures that the correct form is used depending on the values being returned and the return type of the current function.
+A multiple-returning function with return type @T@ can return any expression that is implicitly convertible to @T@.
+Using the running example, the @most_frequent@ function can be written using multiple return values as such,
+\begin{cfacode}
+[int, char] most_frequent(const char * str) {
+  char freqs [26] = { 0 };
+  int ret_freq = 0;
+  char ret_ch = 'a';  // arbitrary default value for consistent results
+  for (int i = 0; str[i] != '\0'; ++i) {
+    if (isalpha(str[i])) {        // only count letters
+      int ch = tolower(str[i]);   // convert to lower case
+      int idx = ch-'a';
+      if (++freqs[idx] > ret_freq) {  // update on new max
+        ret_freq = freqs[idx];
+        ret_ch = ch;
+      }
+    }
+  }
+  return [ret_freq, ret_ch];
+}
+\end{cfacode}
+This approach provides the benefits of compile-time checking for appropriate return statements as in aggregation, but without the required verbosity of declaring a new named type, which precludes the bug seen with out-parameters.
+
+The addition of multiple-return-value functions necessitates a syntax for accepting multiple values at the call-site.
+The simplest mechanism for retaining a return value in C is variable assignment.
+By assigning the return value into a variable, its value can be retrieved later at any point in the program.
+As such, \CFA allows assigning multiple values from a function into multiple variables, using a square-bracketed list of lvalue expressions on the left side.
+\begin{cfacode}
+const char * str = "hello world";
+int freq;
+char ch;
+[freq, ch] = most_frequent(str);  // assign into multiple variables
+printf("%s -- %d %c\n", str, freq, ch);
+\end{cfacode}
+It is also common to use a function's output as the input to another function.
+\CFA also allows this case, without any new syntax.
+When a function call is passed as an argument to another call, the expression resolver attempts to find the best match of actual arguments to formal parameters given all of the possible expression interpretations in the current scope \cite{Bilson03}.
+For example,
+\begin{cfacode}
+void process(int);       // (1)
+void process(char);      // (2)
+void process(int, char); // (3)
+void process(char, int); // (4)
+
+process(most_frequent("hello world"));  // selects (3)
+\end{cfacode}
+In this case, there is only one option for a function named @most_frequent@ that takes a string as input.
+This function returns two values, one @int@ and one @char@.
+There are four options for a function named @process@, but only two that accept two arguments, and of those the best match is (3), which is also an exact match.
+This expression first calls @most_frequent("hello world")@, which produces the values @3@ and @'l'@, which are fed directly to the first and second parameters of (3), respectively.
+
+\section{Tuple Expressions}
+Multiple-return-value functions provide \CFA with a new syntax for expressing a combination of expressions in the return statement and a combination of types in a function signature.
+These notions can be generalized to provide \CFA with \emph{tuple expressions} and \emph{tuple types}.
+A tuple expression is an expression producing a fixed-size, ordered list of values of heterogeneous types.
+The type of a tuple expression is the tuple of the subexpression types, or a \emph{tuple type}.
+In \CFA, a tuple expression is denoted by a comma-separated list of expressions enclosed in square brackets.
+For example, the expression @[5, 'x', 10.5]@ has type @[int, char, double]@.
+The previous expression has 3 \emph{components}.
+Each component in a tuple expression can be any \CFA expression, including another tuple expression.
+The order of evaluation of the components in a tuple expression is unspecified, to allow a compiler the greatest flexibility for program optimization.
+It is, however, guaranteed that each component of a tuple expression is evaluated for side-effects, even if the result is not used.
+Multiple-return-value functions can equivalently be called \emph{tuple-returning functions}.
+
+\subsection{Tuple Variables}
+The call-site of the @most_frequent@ routine has a notable blemish, in that it required the preallocation of return variables in a manner similar to the aliasing example, since it is impossible to declare multiple variables of different types in the same declaration in standard C.
+In \CFA, it is possible to overcome this restriction by declaring a \emph{tuple variable}.
+\begin{cfacode}[emph=ret, emphstyle=\color{red}]
+const char * str = "hello world";
+[int, char] ret = most_frequent(str);  // initialize tuple variable
+printf("%s -- %d %c\n", str, ret);
+\end{cfacode}
+It is now possible to accept multiple values into a single piece of storage, in much the same way that it was previously possible to pass multiple values from one function call to another.
+These variables can be used in any of the contexts where a tuple expression is allowed, such as in the @printf@ function call.
+As in the @process@ example, the components of the tuple value are passed as separate parameters to @printf@, allowing very simple printing of tuple expressions.
+One way to access the individual components is with a simple assignment, as in previous examples.
+\begin{cfacode}
+int freq;
+char ch;
+[freq, ch] = ret;
+\end{cfacode}
+
+\begin{sloppypar}
+In addition to variables of tuple type, it is also possible to have pointers to tuples, and arrays of tuples.
+Tuple types can be composed of any types, except for array types, since array assignment is disallowed, which makes tuple assignment difficult when a tuple contains an array.
+\begin{cfacode}
+[double, int] di;
+[double, int] * pdi;
+[double, int] adi[10];
+\end{cfacode}
+This example declares a variable of type @[double, int]@, a variable of type pointer to @[double, int]@, and an array of ten @[double, int]@.
+\end{sloppypar}
+
+\subsection{Tuple Indexing}
+At times, it is desirable to access a single component of a tuple-valued expression without creating unnecessary temporary variables to assign to.
+Given a tuple-valued expression @e@ and a compile-time constant integer $i$ where $0 \leq i < n$, where $n$ is the number of components in @e@, @e.i@ accesses the $i$\textsuperscript{th} component of @e@.
+For example,
+\begin{cfacode}
+[int, double] x;
+[char *, int] f();
+void g(double, int);
+[int, double] * p;
+
+int y = x.0;              // access int component of x
+y = f().1;                // access int component of f
+p->0 = 5;                 // access int component of tuple pointed-to by p
+g(x.1, x.0);              // rearrange x to pass to g
+double z = [x, f()].0.1;  // access second component of first component
+                          // of tuple expression
+\end{cfacode}
+As seen above, tuple-index expressions can occur on any tuple-typed expression, including tuple-returning functions, square-bracketed tuple expressions, and other tuple-index expressions, provided the retrieved component is also a tuple.
+This feature was proposed for \KWC but never implemented \cite[p.~45]{Till89}.
+
+\subsection{Flattening and Structuring}
+As evident in previous examples, tuples in \CFA do not have a rigid structure.
+In function call contexts, tuples support implicit flattening and restructuring conversions.
+Tuple flattening recursively expands a tuple into the list of its basic components.
+Tuple structuring packages a list of expressions into a value of tuple type.
+\begin{cfacode}
+int f(int, int);
+int g([int, int]);
+int h(int, [int, int]);
+[int, int] x;
+int y;
+
+f(x);      // flatten
+g(y, 10);  // structure
+h(x, y);   // flatten & structure
+\end{cfacode}
+In \CFA, each of these calls is valid.
+In the call to @f@, @x@ is implicitly flattened so that the components of @x@ are passed as the two arguments to @f@.
+For the call to @g@, the values @y@ and @10@ are structured into a single argument of type @[int, int]@ to match the type of the parameter of @g@.
+Finally, in the call to @h@, @x@ is flattened to yield an argument list of length 3, of which the first component of @x@ is passed as the first parameter of @h@, and the second component of @x@ and @y@ are structured into the second argument of type @[int, int]@.
+The flexible structure of tuples permits a simple and expressive function-call syntax to work seamlessly with both single- and multiple-return-value functions, and with any number of arguments of arbitrarily complex structure.
+
+In \KWC \cite{Buhr94a,Till89}, there were 4 tuple coercions: opening, closing, flattening, and structuring.
+Opening coerces a tuple value into a tuple of values, while closing converts a tuple of values into a single tuple value.
+Flattening coerces a nested tuple into a flat tuple, \ie it takes a tuple with tuple components and expands it into a tuple with only non-tuple components.
+Structuring moves in the opposite direction, \ie it takes a flat tuple value and provides structure by introducing nested tuple components.
+
+In \CFA, the design has been simplified to require only the two conversions previously described, which trigger only in function call and return situations.
+This simplification is a primary contribution of this thesis to the design of tuples in \CFA.
+Specifically, the expression resolution algorithm examines all of the possible alternatives for an expression to determine the best match.
+In resolving a function call expression, each combination of function value and list of argument alternatives is examined.
+Given a particular argument list and function value, the list of argument alternatives is flattened to produce a list of non-tuple valued expressions.
+Then the flattened list of expressions is compared with each value in the function's parameter list.
+If the parameter's type is not a tuple type, then the current argument value is unified with the parameter type, and on success the next argument and parameter are examined.
+If the parameter's type is a tuple type, then the structuring conversion takes effect, recursively applying the parameter matching algorithm using the tuple's component types as the parameter list types.
+Assuming a successful unification, eventually the algorithm gets to the end of the tuple type, which causes all of the matching expressions to be consumed and structured into a tuple expression.
+For example, in
+\begin{cfacode}
+int f(int, [double, int]);
+f([5, 10.2], 4);
+\end{cfacode}
+There is only a single definition of @f@, and 3 arguments with only single interpretations.
+First, the argument alternative list @[5, 10.2], 4@ is flattened to produce the argument list @5, 10.2, 4@.
+Next, the parameter matching algorithm begins, with $P = $@int@ and $A = $@int@, which unifies exactly.
+Moving to the next parameter and argument, $P = $@[double, int]@ and $A = $@double@.
+This time, the parameter is a tuple type, so the algorithm applies recursively with $P' = $@double@ and $A = $@double@, which unifies exactly.
+Then $P' = $@int@ and $A = $@int@, which again unifies exactly.
+At this point, the end of $P'$ has been reached, so the arguments @10.2, 4@ are structured into the tuple expression @[10.2, 4]@.
+Finally, the end of the parameter list $P$ has also been reached, so the final expression is @f(5, [10.2, 4])@.
+
+\section{Tuple Assignment}
+\label{s:TupleAssignment}
+An assignment where the left side of the assignment operator has a tuple type is called tuple assignment.
+There are two kinds of tuple assignment depending on whether the right side of the assignment operator has a tuple type or a non-tuple type, called \emph{Multiple} and \emph{Mass} Assignment, respectively.
+\begin{cfacode}
+int x;
+double y;
+[int, double] z;
+[y, x] = 3.14;  // mass assignment
+[x, y] = z;     // multiple assignment
+z = 10;         // mass assignment
+z = [x, y];     // multiple assignment
+\end{cfacode}
+Let $L_i$ for $i$ in $[0, n)$ represent each component of the flattened left side, $R_i$ represent each component of the flattened right side of a multiple assignment, and $R$ represent the right side of a mass assignment.
+
+For a multiple assignment to be valid, both tuples must have the same number of elements when flattened.
+For example, the following is invalid because the number of components on the left does not match the number of components on the right.
+\begin{cfacode}
+[int, int] x, y, z;
+[x, y] = z;   // multiple assignment, invalid 4 != 2
+\end{cfacode}
+Multiple assignment assigns $R_i$ to $L_i$ for each $i$.
+That is, @?=?(&$L_i$, $R_i$)@ must be a well-typed expression.
+In the earlier valid example, @[x, y] = z@ where @z@ has type @[int, double]@, @z@ is flattened into @z.0, z.1@, and the assignments @x = z.0@ and @y = z.1@ happen.
+
+A mass assignment assigns the value $R$ to each $L_i$.
+For a mass assignment to be valid, @?=?(&$L_i$, $R$)@ must be a well-typed expression.
+These semantics differ from C cascading assignment (\eg @a=b=c@) in that conversions are applied to $R$ in each individual assignment, which prevents data loss from the chain of conversions that can happen during a cascading assignment.
+For example, @[y, x] = 3.14@ performs the assignments @y = 3.14@ and @x = 3.14@, which results in the value @3.14@ in @y@ and the value @3@ in @x@.
+On the other hand, the C cascading assignment @y = x = 3.14@ performs the assignments @x = 3.14@ and @y = x@, which results in the value @3@ in @x@, and as a result the value @3@ in @y@ as well.
+
+Both kinds of tuple assignment have parallel semantics, such that each value on the left side and right side is evaluated \emph{before} any assignments occur.
+As a result, it is possible to swap the values in two variables without explicitly creating any temporary variables or calling a function.
+\begin{cfacode}
+int x = 10, y = 20;
+[x, y] = [y, x];
+\end{cfacode}
+After executing this code, @x@ has the value @20@ and @y@ has the value @10@.
+
+In \CFA, tuple assignment is an expression where the result type is the type of the left side of the assignment, as in normal assignment.
+That is, a tuple assignment produces the value of the left-hand side after assignment.
+These semantics allow cascading tuple assignment to work out naturally in any context where a tuple is permitted.
+These semantics are a change from the original tuple design in \KWC \cite{Till89}, wherein tuple assignment was a statement that allows cascading assignments as a special case.
+Restricting tuple assignment to statements was an attempt to fix what was seen as a problem with side-effects, wherein assignment can be used in many different locations, such as in function-call argument position.
+While permitting assignment as an expression does introduce the potential for subtle complexities, it is impossible to remove assignment expressions from \CFA without affecting backwards compatibility.
+Furthermore, there are situations where permitting assignment as an expression improves readability by keeping code succinct and reducing repetition, and complicating the definition of tuple assignment puts a greater cognitive burden on the user.
+In another language, tuple assignment as a statement could be reasonable, but it would be inconsistent for tuple assignment to be the only kind of assignment that is not an expression.
+In addition, \KWC permits the compiler to optimize tuple assignment as a block copy, since it does not support user-defined assignment operators.
+This optimization could be implemented in \CFA, but it requires the compiler to verify that the selected assignment operator is trivial.
+
+The following example shows multiple, mass, and cascading assignment used in one expression:
+\begin{cfacode}
+  int a, b;
+  double c, d;
+  [void] f([int, int]);
+  f([c, a] = [b, d] = 1.5);  // assignments in parameter list
+\end{cfacode}
+The tuple expression begins with a mass assignment of @1.5@ into @[b, d]@, which assigns @1.5@ into @b@, which is truncated to @1@, and @1.5@ into @d@, producing the tuple @[1, 1.5]@ as a result.
+That tuple is used as the right side of the multiple assignment (\ie, @[c, a] = [1, 1.5]@) that assigns @1@ into @c@ and @1.5@ into @a@, which is truncated to @1@, producing the result @[1, 1]@.
+Finally, the tuple @[1, 1]@ is used as an expression in the call to @f@.
+
+\subsection{Tuple Construction}
+Tuple construction and destruction follow the same rules and semantics as tuple assignment, except that in the case where there is no right side, the default constructor or destructor is called on each component of the tuple.
+As constructors and destructors did not exist in previous versions of \CFA or in \KWC, this is a primary contribution of this thesis to the design of tuples.
+\begin{cfacode}
+struct S;
+void ?{}(S *);         // (1)
+void ?{}(S *, int);    // (2)
+void ?{}(S *, double); // (3)
+void ?{}(S *, S);      // (4)
+
+[S, S] x = [3, 6.28];  // uses (2), (3), specialized constructors
+[S, S] y;              // uses (1), (1), default constructor
+[S, S] z = x.0;        // uses (4), (4), copy constructor
+\end{cfacode}
+In this example, @x@ is initialized by the multiple constructor calls @?{}(&x.0, 3)@ and @?{}(&x.1, 6.28)@, while @y@ is initialized by two default constructor calls @?{}(&y.0)@ and @?{}(&y.1)@.
+@z@ is initialized by mass copy constructor calls @?{}(&z.0, x.0)@ and @?{}(&z.1, x.0)@.
+Finally, @x@, @y@, and @z@ are destructed, \ie the calls @^?{}(&x.0)@, @^?{}(&x.1)@, @^?{}(&y.0)@, @^?{}(&y.1)@, @^?{}(&z.0)@, and @^?{}(&z.1)@.
+
+It is possible to define constructors and assignment functions for tuple types that provide new semantics, if the existing semantics do not fit the needs of an application.
+For example, the function @void ?{}([T, U] *, S);@ can be defined to allow a tuple variable to be constructed from a value of type @S@.
+\begin{cfacode}
+struct S { int x; double y; };
+void ?{}([int, double] * this, S s) {
+  this->0 = s.x;
+  this->1 = s.y;
+}
+\end{cfacode}
+Due to the structure of generated constructors, it is possible to pass a tuple to a generated constructor for a type with a member prefix that matches the type of the tuple.
+For example,
+\begin{cfacode}
+struct S { int x; double y; int z; };
+[int, double] t;
+S s = t;
+\end{cfacode}
+The initialization of @s@ with @t@ works by default because @t@ is flattened into its components, which satisfies the generated field constructor @?{}(S *, int, double)@ to initialize the first two values.
+
+\section{Member-Access Tuple Expression}
+\label{s:MemberAccessTuple}
+It is possible to access multiple fields from a single expression using a \emph{Member-Access Tuple Expression}.
+The result is a single tuple-valued expression whose type is the tuple of the types of the members.
+For example,
+\begin{cfacode}
+struct S { int x; double y; char * z; } s;
+s.[x, y, z];
+\end{cfacode}
+Here, the type of @s.[x, y, z]@ is @[int, double, char *]@.
+A member tuple expression has the form @a.[x, y, z];@ where @a@ is an expression with type @T@, where @T@ supports member access expressions, and @x, y, z@ are all members of @T@ with types @T$_x$@, @T$_y$@, and @T$_z$@ respectively.
+Then the type of @a.[x, y, z]@ is @[T$_x$, T$_y$, T$_z$]@.
+
+Since tuple index expressions are a form of member-access expression, it is possible to use tuple-index expressions in conjunction with member tuple expressions to manually restructure a tuple (\eg, rearrange components, drop components, duplicate components, etc.).
+\begin{cfacode}
+[int, int, long, double] x;
+void f(double, long);
+
+f(x.[0, 3]);          // f(x.0, x.3)
+x.[0, 1] = x.[1, 0];  // [x.0, x.1] = [x.1, x.0]
+[long, int, long] y = x.[2, 0, 2];
+\end{cfacode}
+
+It is possible for a member tuple expression to contain other member access expressions.
+For example,
+\begin{cfacode}
+struct A { double i; int j; };
+struct B { int * k; short l; };
+struct C { int x; A y; B z; } v;
+v.[x, y.[i, j], z.k];
+\end{cfacode}
+This expression is equivalent to @[v.x, [v.y.i, v.y.j], v.z.k]@.
+That is, the aggregate expression is effectively distributed across the tuple, which allows simple and easy access to multiple components in an aggregate, without repetition.
+It is guaranteed that the aggregate expression to the left of the @.@ in a member tuple expression is evaluated exactly once.
+As such, it is safe to use member tuple expressions on the result of a side-effecting function.
+\begin{cfacode}
+[int, float, double] f();
+[double, float] x = f().[2, 1];
+\end{cfacode}
+
+In \KWC, member tuple expressions are known as \emph{record field tuples} \cite{Till89}.
+Since \CFA permits these tuple-access expressions using structures, unions, and tuples, \emph{member tuple expression} or \emph{field tuple expression} is more appropriate.
+
+It is possible to extend member-access expressions further.
+Currently, a member-access expression whose member is a name requires that the aggregate is a structure or union, while a constant integer member requires the aggregate to be a tuple.
+In the interest of orthogonal design, \CFA could apply some meaning to the remaining combinations as well.
+For example,
+\begin{cfacode}
+struct S { int x, y; } s;
+[S, S] z;
+
+s.x;  // access member
+z.0;  // access component
+
+s.1;  // ???
+z.y;  // ???
+\end{cfacode}
+One possibility is for @s.1@ to select the second member of @s@.
+Under this interpretation, it becomes possible to not only access members of a struct by name, but also by position.
+Likewise, it seems natural to open this mechanism to enumerations as well, wherein the left side would be a type, rather than an expression.
+One benefit of this interpretation is familiarity, since it is extremely reminiscent of tuple-index expressions.
+On the other hand, it could be argued that this interpretation is brittle, in that changing the order of members or adding new members to a structure can break code that accesses members by position.
+This problem is less of a concern with tuples, since modifying a tuple affects only the code that directly uses the tuple, whereas modifying a structure has far reaching consequences for every instance of the structure.
+
+As for @z.y@, one interpretation is to extend the meaning of member tuple expressions.
+That is, currently the tuple must occur as the member, \ie to the right of the dot.
+Allowing tuples to the left of the dot could distribute the member across the elements of the tuple, in much the same way that member tuple expressions distribute the aggregate across the member tuple.
+In this example, @z.y@ expands to @[z.0.y, z.1.y]@, allowing what is effectively a very limited compile-time field-sections map operation, where the argument must be a tuple containing only aggregates having a member named @y@.
+It is questionable how useful this would actually be in practice, since structures often do not have names in common with other structures, and further this could cause maintainability issues in that it encourages programmers to adopt very simple naming conventions to maximize the amount of overlap between different types.
+Perhaps more useful would be to allow arrays on the left side of the dot, which would likewise allow mapping a field access across the entire array, producing an array of the contained fields.
+The immediate problem with this idea is that C arrays do not carry around their size, which would make it impossible to use this extension for anything other than a simple stack allocated array.
+
+Supposing this feature works as described, it would be necessary to specify an ordering for the expansion of member-access expressions versus member-tuple expressions.
+\begin{cfacode}
+struct S { int x, y; };
+[S, S] z;
+z.[x, y];  // ???
+// => [z.0, z.1].[x, y]
+// => [z.0.x, z.0.y, z.1.x, z.1.y]
+// or
+// => [z.x, z.y]
+// => [[z.0, z.1].x, [z.0, z.1].y]
+// => [z.0.x, z.1.x, z.0.y, z.1.y]
+\end{cfacode}
+Depending on exactly how the two tuples are combined, different results can be achieved.
+As such, a specific ordering would need to be imposed to make this feature useful.
+Furthermore, this addition moves a member-tuple expression's meaning from being clear statically to needing resolver support, since the member name needs to be distributed appropriately over each member of the tuple, which could itself be a tuple.
+
+A second possibility is for \CFA to have named tuples, as they exist in Swift and D.
+\begin{cfacode}
+typedef [int x, int y] Point2D;
+Point2D p1, p2;
+p1.x + p1.y + p2.x + p2.y;
+p1.0 + p1.1 + p2.0 + p2.1;  // equivalent
+\end{cfacode}
+In this simpler interpretation, a tuple type carries with it a list of possibly empty identifiers.
+This approach fits naturally with the named return-value feature, and would likely go a long way towards implementing it.
+
+Ultimately, the first two extensions introduce complexity into the model, with relatively little perceived benefit, and so were dropped from consideration.
+Named tuples are a potentially useful addition to the language, provided they can be parsed with a reasonable syntax.
+
+
+\section{Casting}
+In C, the cast operator is used to explicitly convert between types.
+In \CFA, the cast operator has a secondary use, which is type ascription, since it forces the expression resolution algorithm to choose the lowest cost conversion to the target type.
+That is, a cast can be used to select the type of an expression when it is ambiguous, as in the call to an overloaded function.
+\begin{cfacode}
+int f();     // (1)
+double f();  // (2)
+
+f();       // ambiguous - (1),(2) both equally viable
+(int)f();  // choose (1)
+\end{cfacode}
+Since casting is a fundamental operation in \CFA, casts need to be given a meaningful interpretation in the context of tuples.
+Taking a look at standard C provides some guidance with respect to the way casts should work with tuples.
+\begin{cfacode}[numbers=left]
+int f();
+void g();
+
+(void)f();  // valid, ignore results
+(int)g();   // invalid, void cannot be converted to int
+
+struct A { int x; };
+(struct A)f();  // invalid, int cannot be converted to A
+\end{cfacode}
+In C, line 4 is a valid cast, which calls @f@ and discards its result.
+On the other hand, line 5 is invalid, because @g@ does not produce a result, so requesting an @int@ to materialize from nothing is nonsensical.
+Finally, line 8 is also invalid, because in C casts only provide conversion between scalar types \cite[p.~91]{C11}.
+For consistency, this implies that any case wherein the number of components increases as a result of the cast is invalid, while casts that have the same or fewer number of components may be valid.
+
+Formally, a cast to tuple type is valid when $n \leq m$, where $n$ is the number of components in the target type $T$ and $m$ is the number of components in the source type $S$, and for each $i$ in $[0, n)$, $S_i$ can be cast to $T_i$.
+Excess elements ($S_j$ for all $j$ in $[n, m)$) are evaluated, but their values are discarded so that they are not included in the result expression.
+This discarding naturally follows the way that a cast to void works in C.
+
+For example,
+\begin{cfacode}
+  [int, int, int] f();
+  [int, [int, int], int] g();
+
+  ([int, double])f();           // (1) valid
+  ([int, int, int])g();         // (2) valid
+  ([void, [int, int]])g();      // (3) valid
+  ([int, int, int, int])g();    // (4) invalid
+  ([int, [int, int, int]])g();  // (5) invalid
+\end{cfacode}
+
+(1) discards the last element of the return value and converts the second element to type double.
+Since @int@ is effectively a 1-element tuple, (2) discards the second component of the second element of the return value of @g@.
+If @g@ is free of side effects, this is equivalent to @[(int)(g().0), (int)(g().1.0), (int)(g().2)]@.
+Since @void@ is effectively a 0-element tuple, (3) discards the first and third return values, which is effectively equivalent to @[(int)(g().1.0), (int)(g().1.1)]@.
+% will this always hold true? probably, as constructors should give all of the conversion power we need. if casts become function calls, what would they look like? would need a way to specify the target type, which seems awkward. Also, C++ basically only has this because classes are closed to extension, while we don't have that problem (can have floating constructors for any type).
+Note that a cast is not a function call in \CFA, so flattening and structuring conversions do not occur for cast expressions.
+As such, (4) is invalid because the cast target type contains 4 components, while the source type contains only 3.
+Similarly, (5) is invalid because the cast @([int, int, int])(g().1)@ is invalid.
+That is, it is invalid to cast @[int, int]@ to @[int, int, int]@.
+
+\section{Polymorphism}
+Due to the implicit flattening and structuring conversions involved in argument passing, @otype@ and @dtype@ parameters are restricted to matching only with non-tuple types.
+The integration of polymorphism, type assertions, and monomorphic specialization of tuple-assertions is a primary contribution of this thesis to the design of tuples.
+\begin{cfacode}
+forall(otype T, dtype U)
+void f(T x, U * y);
+
+f([5, "hello"]);
+\end{cfacode}
+In this example, @[5, "hello"]@ is flattened, so that the argument list appears as @5, "hello"@.
+The argument matching algorithm binds @T@ to @int@ and @U@ to @const char@, and calls the function as normal.
+
+Tuples can contain otype and dtype components.
+For example, a plus operator can be written to add two triples of a type together.
+\begin{cfacode}
+forall(otype T | { T ?+?(T, T); })
+[T, T, T] ?+?([T, T, T] x, [T, T, T] y) {
+  return [x.0+y.0, x.1+y.1, x.2+y.2];
+}
+[int, int, int] x;
+int i1, i2, i3;
+[i1, i2, i3] = x + ([10, 20, 30]);
+\end{cfacode}
+Note that due to the implicit tuple conversions, this function is not restricted to the addition of two triples.
+A call to this plus operator type checks as long as a total of 6 non-tuple arguments are passed after flattening, and all of the arguments have a common type that can bind to @T@, with a pairwise @?+?@ over @T@.
+For example, these expressions also succeed and produce the same value.
+\begin{cfacode}
+([x.0, x.1]) + ([x.2, 10, 20, 30]);  // x + ([10, 20, 30])
+x.0 + ([x.1, x.2, 10, 20, 30]);      // x + ([10, 20, 30])
+\end{cfacode}
+This presents a potential problem if structure is important, as these three expressions look like they should have different meanings.
+Furthermore, these calls can be made ambiguous by introducing seemingly different functions.
+\begin{cfacode}
+forall(otype T | { T ?+?(T, T); })
+[T, T, T] ?+?([T, T] x, [T, T, T, T]);
+forall(otype T | { T ?+?(T, T); })
+[T, T, T] ?+?(T x, [T, T, T, T, T]);
+\end{cfacode}
+It is also important to note that these calls could be disambiguated if the function return types were different, as they likely would be for a reasonable implementation of @?+?@, since the return type is used in overload resolution.
+Still, these semantics are a deficiency of the current argument matching algorithm, and depending on the function, differing return values may not always be appropriate.
+These issues could be rectified by applying an appropriate conversion cost to the structuring and flattening conversions, which are currently 0-cost conversions in the expression resolver.
+Care would be needed in this case to ensure that exact matches do not incur such a cost.
+\begin{cfacode}
+void f([int, int], int, int);
+
+f([0, 0], 0, 0);    // no cost
+f(0, 0, 0, 0);      // cost for structuring
+f([0, 0], [0, 0]);  // cost for flattening
+f([0, 0, 0], 0);    // cost for flattening and structuring
+\end{cfacode}
+
+Until this point, it has been assumed that assertion arguments must match the parameter type exactly, modulo polymorphic specialization (\ie, no implicit conversions are applied to assertion arguments).
+This decision presents a conflict with the flexibility of tuples.
+\subsection{Assertion Inference}
+\begin{cfacode}
+int f([int, double], double);
+forall(otype T, otype U | { T f(T, U, U); })
+void g(T, U);
+g(5, 10.21);
+\end{cfacode}
+If assertion arguments must match exactly, then the call to @g@ cannot be resolved, since the expected type of @f@ is flat, while the only @f@ in scope requires a tuple type.
+Since tuples are fluid, this requirement reduces the usability of tuples in polymorphic code.
+To ease this pain point, function parameter and return lists are flattened for the purposes of type unification, which allows the previous example to pass expression resolution.
+
+This relaxation is made possible by extending the existing thunk generation scheme, as described by Bilson \cite{Bilson03}.
+Now, whenever a candidate's parameter structure does not exactly match the formal parameter's structure, a thunk is generated to specialize calls to the actual function.
+\begin{cfacode}
+int _thunk(int _p0, double _p1, double _p2) {
+  return f([_p0, _p1], _p2);
+}
+\end{cfacode}
+Essentially, this provides flattening and structuring conversions to inferred functions, improving the compatibility of tuples and polymorphism.
+
+\section{Implementation}
+Tuples are implemented in the \CFA translator via a transformation into generic types.
+Generic types are an independent contribution developed at the same time.
+The transformation into generic types and the generation of tuple-specific code are primary contributions of this thesis to tuples.
+
+The first time an $N$-tuple is seen for each $N$ in a scope, a generic type with $N$ type parameters is generated.
+For example,
+\begin{cfacode}
+[int, int] f() {
+  [double, double] x;
+  [int, double, int] y;
+}
+\end{cfacode}
+is transformed into
+\begin{cfacode}
+forall(dtype T0, dtype T1 | sized(T0) | sized(T1))
+struct _tuple2_ {  // generated before the first 2-tuple
+  T0 field_0;
+  T1 field_1;
+};
+_tuple2_(int, int) f() {
+  _tuple2_(double, double) x;
+  forall(dtype T0, dtype T1, dtype T2 | sized(T0) | sized(T1) | sized(T2))
+  struct _tuple3_ {  // generated before the first 3-tuple
+    T0 field_0;
+    T1 field_1;
+    T2 field_2;
+  };
+  _tuple3_(int, double, int) y;
+}
+\end{cfacode}
+
+Tuple expressions are then simply converted directly into compound literals
+\begin{cfacode}
+[5, 'x', 1.24];
+\end{cfacode}
+becomes
+\begin{cfacode}
+(_tuple3_(int, char, double)){ 5, 'x', 1.24 };
+\end{cfacode}
+
+Since tuples are essentially structures, tuple indexing expressions are just field accesses.
+\begin{cfacode}
+void f(int, [double, char]);
+[int, double] x;
+
+x.0+x.1;
+printf("%d %g\n", x);
+f(x, 'z');
+\end{cfacode}
+is transformed into
+\begin{cfacode}
+void f(int, _tuple2_(double, char));
+_tuple2_(int, double) x;
+
+x.field_0+x.field_1;
+printf("%d %g\n", x.field_0, x.field_1);
+f(x.field_0, (_tuple2_(double, char)){ x.field_1, 'z' });
+\end{cfacode}
+Note that due to flattening, @x@ used in the argument position is converted into the list of its fields.
+In the call to @f@, the second and third argument components are structured into a tuple argument.
+
+Expressions that may contain side effects are made into \emph{unique expressions} before being expanded by the flattening conversion.
+Each unique expression is assigned an identifier and is guaranteed to be executed exactly once.
+\begin{cfacode}
+void g(int, double);
+[int, double] h();
+g(h());
+\end{cfacode}
+Internally, this is converted to pseudo-\CFA
+\begin{cfacode}
+void g(int, double);
+[int, double] h();
+lazy [int, double] unq0 = h(); // deferred execution
+g(unq0.0, unq0.1);             // execute h() once
+\end{cfacode}
+That is, the function @h@ is evaluated lazily and its result is stored for subsequent accesses.
+Ultimately, unique expressions are converted into two variables and an expression.
+\begin{cfacode}
+void g(int, double);
+[int, double] h();
+
+_Bool _unq0_finished_ = 0;
+[int, double] _unq0;
+g(
+  (_unq0_finished_ ? _unq0 : (_unq0 = h(), _unq0_finished_ = 1, _unq0)).0,
+  (_unq0_finished_ ? _unq0 : (_unq0 = h(), _unq0_finished_ = 1, _unq0)).1
+);
+\end{cfacode}
+Since argument evaluation order is not specified by the C programming language, this scheme is built to work regardless of evaluation order.
+The first time a unique expression is executed, the actual expression is evaluated and the accompanying boolean is set to true.
+Every subsequent evaluation of the unique expression then results in an access to the stored result of the actual expression.
+
+Currently, the \CFA translator has a very broad, imprecise definition of impurity (side-effects), where every function call is assumed to be impure.
+This notion could be made more precise for certain intrinsic, auto-generated, and built-in functions, and could analyze function bodies, when they are available, to recursively detect impurity, to eliminate some unique expressions.
+It is possible that lazy evaluation could be exposed to the user through a lazy keyword with little additional effort.
+
+Tuple-member expressions are recursively expanded into a list of member-access expressions.
+\begin{cfacode}
+[int, [double, int, double], int] x;
+x.[0, 1.[0, 2]];
+\end{cfacode}
+becomes
+\begin{cfacode}
+[x.0, [x.1.0, x.1.2]];
+\end{cfacode}
+Tuple-member expressions also take advantage of unique expressions in the case of possible impurity.
+
+Finally, the various kinds of tuple assignment, constructors, and destructors generate GNU C statement expressions.
+For example, a mass assignment
+\begin{cfacode}
+int x, z;
+double y;
+[double, double] f();
+
+[x, y, z] = 1.5;            // mass assignment
+\end{cfacode}
+generates the following
+\begin{cfacode}
+// [x, y, z] = 1.5;
+_tuple3_(int, double, int) _tmp_stmtexpr_ret0;
+({ // GNU C statement expression
+  // assign LHS address temporaries
+  int *__massassign_L0 = &x;    // ?{}
+  double *__massassign_L1 = &y; // ?{}
+  int *__massassign_L2 = &z;    // ?{}
+
+  // assign RHS value temporary
+  double __massassign_R0 = 1.5; // ?{}
+
+  ({ // tuple construction - construct statement expr return variable
+    // assign LHS address temporaries
+    int *__multassign_L0 = (int *)&_tmp_stmtexpr_ret0.0;       // ?{}
+    double *__multassign_L1 = (double *)&_tmp_stmtexpr_ret0.1; // ?{}
+    int *__multassign_L2 = (int *)&_tmp_stmtexpr_ret0.2;       // ?{}
+
+    // assign RHS value temporaries and mass-assign to L0, L1, L2
+    int __multassign_R0 = (*__massassign_L0=(int)__massassign_R0); // ?{}
+    double __multassign_R1 = (*__massassign_L1=__massassign_R0);   // ?{}
+    int __multassign_R2 = (*__massassign_L2=(int)__massassign_R0); // ?{}
+
+    // perform construction of statement expr return variable using
+    // RHS value temporary
+    ((*__multassign_L0 = __multassign_R0 /* ?{} */),
+     (*__multassign_L1 = __multassign_R1 /* ?{} */),
+     (*__multassign_L2 = __multassign_R2 /* ?{} */));
+  });
+  _tmp_stmtexpr_ret0;
+});
+({ // tuple destruction - destruct assign expr value
+  int *__massassign_L3 = (int *)&_tmp_stmtexpr_ret0.0;       // ?{}
+  double *__massassign_L4 = (double *)&_tmp_stmtexpr_ret0.1; // ?{}
+  int *__massassign_L5 = (int *)&_tmp_stmtexpr_ret0.2;       // ?{}
+  ((*__massassign_L3 /* ^?{} */),
+   (*__massassign_L4 /* ^?{} */),
+   (*__massassign_L5 /* ^?{} */));
+});
+\end{cfacode}
+A variable is generated to store the value produced by a statement expression, since its fields may need to be constructed with a non-trivial constructor and it may need to be referred to multiple times, \eg, in a unique expression.
+$N$ LHS variables are generated and constructed using the address of the tuple components, and a single RHS variable is generated to store the value of the RHS without any loss of precision.
+A nested statement expression is generated that performs the individual assignments and constructs the return value using the results of the individual assignments.
+Finally, the statement expression temporary is destroyed at the end of the expression.
+
+Similarly, a multiple assignment
+\begin{cfacode}
+[x, y, z] = [f(), 3];       // multiple assignment
+\end{cfacode}
+generates the following
+\begin{cfacode}
+// [x, y, z] = [f(), 3];
+_tuple3_(int, double, int) _tmp_stmtexpr_ret0;
+({
+  // assign LHS address temporaries
+  int *__multassign_L0 = &x;    // ?{}
+  double *__multassign_L1 = &y; // ?{}
+  int *__multassign_L2 = &z;    // ?{}
+
+  // assign RHS value temporaries
+  _tuple2_(double, double) _tmp_cp_ret0;
+  _Bool _unq0_finished_ = 0;
+  double __multassign_R0 =
+    (_unq0_finished_ ?
+      _tmp_cp_ret0 :
+      (_tmp_cp_ret0=f(), _unq0_finished_=1, _tmp_cp_ret0)).0; // ?{}
+  double __multassign_R1 =
+    (_unq0_finished_ ?
+      _tmp_cp_ret0 :
+      (_tmp_cp_ret0=f(), _unq0_finished_=1, _tmp_cp_ret0)).1; // ?{}
+  ({ // tuple destruction - destruct f() return temporary
+    // assign LHS address temporaries
+    double *__massassign_L3 = (double *)&_tmp_cp_ret0.0;  // ?{}
+    double *__massassign_L4 = (double *)&_tmp_cp_ret0.1;  // ?{}
+    // perform destructions - intrinsic, so NOP
+    ((*__massassign_L3 /* ^?{} */),
+     (*__massassign_L4 /* ^?{} */));
+  });
+  int __multassign_R2 = 3; // ?{}
+
+  ({ // tuple construction - construct statement expr return variable
+    // assign LHS address temporaries
+    int *__multassign_L3 = (int *)&_tmp_stmtexpr_ret0.0;       // ?{}
+    double *__multassign_L4 = (double *)&_tmp_stmtexpr_ret0.1; // ?{}
+    int *__multassign_L5 = (int *)&_tmp_stmtexpr_ret0.2;       // ?{}
+
+    // assign RHS value temporaries and multiple-assign to L0, L1, L2
+    int __multassign_R3 = (*__multassign_L0=(int)__multassign_R0);  // ?{}
+    double __multassign_R4 = (*__multassign_L1=__multassign_R1);    // ?{}
+    int __multassign_R5 = (*__multassign_L2=__multassign_R2);       // ?{}
+
+    // perform construction of statement expr return variable using
+    // RHS value temporaries
+    ((*__multassign_L3=__multassign_R3 /* ?{} */),
+     (*__multassign_L4=__multassign_R4 /* ?{} */),
+     (*__multassign_L5=__multassign_R5 /* ?{} */));
+  });
+  _tmp_stmtexpr_ret0;
+});
+({  // tuple destruction - destruct assign expr value
+  // assign LHS address temporaries
+  int *__massassign_L5 = (int *)&_tmp_stmtexpr_ret0.0;       // ?{}
+  double *__massassign_L6 = (double *)&_tmp_stmtexpr_ret0.1; // ?{}
+  int *__massassign_L7 = (int *)&_tmp_stmtexpr_ret0.2;       // ?{}
+  // perform destructions - intrinsic, so NOP
+  ((*__massassign_L5 /* ^?{} */),
+   (*__massassign_L6 /* ^?{} */),
+   (*__massassign_L7 /* ^?{} */));
+});
+\end{cfacode}
+The difference here is that $N$ RHS values are stored into separate temporary variables.
+
+The use of statement expressions allows the translator to arbitrarily generate additional temporary variables as needed, but binds the implementation to a non-standard extension of the C language.
+There are other places where the \CFA translator makes use of GNU C extensions, such as its use of nested functions, so this is not a new restriction.
Index: doc/theses/rob_schluntz/variadic.tex
===================================================================
--- doc/theses/rob_schluntz/variadic.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/rob_schluntz/variadic.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,538 @@
+%======================================================================
+\chapter{Variadic Functions}
+%======================================================================
+
+\section{Design Criteria} % TODO: better section name???
+C provides variadic functions through the manipulation of @va_list@ objects.
+In C, a variadic function is one which contains at least one parameter, followed by @...@ as the last token in the parameter list.
+In particular, some form of \emph{argument descriptor} or \emph{sentinel value} is needed to inform the function of the number of arguments and their types.
+Two common argument descriptors are format strings or counter parameters.
+It is important to note that both of these mechanisms are inherently redundant, because they require the user to explicitly specify information that the compiler already knows \footnote{While format specifiers can convey some information the compiler does not know, such as whether to print a number in decimal or hexadecimal, the number of arguments is wholly redundant.}.
+This required repetition is error prone, because it is easy for the user to add or remove arguments without updating the argument descriptor.
+In addition, C requires the programmer to hard code all of the possible expected types.
+As a result, it is cumbersome to write a function that is open to extension.
+For example, a simple function to sum $N$ @int@s,
+\begin{cfacode}
+int sum(int N, ...) {
+  va_list args;
+  va_start(args, N);
+  int ret = 0;
+  while(N) {
+    ret += va_arg(args, int);  // have to specify type
+    N--;
+  }
+  va_end(args);
+  return ret;
+}
+sum(3, 10, 20, 30);  // need to keep counter in sync
+\end{cfacode}
+The @va_list@ type is a special C data type that abstracts variadic-argument manipulation.
+The @va_start@ macro initializes a @va_list@, given the last named parameter.
+Each use of the @va_arg@ macro allows access to the next variadic argument, given a type.
+Since the function signature does not provide any information on what types can be passed to a variadic function, the compiler does not perform any error checks on a variadic call.
+As such, it is possible to pass any value to the @sum@ function, including pointers, floating-point numbers, and structures.
+In the case where the provided type is not compatible with the argument's actual type after default argument promotions, or if too many arguments are accessed, the behaviour is undefined \cite[p.~81]{C11}.
+Furthermore, there is no way to perform the necessary error checks in the @sum@ function at run-time, since type information is not carried into the function body.
+Since they rely on programmer convention rather than compile-time checks, variadic functions are unsafe.
+
+In practice, compilers can provide warnings to help mitigate some of the problems.
+For example, GCC provides the @format@ attribute to specify that a function uses a format string, which allows the compiler to perform some checks related to the standard format-specifiers.
+Unfortunately, this approach does not permit extensions to the format-string syntax, so a programmer cannot extend the attribute to warn for mismatches with custom types.
+
+As a result, C's variadic functions are a deficient language feature.
+Two options were examined to provide better, type-safe variadic functions in \CFA.
+\subsection{Whole Tuple Matching}
+Option 1 is to change the argument matching algorithm, so that type parameters can match whole tuples, rather than just their components.
+This option could be implemented with two phases of argument matching when a function contains type parameters and the argument list contains tuple arguments.
+If flattening and structuring fail to produce a match, a second attempt at matching the function and argument combination is made where tuple arguments are not expanded and structure must match exactly, modulo non-tuple implicit conversions.
+For example:
+\begin{cfacode}
+  forall(otype T, otype U | { T g(U); })
+  void f(T, U);
+
+  [int, int] g([int, int, int, int]);
+
+  f([1, 2], [3, 4, 5, 6]);
+\end{cfacode}
+With flattening and structuring, the call is first transformed into @f(1, 2, 3, 4, 5, 6)@.
+Since the first argument of type @T@ does not have a tuple type, unification decides that @T=int@ and @1@ is matched as the first parameter.
+Likewise, @U@ does not have a tuple type, so @U=int@ and @2@ is accepted as the second parameter.
+There are now no remaining formal parameters, but there are remaining arguments and the function is not variadic, so the match fails.
+
+With the addition of an exact matching attempt, @T=[int,int]@ and @U=[int,int,int,int]@, and so the arguments type check.
+Likewise, when inferring assertion @g@, an exact match is found.
+
+This approach is strict with respect to argument structure, by nature, which makes it syntactically awkward to use in ways that the existing tuple design is not.
+For example, consider a @new@ function that allocates memory using @malloc@, and constructs the result using arbitrary arguments.
+\begin{cfacode}
+struct Array;
+void ?{}(Array *, int, int, int);
+
+forall(dtype T, otype Params | sized(T) | { void ?{}(T *, Params); })
+T * new(Params p) {
+  return malloc(){ p };
+}
+Array * x = new([1, 2, 3]);
+\end{cfacode}
+The call to @new@ is not particularly appealing, since it requires the use of square brackets at the call-site, which is not required in any other function call.
+This shifts the burden from the compiler to the programmer, which is almost always wrong, and creates an odd inconsistency within the language.
+Similarly, in order to pass 0 variadic arguments, an explicit empty tuple must be passed into the argument list, otherwise the exact matching rule would not have an argument to bind against.
+
+It should also be noted that the addition of an exact matching rule only affects the outcome for polymorphic type-binding when tuples are involved.
+For non-tuple arguments, exact matching and flattening and structuring are equivalent.
+For tuple arguments to a function without polymorphic formal-parameters, flattening and structuring work whenever an exact match would have worked, since the tuple is flattened and implicitly restructured to its original structure.
+Thus there is nothing to be gained from permitting the exact matching rule to take effect when a function does not contain polymorphism or none of the arguments are tuples.
+
+Overall, this option takes a step in the right direction, but is contrary to the flexibility of the existing tuple design.
+
+\subsection{A New Typeclass}
+A second option is the addition of another kind of type parameter, @ttype@.
+Matching against a @ttype@ parameter consumes all remaining argument components and packages them into a tuple, binding to the resulting tuple of types.
+In a given parameter list, there must be at most one @ttype@ parameter, and it must occur last; otherwise, the call can never resolve, given the previous rule.
+This idea essentially matches normal variadic semantics, with a strong feeling of similarity to \CCeleven variadic templates.
+As such, @ttype@ variables are also referred to as argument packs.
+This approach is the option that has been added to \CFA.
+
+Like variadic templates, the main way to manipulate @ttype@ polymorphic functions is through recursion.
+Since nothing is known about a parameter pack by default, assertion parameters are key to doing anything meaningful.
+Unlike variadic templates, @ttype@ polymorphic functions can be separately compiled.
+
+For example, a simple translation of the C sum function using @ttype@ is
+\begin{cfacode}
+int sum(void){ return 0; }        // (0)
+forall(ttype Params | { int sum(Params); })
+int sum(int x, Params rest) { // (1)
+  return x+sum(rest);
+}
+sum(10, 20, 30);
+\end{cfacode}
+Since (0) does not accept any arguments, it is not a valid candidate function for the call @sum(10, 20, 30)@.
+In order to call (1), @10@ is matched with @x@, and the argument resolution moves on to the argument pack @rest@, which consumes the remainder of the argument list and @Params@ is bound to @[int, int]@.
+In order to finish the resolution of @sum@, an assertion parameter that matches @int sum(int, int)@ is required.
+Like in the previous iteration, (0) is not a valid candidate, so (1) is examined with @Params@ bound to @[int]@, requiring the assertion @int sum(int)@.
+Next, (0) fails, and to satisfy (1) @Params@ is bound to @[]@, requiring an assertion @int sum()@.
+Finally, (0) matches and (1) fails, which terminates the recursion.
+Effectively, this traces as @sum(10, 20, 30)@ $\rightarrow$ @10+sum(20, 30)@ $\rightarrow$ @10+(20+sum(30))@ $\rightarrow$ @10+(20+(30+sum()))@ $\rightarrow$ @10+(20+(30+0))@.
+
+Interestingly, this version does not require any form of argument descriptor, since the \CFA type system keeps track of all of these details.
+It might be reasonable to take the @sum@ function a step further to enforce a minimum number of arguments, which could be done simply
+\begin{cfacode}
+int sum(int x, int y){
+  return x+y;
+}
+forall(ttype Params | { int sum(int, Params); })
+int sum(int x, int y, Params rest) {
+  return sum(x+y, rest);
+}
+sum(10);          // invalid
+sum(10, 20);      // valid
+sum(10, 20, 30);  // valid
+...
+\end{cfacode}
+
+One more iteration permits the summation of any summable type, as long as all arguments are the same type.
+\begin{cfacode}
+trait summable(otype T) {
+  T ?+?(T, T);
+};
+forall(otype R | summable(R))
+R sum(R x, R y){
+  return x+y;
+}
+forall(otype R, ttype Params
+  | summable(R)
+  | { R sum(R, Params); })
+R sum(R x, R y, Params rest) {
+  return sum(x+y, rest);
+}
+sum(3, 10, 20, 30);
+\end{cfacode}
+Unlike C, it is not necessary to hard code the expected type.
+This @sum@ function is naturally open to extension, in that any user-defined type with a @?+?@ operator is automatically able to be used with the @sum@ function.
+That is to say, the programmer who writes @sum@ does not need full program knowledge of every possible data type, unlike what is necessary to write an equivalent function using the standard C mechanisms.
+
+\begin{sloppypar}
+Going one last step, it is possible to achieve full generality in \CFA, allowing the summation of arbitrary lists of summable types.
+\begin{cfacode}
+trait summable(otype T1, otype T2, otype R) {
+  R ?+?(T1, T2);
+};
+forall(otype T1, otype T2, otype R | summable(T1, T2, R))
+R sum(T1 x, T2 y) {
+  return x+y;
+}
+forall(otype T1, otype T2, otype T3, otype R, ttype Params
+  | summable(T1, T2, T3)
+  | { R sum(T3, Params); })
+R sum(T1 x, T2 y, Params rest ) {
+  return sum(x+y, rest);
+}
+sum(3, 10.5, 20, 30.3);
+\end{cfacode}
+The \CFA translator requires adding explicit @double ?+?(int, double)@ and @double ?+?(double, int)@ functions for this call to work, since implicit conversions are not supported for assertions.
+\end{sloppypar}
+
+A notable limitation of this approach is that it heavily relies on recursive assertions.
+The \CFA translator imposes a limitation on the depth of the recursion for assertion satisfaction.
+Currently, the limit is set to 4, which means that the first version of the @sum@ function is limited to at most 5 arguments, while the second version can support up to 6 arguments.
+The limit is set low due to inefficiencies in the current implementation of the \CFA expression resolver.
+There is ongoing work to improve the performance of the resolver, and with noticeable gains, the limit can be relaxed to allow longer argument lists to @ttype@ functions.
+
+C variadic syntax and @ttype@ polymorphism probably should not be mixed, since it is not clear where to draw the line to decide which arguments belong where.
+Furthermore, it might be desirable to disallow polymorphic functions to use C variadic syntax to encourage a \CFA style.
+Aside from calling C variadic functions, it is not obvious that there is anything that can be done with C variadics that could not also be done with @ttype@ parameters.
+
+Variadic templates in \CC require an ellipsis token to express that a parameter is a parameter pack and to expand a parameter pack.
+\CFA does not need an ellipsis in either case, since the type class @ttype@ is only used for variadics.
+An alternative design is to use an ellipsis combined with an existing type class.
+This approach was not taken because the largest benefit of the ellipsis token in \CC is the ability to expand a parameter pack within an expression, \eg, in fold expressions, which requires compile-time knowledge of the structure of the parameter pack, which is not available in \CFA.
+\begin{cppcode}
+template<typename... Args>
+void f(Args &... args) {
+  g(&args...);  // expand to addresses of pack elements
+}
+\end{cppcode}
+As such, the addition of an ellipsis token would be purely an aesthetic change in \CFA today.
+
+It is possible to write a type-safe variadic print routine, which can replace @printf@
+\begin{cfacode}
+struct S { int x, y; };
+forall(otype T, ttype Params |
+  { void print(T); void print(Params); })
+void print(T arg, Params rest) {
+  print(arg);
+  print(rest);
+}
+void print(char * x) { printf("%s", x); }
+void print(int x) { printf("%d", x);  }
+void print(S s) { print("{ ", s.x, ",", s.y, " }"); }
+print("s = ", (S){ 1, 2 }, "\n");
+\end{cfacode}
+This example routine showcases a variadic-template-like decomposition of the provided argument list.
+The individual @print@ routines allow printing a single element of a type.
+The polymorphic @print@ allows printing any list of types, as long as each individual type has a @print@ function.
+The individual print functions can be used to build up more complicated @print@ routines, such as for @S@, which is something that cannot be done with @printf@ in C.
+
+It is also possible to use @ttype@ polymorphism to provide arbitrary argument forwarding functions.
+For example, it is possible to write @new@ as a library function.
+\begin{cfacode}
+struct Array;
+void ?{}(Array *, int, int, int);
+
+forall(dtype T, ttype Params | sized(T) | { void ?{}(T *, Params); })
+T * new(Params p) {
+  return malloc(){ p }; // construct result of malloc
+}
+Array * x = new(1, 2, 3);
+\end{cfacode}
+In the call to @new@, @Array@ is selected to match @T@, and @Params@ is expanded to match @[int, int, int]@. To satisfy the assertions, a constructor with an interface compatible with @void ?{}(Array *, int, int, int)@ must exist in the current scope.
+
+The @new@ function provides the combination of polymorphic @malloc@ with a constructor call, so that it becomes impossible to forget to construct dynamically-allocated objects.
+This approach provides the type-safety of @new@ in \CC, without the need to specify the allocated type, thanks to return-type inference.
+
+\section{Implementation}
+
+The definition of @new@
+\begin{cfacode}
+forall(dtype T | sized(T)) T * malloc();
+
+forall(dtype T, ttype Params | sized(T) | { void ?{}(T *, Params); })
+T * new(Params p) {
+  return malloc(){ p }; // construct result of malloc
+}
+\end{cfacode}
+generates the following
+\begin{cfacode}
+void *malloc(long unsigned int _sizeof_T, long unsigned int _alignof_T);
+
+void *new(
+  void (*_adapter_)(void (*)(), void *, void *),
+  long unsigned int _sizeof_T,
+  long unsigned int _alignof_T,
+  long unsigned int _sizeof_Params,
+  long unsigned int _alignof_Params,
+  void (* _ctor_T)(void *, void *),
+  void *p
+){
+  void *_retval_new;
+  void *_tmp_cp_ret0;
+  void *_tmp_ctor_expr0;
+  _retval_new=
+    (_adapter_(_ctor_T,
+      (_tmp_ctor_expr0=(_tmp_cp_ret0=malloc(_sizeof_T, _alignof_T),
+        _tmp_cp_ret0)),
+      p),
+    _tmp_ctor_expr0); // ?{}
+  *(void **)&_tmp_cp_ret0; // ^?{}
+  return _retval_new;
+}
+\end{cfacode}
+The constructor for @T@ is called indirectly through the adapter function on the result of @malloc@ and the parameter pack.
+The variable that is allocated and constructed is then returned from @new@.
+
+A call to @new@
+\begin{cfacode}
+struct S { int x, y; };
+void ?{}(S *, int, int);
+
+S * s = new(3, 4);
+\end{cfacode}
+generates the following
+\begin{cfacode}
+struct _tuple2_ {  // _tuple2_(T0, T1)
+  void *field_0;
+  void *field_1;
+};
+struct _conc__tuple2_0 {  // _tuple2_(int, int)
+  int field_0;
+  int field_1;
+};
+struct _conc__tuple2_0 _tmp_cp1;  // tuple argument to new
+struct S *_tmp_cp_ret1;           // return value from new
+void _thunk0(  // ?{}(S *, [int, int])
+  struct S *_p0,
+  struct _conc__tuple2_0 _p1
+){
+  _ctor_S(_p0, _p1.field_0, _p1.field_1);  // restructure tuple parameter
+}
+void _adapter(void (*_adaptee)(), void *_p0, void *_p1){
+  // apply adaptee to arguments after casting to actual types
+  ((void (*)(struct S *, struct _conc__tuple2_0))_adaptee)(
+    _p0,
+    *(struct _conc__tuple2_0 *)_p1
+  );
+}
+struct S *s = (struct S *)(_tmp_cp_ret1=
+  new(
+    _adapter,
+    sizeof(struct S),
+    __alignof__(struct S),
+    sizeof(struct _conc__tuple2_0),
+    __alignof__(struct _conc__tuple2_0),
+    (void (*)(void *, void *))&_thunk0,
+    (({ // copy construct tuple argument to new
+      int *__multassign_L0 = (int *)&_tmp_cp1.field_0;
+      int *__multassign_L1 = (int *)&_tmp_cp1.field_1;
+      int __multassign_R0 = 3;
+      int __multassign_R1 = 4;
+      ((*__multassign_L0=__multassign_R0 /* ?{} */) ,
+       (*__multassign_L1=__multassign_R1 /* ?{} */));
+    }), &_tmp_cp1)
+  ), _tmp_cp_ret1);
+*(struct S **)&_tmp_cp_ret1; // ^?{}  // destroy return value from new
+({  // destroy argument temporary
+  int *__massassign_L0 = (int *)&_tmp_cp1.field_0;
+  int *__massassign_L1 = (int *)&_tmp_cp1.field_1;
+  ((*__massassign_L0 /* ^?{} */) , (*__massassign_L1 /* ^?{} */));
+});
+\end{cfacode}
+Of note, @_thunk0@ is generated to translate calls to @?{}(S *, [int, int])@ into calls to @?{}(S *, int, int)@.
+The call to @new@ constructs a tuple argument using the supplied arguments.
+
+The @print@ function
+\begin{cfacode}
+forall(otype T, ttype Params |
+  { void print(T); void print(Params); })
+void print(T arg, Params rest) {
+  print(arg);
+  print(rest);
+}
+\end{cfacode}
+generates the following
+\begin{cfacode}
+void print_variadic(
+  void (*_adapterF_7tParams__P)(void (*)(), void *),
+  void (*_adapterF_2tT__P)(void (*)(), void *),
+  void (*_adapterF_P2tT2tT__MP)(void (*)(), void *, void *),
+  void (*_adapterF2tT_P2tT2tT_P_MP)(void (*)(), void *, void *, void *),
+  long unsigned int _sizeof_T,
+  long unsigned int _alignof_T,
+  long unsigned int _sizeof_Params,
+  long unsigned int _alignof_Params,
+  void *(*_assign_TT)(void *, void *),
+  void (*_ctor_T)(void *),
+  void (*_ctor_TT)(void *, void *),
+  void (*_dtor_T)(void *),
+  void (*print_T)(void *),
+  void (*print_Params)(void *),
+  void *arg,
+  void *rest
+){
+  void *_tmp_cp0 = __builtin_alloca(_sizeof_T);
+  _adapterF_2tT__P(  // print(arg)
+    ((void (*)())print_T),
+    (_adapterF_P2tT2tT__MP( // copy construct argument
+      ((void (*)())_ctor_TT),
+      _tmp_cp0,
+      arg
+    ), _tmp_cp0)
+  );
+  _dtor_T(_tmp_cp0);  // destroy argument temporary
+  _adapterF_7tParams__P(  // print(rest)
+    ((void (*)())print_Params),
+    rest
+  );
+}
+\end{cfacode}
+The @print_T@ routine is called indirectly through an adapter function with a copy constructed argument, followed by an indirect call to @print_Params@.
+
+A call to @print@
+\begin{cfacode}
+void print(const char * x) { printf("%s", x); }
+void print(int x) { printf("%d", x);  }
+
+print("x = ", 123, ".\n");
+\end{cfacode}
+generates the following
+\begin{cfacode}
+void print_string(const char *x){
+  int _tmp_cp_ret0;
+  (_tmp_cp_ret0=printf("%s", x)) , _tmp_cp_ret0;
+  *(int *)&_tmp_cp_ret0; // ^?{}
+}
+void print_int(int x){
+  int _tmp_cp_ret1;
+  (_tmp_cp_ret1=printf("%d", x)) , _tmp_cp_ret1;
+  *(int *)&_tmp_cp_ret1; // ^?{}
+}
+
+struct _tuple2_ {  // _tuple2_(T0, T1)
+  void *field_0;
+  void *field_1;
+};
+struct _conc__tuple2_0 {  // _tuple2_(int, const char *)
+  int field_0;
+  const char *field_1;
+};
+struct _conc__tuple2_0 _tmp_cp6;  // _tuple2_(int, const char *)
+const char *_thunk0(const char **_p0, const char *_p1){
+        // const char * ?=?(const char **, const char *)
+  return *_p0=_p1;
+}
+void _thunk1(const char **_p0){ // void ?{}(const char **)
+  *_p0; // ?{}
+}
+void _thunk2(const char **_p0, const char *_p1){
+        // void ?{}(const char **, const char *)
+  *_p0=_p1; // ?{}
+}
+void _thunk3(const char **_p0){ // void ^?{}(const char **)
+  *_p0; // ^?{}
+}
+void _thunk4(struct _conc__tuple2_0 _p0){
+        // void print([int, const char *])
+  struct _tuple1_ { // _tuple1_(T0)
+    void *field_0;
+  };
+  struct _conc__tuple1_1 { // _tuple1_(const char *)
+    const char *field_0;
+  };
+  void _thunk5(struct _conc__tuple1_1 _pp0){ // void print([const char *])
+    print_string(_pp0.field_0);  // print(rest.0)
+  }
+  void _adapter_i_pii_(
+    void (*_adaptee)(),
+    void *_ret,
+    void *_p0,
+    void *_p1
+  ){
+    *(int *)_ret=((int (*)(int *, int))_adaptee)(_p0, *(int *)_p1);
+  }
+  void _adapter_pii_(void (*_adaptee)(), void *_p0, void *_p1){
+    ((void (*)(int *, int ))_adaptee)(_p0, *(int *)_p1);
+  }
+  void _adapter_i_(void (*_adaptee)(), void *_p0){
+    ((void (*)(int))_adaptee)(*(int *)_p0);
+  }
+  void _adapter_tuple1_5_(void (*_adaptee)(), void *_p0){
+    ((void (*)(struct _conc__tuple1_1 ))_adaptee)(
+      *(struct _conc__tuple1_1 *)_p0
+    );
+  }
+  print_variadic(
+    _adapter_tuple1_5_,
+    _adapter_i_,
+    _adapter_pii_,
+    _adapter_i_pii_,
+    sizeof(int),
+    __alignof__(int),
+    sizeof(struct _conc__tuple1_1),
+    __alignof__(struct _conc__tuple1_1),
+    (void *(*)(void *, void *))_assign_i,    // int ?=?(int *, int)
+    (void (*)(void *))_ctor_i,               // void ?{}(int *)
+    (void (*)(void *, void *))_ctor_ii,      // void ?{}(int *, int)
+    (void (*)(void *))_dtor_ii,              // void ^?{}(int *)
+    (void (*)(void *))print_int,             // void print(int)
+    (void (*)(void *))&_thunk5,              // void print([const char *])
+    &_p0.field_0,                            // rest.0
+    &(struct _conc__tuple1_1 ){ _p0.field_1 }// [rest.1]
+  );
+}
+struct _tuple1_ {  // _tuple1_(T0)
+  void *field_0;
+};
+struct _conc__tuple1_6 {  // _tuple1_(const char *)
+  const char *field_0;
+};
+const char *_temp0;
+_temp0="x = ";
+void _adapter_pstring_pstring_string_(
+  void (*_adaptee)(),
+  void *_ret,
+  void *_p0,
+  void *_p1
+){
+  *(const char **)_ret=
+    ((const char *(*)(const char **, const char *))_adaptee)(
+      _p0,
+      *(const char **)_p1
+    );
+}
+void _adapter_pstring_string_(void (*_adaptee)(), void *_p0, void *_p1){
+  ((void (*)(const char **, const char *))_adaptee)(
+    _p0,
+    *(const char **)_p1
+  );
+}
+void _adapter_string_(void (*_adaptee)(), void *_p0){
+  ((void (*)(const char *))_adaptee)(*(const char **)_p0);
+}
+void _adapter_tuple2_0_(void (*_adaptee)(), void *_p0){
+  ((void (*)(struct _conc__tuple2_0 ))_adaptee)(
+    *(struct _conc__tuple2_0 *)_p0
+  );
+}
+print_variadic(
+  _adapter_tuple2_0_,
+  _adapter_string_,
+  _adapter_pstring_string_,
+  _adapter_pstring_pstring_string_,
+  sizeof(const char *),
+  __alignof__(const char *),
+  sizeof(struct _conc__tuple2_0 ),
+  __alignof__(struct _conc__tuple2_0 ),
+  &_thunk0,     // const char * ?=?(const char **, const char *)
+  &_thunk1,     // void ?{}(const char **)
+  &_thunk2,     // void ?{}(const char **, const char *)
+  &_thunk3,     // void ^?{}(const char **)
+  print_string, // void print(const char *)
+  &_thunk4,     // void print([int, const char *])
+  &_temp0,                             // "x = "
+  (({  // copy construct tuple argument to print
+    int *__multassign_L0 = (int *)&_tmp_cp6.field_0;
+    const char **__multassign_L1 = (const char **)&_tmp_cp6.field_1;
+    int __multassign_R0 = 123;
+    const char *__multassign_R1 = ".\n";
+    ((*__multassign_L0=__multassign_R0 /* ?{} */),
+     (*__multassign_L1=__multassign_R1 /* ?{} */));
+  }), &_tmp_cp6)                        // [123, ".\n"]
+);
+({  // destroy argument temporary
+  int *__massassign_L0 = (int *)&_tmp_cp6.field_0;
+  const char **__massassign_L1 = (const char **)&_tmp_cp6.field_1;
+  ((*__massassign_L0 /* ^?{} */) , (*__massassign_L1 /* ^?{} */));
+});
+\end{cfacode}
+The type @_tuple2_@ is generated to allow passing the @rest@ argument to @print_variadic@.
+Thunks 0 through 3 provide wrappers for the @otype@ parameters for @const char *@, while @_thunk4@ translates a call to @print([int, const char *])@ into a call to @print_variadic(int, [const char *])@.
+This all builds to a call to @print_variadic@, with the appropriate copy construction of the tuple argument.
Index: doc/theses/thierry/.gitignore
===================================================================
--- doc/theses/thierry/.gitignore	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,29 +1,0 @@
-build/*.aux
-build/*.acn
-build/*.acr
-build/*.alg
-build/*.bbl
-build/*.blg
-build/*.brf
-build/*.dvi
-build/*.glg
-build/*.glo
-build/*.gls
-build/*.idx
-build/*.ind
-build/*.ist
-build/*.lof
-build/*.log
-build/*.lol
-build/*.lot
-build/*.out
-build/*.ps
-build/*.pstex
-build/*.pstex_t
-build/*.tex
-build/*.toc
-*.pdf
-*.png
-figures/*.tex
-
-examples
Index: doc/theses/thierry/Makefile
===================================================================
--- doc/theses/thierry/Makefile	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,147 +1,0 @@
-## Define the appropriate configuration variables.
-
-TeXLIB = .:./style:./text:./annex:./build:../../LaTeXmacros:../../LaTeXmacros/listings:../../LaTeXmacros/enumitem:~/bibliographies:/usr/local/bibliographies:
-LaTeX  = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=build -interaction=nonstopmode
-BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex -terse
-
-## Define the text source files.
-
-SOURCES = ${addsuffix .tex, \
-thesis \
-style/style \
-style/cfa-format \
-annex/glossary \
-text/frontpgs \
-text/intro \
-text/basics \
-text/cforall \
-text/concurrency \
-text/internals \
-text/parallelism \
-text/results \
-text/together \
-text/future \
-}
-
-FIGURES = ${addprefix build/, ${addsuffix .tex, \
-	monitor \
-	ext_monitor \
-	int_monitor \
-	dependency \
-}}
-
-PICTURES = ${addprefix build/, ${addsuffix .pstex, \
-	system \
-	monitor_structs \
-}}
-
-PROGRAMS = ${addsuffix .tex, \
-}
-
-GRAPHS = ${addsuffix .tex, \
-}
-
-## Define the documents that need to be made.
-
-DOCUMENT = thesis.pdf
-
-# Directives #
-
-all : ${DOCUMENT}
-
-clean :
-	@rm -fv ${DOCUMENT} \
-	build/*.acn     \
-	build/*.acr     \
-	build/*.alg     \
-	build/*.aux     \
-	build/*.bbl     \
-	build/*.blg     \
-	build/*.brf     \
-	build/*.cf      \
-	build/*.dvi     \
-	build/*.glg     \
-	build/*.glo     \
-	build/*.gls     \
-	build/*.ist     \
-	build/*.idx     \
-	build/*.ilg     \
-	build/*.ind     \
-	build/*.log     \
-	build/*.out     \
-	build/*.ps      \
-	build/*.pstex   \
-	build/*.pstex_t \
-	build/*.tex     \
-	build/*.toc     \
-	build/*.lof     \
-	build/*.lol     \
-	build/*.lot     \
-	figures/*.tex   \
-	*.png           \
-
-
-# File Dependencies #
-
-${DOCUMENT} : build/${basename ${DOCUMENT}}.ps
-	ps2pdf $<
-
-build/${basename ${DOCUMENT}}.ps : build/${basename ${DOCUMENT}}.dvi
-	dvips $< -o $@
-
-build/${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex ../../LaTeXmacros/common.tex ../../LaTeXmacros/indexstyle annex/local.bib
-
-	@ if [ ! -r ${basename $@}.ind ] ; then touch ${basename $@}.ind ; fi 				# Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
-	@ echo "Citation lookup"											# Must have *.aux file containing citations for bibtex
-	@ if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename ${notdir $@}}.tex ; fi
-	@ echo "Citation Pass 1"
-	@ -${BibTeX} ${basename $@}											# Some citations reference others so run steps again to resolve these citations
-	@ echo "Citation Pass 2"
-	@ ${LaTeX} ${basename ${notdir $@}}.tex
-	@ -${BibTeX} ${basename $@}
-	@ echo "Glossary"
-	@ makeglossaries -q -s ${basename $@}.ist ${basename $@}						# Make index from *.aux entries and input index at end of document
-	@ echo ".dvi generation"
-	@ -build/bump_ver.sh
-	@ ${LaTeX} ${basename ${notdir $@}}.tex									# Run again to get index title into table of contents
-
-
-predefined :
-	sed -f predefined.sed ${basename ${DOCUMENT}}.tex > ${basename $@}.cf
-
-## Define the default recipes.
-
-build/%.tex : figures/%.fig
-	fig2dev -L eepic $< > $@
-
-build/%.ps : figures/%.fig
-	fig2dev -L ps $< > $@
-
-build/%.pstex : figures/%.fig
-	fig2dev -L pstex $< > $@
-	fig2dev -L pstex_t -p $@ $< > $@_t
-
-figures/%.tex: build/%.pstex
-	echo -n 	"\documentclass[preview]{standalone}\n" 	\
-			"\usepackage[T1]{fontenc}\n" 			\
-			"\usepackage[usenames]{color}\n" 		\
-			"\usepackage{graphicx}\n" 			\
-			"\usepackage{listings}\n" 			\
-			"\usepackage{xspace}\n" 			\
-			"\input{style}\n" 				\
-			"\\\\begin{document}\n"				\
-			"{\\\\resizebox{3\\\\textwidth}{!}{\input{${basename ${notdir $@}}.pstex_t}}}\n" \
-			"\end{document}" > $@
-
-%.png : build/%.pstex figures/%.tex
-	echo ${basename $@}
-	${LaTeX} figures/${basename $@}.tex
-	dvips build/${basename $@}.dvi -o build/${basename $@}.ps
-	ps2pdf build/${basename $@}.ps
-	convert -negate ${basename $@}.pdf $@
-
-
-
-# Local Variables: #
-# compile-command: "make" #
-# End: #
Index: doc/theses/thierry/annex/glossary.tex
===================================================================
--- doc/theses/thierry/annex/glossary.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,101 +1,0 @@
-\makeglossaries
-
-\longnewglossaryentry{callsite-locking}
-{name={callsite-locking}}
-{
-Locking done by the calling routine. With this technique, a routine calling a monitor routine aquires the monitor \emph{before} making the call to the actuall routine.
-}
-
-\longnewglossaryentry{entry-point-locking}
-{name={entry-point-locking}}
-{
-Locking done by the called routine. With this technique, a monitor routine called by another routine aquires the monitor \emph{after} entering the routine body but prior to any other code.
-}
-
-\longnewglossaryentry{bulk-acq}
-{name={bulk-acquiring}}
-{
-Implicitly acquiring several monitors when entering a monitor.
-}
-
-\longnewglossaryentry{multi-acq}
-{name={multiple-acquisition}}
-{
-Any locking technique that allows a single thread to acquire the same lock multiple times.
-}
-
-\longnewglossaryentry{mon-ctx}
-{name={monitor context}}
-{
-The state of the current thread regarding which monitors are owned.
-}
-
-
-\longnewglossaryentry{uthread}
-{name={user-level thread}}
-{
-Threads created and managed inside user-space. Each thread has its own stack and its own thread of execution. User-level threads are invisible to the underlying operating system.
-
-\textit{Synonyms : User threads, Lightweight threads, Green threads, Virtual threads, Tasks.}
-}
-
-\longnewglossaryentry{kthread}
-{name={kernel-level thread}}
-{
-Threads created and managed inside kernel-space. Each thread has its own stack and its own thread of execution. Kernel-level threads are owned, managed and scheduled by the underlying operating system.
-
-\textit{Synonyms : OS threads, Hardware threads, Physical threads.}
-}
-
-\longnewglossaryentry{fiber}
-{name={fiber}}
-{
-Fibers are non-preemptive user-level threads. They share most of the caracteristics of user-level threads except that they cannot be preempted by another fiber.
-
-\textit{Synonyms : Tasks.}
-}
-
-\longnewglossaryentry{job}
-{name={job}}
-{
-Unit of work, often sent to a thread pool or worker pool to be executed. Has neither its own stack nor its own thread of execution.
-
-\textit{Synonyms : Tasks.}
-}
-
-\longnewglossaryentry{pool}
-{name={thread-pool}}
-{
-Group of homogeneuous threads that loop executing units of works after another.
-
-\textit{Synonyms : }
-}
-
-\longnewglossaryentry{cfacluster}
-{name={cluster}}
-{
-A group of \gls{kthread} executed in isolation.
-
-\textit{Synonyms : None.}
-}
-
-\longnewglossaryentry{cfathread}
-{name={thread}}
-{
-User level threads that are the default in \CFA. Generally declared using the \code{thread} keyword.
-
-\textit{Synonyms : None.}
-}
-
-\longnewglossaryentry{preemption}
-{name={preemption}}
-{
-Involuntary context switch imposed on threads at a specified rate.
-
-\textit{Synonyms : None.}
-}
-
-\newacronym{tls}{TLS}{Thread Local Storage}
-\newacronym{api}{API}{Application Program Interface}
-\newacronym{raii}{RAII}{Resource Acquisition Is Initialization}
-\newacronym{numa}{NUMA}{Non-Uniform Memory Access}
Index: doc/theses/thierry/annex/local.bib
===================================================================
--- doc/theses/thierry/annex/local.bib	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,150 +1,0 @@
-%    Predefined journal names:
-%  acmcs: Computing Surveys		acta: Acta Infomatica
-%  cacm: Communications of the ACM
-%  ibmjrd: IBM J. Research & Development ibmsj: IBM Systems Journal
-%  ieeese: IEEE Trans. on Soft. Eng.	ieeetc: IEEE Trans. on Computers
-%  ieeetcad: IEEE Trans. on Computer-Aided Design of Integrated Circuits
-%  ipl: Information Processing Letters	jacm: Journal of the ACM
-%  jcss: J. Computer & System Sciences	scp: Science of Comp. Programming
-%  sicomp: SIAM J. on Computing		tocs: ACM Trans. on Comp. Systems
-%  tods: ACM Trans. on Database Sys.	tog: ACM Trans. on Graphics
-%  toms: ACM Trans. on Math. Software	toois: ACM Trans. on Office Info. Sys.
-%  toplas: ACM Trans. on Prog. Lang. & Sys.
-%  tcs: Theoretical Computer Science
-@string{ieeepds="IEEE Transactions on Parallel and Distributed Systems"}
-@string{ieeese="IEEE Transactions on Software Engineering"}
-@string{spe="Software---\-Practice and Experience"}
-@string{sigplan="SIGPLAN Notices"}
-@string{joop="Journal of Object-Oriented Programming"}
-@string{popl="Conference Record of the ACM Symposium on Principles of Programming Languages"}
-@string{osr="Operating Systems Review"}
-@string{pldi="Programming Language Design and Implementation"}
-
-
-@article{HPP:Study,
-	keywords 	= {Parallel, Productivity},
-	author 	= {Lorin Hochstein and Jeff Carver and Forrest Shull and Sima Asgari and Victor Basili and Jeffrey K. Hollingsworth and Marvin V. Zelkowitz },
-	title 	= {Parallel Programmer Productivity: A Case Study of Novice Parallel Programmers},
-}
-
-@article{Chicken,
-	keywords	= {Chicken},
-	author	= {Doug Zongker},
-	title		= {Chicken Chicken Chicken: Chicken Chicken},
-	year		= 2006
-}
-
-@article{TBB,
-	key	= {TBB},
-	keywords 	= {Intel, TBB},
-	title 	= {Intel Thread Building Blocks},
-	note		= "\url{https://www.threadingbuildingblocks.org/}"
-}
-
-@manual{www-cfa,
-	key	= {CFA},
-	keywords 	= {Cforall},
-	author	= {C$\forall$},
-	title 	= {C$\forall$ Programmming Language},
-	note	= {\url{https://plg.uwaterloo.ca/~cforall}},
-}
-
-@mastersthesis{rob-thesis,
-	keywords 	= {Constructors, Destructors, Tuples},
-	author	= {Rob Schluntz},
-	title 	= {Resource Management and Tuples in Cforall},
-	year		= 2017,
-	school	= {University of Waterloo},
-	note	= {\url{https://uwspace.uwaterloo.ca/handle/10012/11830}},
-}
-
-@manual{Cpp-Transactions,
-	keywords	= {C++, Transactional Memory},
-	title		= {Technical Specification for C++ Extensions for Transactional Memory},
-	organization= {International Standard ISO/IEC TS 19841:2015 },
-	publisher   = {American National Standards Institute},
-	address	= {http://www.iso.org},
-	year		= 2015,
-}
-
-@article{BankTransfer,
-	key	= {Bank Transfer},
-	keywords 	= {Bank Transfer},
-	title 	= {Bank Account Transfer Problem},
-	publisher	= {Wiki Wiki Web},
-	address	= {http://wiki.c2.com},
-	year		= 2010
-}
-
-@misc{2FTwoHardThings,
-	keywords 	= {Hard Problem},
-	title 	= {TwoHardThings},
-	author	= {Martin Fowler},
-	howpublished= "\url{https://martinfowler.com/bliki/TwoHardThings.html}",
-	year		= 2009
-}
-
-@article{IntrusiveData,
-	title		= {Intrusive Data Structures},
-	author	= {Jiri Soukup},
-	journal	= {CppReport},
-	year		= 1998,
-	month		= May,
-	volume	= {10/No5.},
-	page		= 22
-}
-
-@article{Herlihy93,
-	author	= {Herlihy, Maurice and Moss, J. Eliot B.},
-	title	= {Transactional memory: architectural support for lock-free data structures},
-	journal	= {SIGARCH Comput. Archit. News},
-	issue_date	= {May 1993},
-	volume	= {21},
-	number	= {2},
-	month	= may,
-	year	= {1993},
-	pages	= {289--300},
-	numpages	= {12},
-	publisher	= {ACM},
-	address	= {New York, NY, USA},
-}
-
-@manual{affinityLinux,
-	key	= {TBB},
-	title		= "{Linux man page - sched\_setaffinity(2)}"
-}
-
-@manual{affinityWindows,
-	title		= "{Windows (vs.85) - SetThreadAffinityMask function}"
-}
-
-@manual{switchToWindows,
-	title		= "{Windows (vs.85) - SwitchToFiber function}"
-}
-
-@manual{affinityFreebsd,
-	title		= "{FreeBSD General Commands Manual - CPUSET(1)}"
-}
-
-@manual{affinityNetbsd,
-	title		= "{NetBSD Library Functions Manual - AFFINITY(3)}"
-}
-
-@manual{affinityMacosx,
-	title		= "{Affinity API Release Notes for OS X v10.5}"
-}
-
-@misc{NodeJs,
-	title		= "{Node.js}",
-	howpublished= "\url{https://nodejs.org/en/}",
-}
-
-@misc{SpringMVC,
-	title		= "{Spring Web MVC}",
-	howpublished= "\url{https://docs.spring.io/spring/docs/current/spring-framework-reference/web.html}",
-}
-
-@misc{Django,
-	title		= "{Django}",
-	howpublished= "\url{https://www.djangoproject.com/}",
-}
Index: doc/theses/thierry/build/bump_ver.sh
===================================================================
--- doc/theses/thierry/build/bump_ver.sh	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,6 +1,0 @@
-#!/bin/bash
-if [ ! -f version ]; then
-    echo "0.0.0" > version
-fi
-
-sed -r 's/([0-9]+\.[0-9]+.)([0-9]+)/echo "\1\$((\2+1))" > version/ge' version > /dev/null
Index: doc/theses/thierry/figures/dependency.fig
===================================================================
--- doc/theses/thierry/figures/dependency.fig	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,119 +1,0 @@
-#FIG 3.2  Produced by xfig version 3.2.5c
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-6 750 2250 2250 2850
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 1050 2550 300 300 750 2550 1350 2550
-4 0 0 50 -1 0 20 0.0000 2 315 1305 900 2700 $\\alpha$3\001
--6
-6 750 1350 2250 1950
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 1050 1650 300 300 750 1650 1350 1650
-4 0 0 50 -1 0 20 0.0000 2 315 1305 900 1800 $\\alpha$2\001
--6
-6 750 450 2250 1050
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 1050 750 300 300 750 750 1350 750
-4 0 0 50 -1 0 20 0.0000 2 315 1305 900 900 $\\alpha$1\001
--6
-6 750 3150 2250 3750
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 1050 3450 300 300 750 3450 1350 3450
-4 0 0 50 -1 0 20 0.0000 2 315 1305 900 3600 $\\alpha$4\001
--6
-6 750 4050 2250 4650
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 1050 4350 300 300 750 4350 1350 4350
-4 0 0 50 -1 0 20 0.0000 2 315 1305 900 4500 $\\alpha$5\001
--6
-6 3000 1350 4800 1950
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 1650 300 300 3000 1650 3600 1650
-4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 1800 $\\gamma$2\001
--6
-6 3000 450 4800 1050
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 750 300 300 3000 750 3600 750
-4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 900 $\\gamma$1\001
--6
-6 3000 2250 4800 2850
-6 3000 2250 3600 2850
-6 3000 2250 3600 2850
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 2550 300 300 3000 2550 3600 2550
--6
--6
-4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 2700 $\\gamma$3\001
--6
-6 3000 3150 4800 3750
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 3450 300 300 3000 3450 3600 3450
-4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 3600 $\\gamma$4\001
--6
-6 3000 4050 4800 4650
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 4350 300 300 3000 4350 3600 4350
-4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 4500 $\\gamma$5\001
--6
-6 3000 4950 4800 5550
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 5250 300 300 3000 5250 3600 5250
-4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 5400 $\\gamma$6\001
--6
-6 5400 1800 6750 4200
-6 5400 1800 6750 2400
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 5700 2100 300 300 5400 2100 6000 2100
-4 0 0 50 -1 0 20 0.0000 2 270 1140 5550 2250 $\\beta$1\001
--6
-6 5400 2700 6750 3300
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 5700 3000 300 300 5400 3000 6000 3000
-4 0 0 50 -1 0 20 0.0000 2 270 1140 5550 3150 $\\beta$2\001
--6
-6 5400 3600 6750 4200
-1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 5700 3900 300 300 5400 3900 6000 3900
-4 0 0 50 -1 0 20 0.0000 2 270 1140 5550 4050 $\\beta$3\001
--6
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 5700 2700 5700 2400
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 5700 3600 5700 3300
--6
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 1050 1350 1050 1050
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 3300 1350 3300 1050
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 3300 2250 3300 1950
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 1050 2250 1050 1950
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 1050 3150 1050 2850
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 3300 3150 3300 2850
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 1050 4050 1050 3750
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 3300 4050 3300 3750
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 3300 4950 3300 4650
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 1.00 60.00 120.00
-	 1350 2550 3000 2550
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 1 2
-	1 1 1.00 60.00 120.00
-	 1350 3450 3000 3450
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 1 2
-	1 1 1.00 60.00 120.00
-	 3000 5175 1350 4500
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 1 2
-	1 1 1.00 60.00 120.00
-	 5462 4060 3582 5156
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 1 2
-	1 1 1.00 60.00 120.00
-	 3564 4198 5438 3144
Index: doc/theses/thierry/figures/ext_monitor.fig
===================================================================
--- doc/theses/thierry/figures/ext_monitor.fig	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,96 +1,0 @@
-#FIG 3.2  Produced by xfig version 3.2.5c
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 3150.000 3450.000 3150 3150 2850 3450 3150 3750
-5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 3150.000 4350.000 3150 4050 2850 4350 3150 4650
-6 5850 1950 6150 2250
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 2100 105 105 6000 2100 6105 2205
-4 1 -1 0 0 0 10 0.0000 2 105 90 6000 2160 d\001
--6
-6 5100 2100 5400 2400
-1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 5250 2250 105 105 5250 2250 5355 2250
-4 1 -1 0 0 0 10 0.0000 2 105 120 5250 2295 X\001
--6
-6 5100 1800 5400 2100
-1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 5250 1950 105 105 5250 1950 5355 1950
-4 1 -1 0 0 0 10 0.0000 2 105 120 5250 2010 Y\001
--6
-6 5850 1650 6150 1950
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 1800 105 105 6000 1800 6105 1905
-4 1 -1 0 0 0 10 0.0000 2 105 90 6000 1860 b\001
--6
-6 3070 5445 7275 5655
-1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3150 5550 80 80 3150 5550 3230 5630
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4500 5550 105 105 4500 5550 4605 5655
-1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 6000 5550 105 105 6000 5550 6105 5655
-4 0 -1 0 0 0 12 0.0000 2 135 1035 4725 5625 blocked task\001
-4 0 -1 0 0 0 12 0.0000 2 135 870 3300 5625 active task\001
-4 0 -1 0 0 0 12 0.0000 2 135 1050 6225 5625 routine mask\001
--6
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3300 3600 105 105 3300 3600 3405 3705
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3600 3600 105 105 3600 3600 3705 3705
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6600 3900 105 105 6600 3900 6705 4005
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6900 3900 105 105 6900 3900 7005 4005
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 2700 105 105 6000 2700 6105 2805
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 2400 105 105 6000 2400 6105 2505
-1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 5100 4575 80 80 5100 4575 5180 4655
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4050 2925 5475 2925 5475 3225 4050 3225 4050 2925
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
-	 3150 3750 3750 3750 3750 4050 3150 4050
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
-	 3150 3450 3750 3450 3900 3675
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
-	 3750 3150 3600 3375
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
-	 3150 4350 3750 4350 3900 4575
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
-	 3750 4050 3600 4275
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
-	 3150 4650 3750 4650 3750 4950 4950 4950
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
-	 6450 3750 6300 3975
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
-	 4950 4950 5175 5100
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
-	 5250 4950 6450 4950 6450 4050 7050 4050 7050 3750 6450 3750
-	 6450 2850 6150 2850 6150 1650
-2 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
-	 5850 4200 5850 3300 4350 3300 4350 4200 5850 4200
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2
-	1 1 1.00 60.00 120.00
-	7 1 1.00 60.00 120.00
-	 5250 3150 5250 2400
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 3150 3150 3750 3150 3750 2850 5700 2850 5700 1650
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 5700 2850 6150 3000
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 5100 1800 5400 1800 5400 2400 5100 2400 5100 1800
-4 1 -1 0 0 0 10 0.0000 2 75 75 6000 2745 a\001
-4 1 -1 0 0 0 10 0.0000 2 75 75 6000 2445 c\001
-4 1 -1 0 0 0 12 0.0000 2 135 315 5100 5325 exit\001
-4 1 -1 0 0 0 12 0.0000 2 135 135 3300 3075 A\001
-4 1 -1 0 0 0 12 0.0000 2 135 795 3300 4875 condition\001
-4 1 -1 0 0 0 12 0.0000 2 135 135 3300 5100 B\001
-4 0 -1 0 0 0 12 0.0000 2 135 420 6600 3675 stack\001
-4 0 -1 0 0 0 12 0.0000 2 180 750 6600 3225 acceptor/\001
-4 0 -1 0 0 0 12 0.0000 2 180 750 6600 3450 signalled\001
-4 1 -1 0 0 0 12 0.0000 2 135 795 3300 2850 condition\001
-4 1 -1 0 0 0 12 0.0000 2 165 420 6000 1350 entry\001
-4 1 -1 0 0 0 12 0.0000 2 135 495 6000 1575 queue\001
-4 0 -1 0 0 0 12 0.0000 2 135 525 6300 2400 arrival\001
-4 0 -1 0 0 0 12 0.0000 2 135 630 6300 2175 order of\001
-4 1 -1 0 0 0 12 0.0000 2 135 525 5100 3675 shared\001
-4 1 -1 0 0 0 12 0.0000 2 135 735 5100 3975 variables\001
-4 0 0 50 -1 0 11 0.0000 2 165 855 4275 3150 Acceptables\001
-4 0 0 50 -1 0 11 0.0000 2 120 165 5775 2700 W\001
-4 0 0 50 -1 0 11 0.0000 2 120 135 5775 2400 X\001
-4 0 0 50 -1 0 11 0.0000 2 120 105 5775 2100 Z\001
-4 0 0 50 -1 0 11 0.0000 2 120 135 5775 1800 Y\001
Index: doc/theses/thierry/figures/int_monitor.fig
===================================================================
--- doc/theses/thierry/figures/int_monitor.fig	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,109 +1,0 @@
-#FIG 3.2  Produced by xfig version 3.2.5c
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 675.000 2700.000 675 2400 375 2700 675 3000
-6 4533 2866 4655 3129
-5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 4657.017 2997.000 4655 2873 4533 2997 4655 3121
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 4655 2866 4655 3129
--6
-6 4725 2866 4847 3129
-5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 4849.017 2997.000 4847 2873 4725 2997 4847 3121
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 4847 2866 4847 3129
--6
-6 4911 2866 5033 3129
-5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 5035.017 2997.000 5033 2873 4911 2997 5033 3121
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 5033 2866 5033 3129
--6
-6 9027 2866 9149 3129
-5 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9024.983 2997.000 9027 2873 9149 2997 9027 3121
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 9027 2866 9027 3129
--6
-6 9253 2866 9375 3129
-5 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9250.983 2997.000 9253 2873 9375 2997 9253 3121
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 9253 2866 9253 3129
--6
-6 9478 2866 9600 3129
-5 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9475.983 2997.000 9478 2873 9600 2997 9478 3121
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 9478 2866 9478 3129
--6
-1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 7650 3675 80 80 7650 3675 7730 3755
-1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3150 3675 80 80 3150 3675 3230 3755
-1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4047 1793 125 125 4047 1793 3929 1752
-1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4050 1500 125 125 4050 1500 3932 1459
-1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8550 1500 125 125 8550 1500 8432 1459
-1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8550 1800 125 125 8550 1800 8432 1759
-1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1200 2850 125 125 1200 2850 1082 2809
-1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 900 2850 125 125 900 2850 782 2809
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 4650 105 105 6000 4650 6105 4755
-1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3900 4650 80 80 3900 4650 3980 4730
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 3900 1950 4200 2100
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 5
-	 3000 4050 1800 4050 1800 1950 3900 1950 3900 1350
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
-	 7800 4050 9000 4050 9000 3150 9600 3150 9600 2850 9000 2850
-	 9000 1950 8700 1950 8700 1350
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 8400 1950 8700 2100
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
-	 3300 4050 4500 4050 4500 3150 5100 3150 5100 2850 4500 2850
-	 4500 1950 4200 1950 4200 1350
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 5
-	 7500 4050 6300 4050 6300 1950 8400 1950 8400 1350
-2 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
-	 8400 3300 8400 2400 6900 2400 6900 3300 8400 3300
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 9000 2850 8850 3150
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 7500 4050 7800 4200
-2 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
-	 3900 3300 3900 2400 2400 2400 2400 3300 3900 3300
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 4500 2850 4350 3150
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 3000 4050 3300 4200
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 675 3000 1425 3000
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 675 2400 1425 2400
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 1425 2700 1500 2925
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 1425 2400 1350 2625
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-	 675 2700 1425 2700
-4 1 -1 0 0 0 12 0.0000 2 135 315 2850 4275 exit\001
-4 1 -1 0 0 0 12 0.0000 2 135 315 7350 4275 exit\001
-4 0 -1 0 0 0 12 0.0000 2 180 750 9150 2325 acceptor/\001
-4 0 -1 0 0 0 12 0.0000 2 180 750 9150 2550 signalled\001
-4 0 -1 0 0 0 12 0.0000 2 135 420 9150 2775 stack\001
-4 1 -1 0 0 0 12 0.0000 2 135 525 7650 2775 shared\001
-4 1 -1 0 0 0 12 0.0000 2 135 735 7650 3075 variables\001
-4 1 -1 0 0 0 12 0.0000 2 135 495 8550 1275 queue\001
-4 1 -1 0 0 0 12 0.0000 2 165 420 8550 1125 entry\001
-4 0 -1 0 0 0 12 0.0000 2 135 630 8850 1575 order of\001
-4 0 -1 0 0 0 12 0.0000 2 135 525 8850 1725 arrival\001
-4 0 -1 0 0 0 12 0.0000 2 180 750 4650 2325 acceptor/\001
-4 0 -1 0 0 0 12 0.0000 2 180 750 4650 2550 signalled\001
-4 0 -1 0 0 0 12 0.0000 2 135 420 4650 2775 stack\001
-4 1 -1 0 0 0 12 0.0000 2 135 525 3150 2775 shared\001
-4 1 -1 0 0 0 12 0.0000 2 135 735 3150 3075 variables\001
-4 0 -1 0 0 0 12 0.0000 2 135 525 4350 1725 arrival\001
-4 0 -1 0 0 0 12 0.0000 2 135 630 4350 1500 order of\001
-4 1 -1 0 0 0 12 0.0000 2 135 495 4050 1275 queue\001
-4 1 -1 0 0 0 12 0.0000 2 165 420 4050 1050 entry\001
-4 0 0 50 -1 0 11 0.0000 2 120 705 600 2325 Condition\001
-4 0 -1 0 0 0 12 0.0000 2 135 1215 6150 4725 blocked thread\001
-4 0 -1 0 0 0 12 0.0000 2 135 1050 4050 4725 active thread\001
Index: doc/theses/thierry/figures/monitor.fig
===================================================================
--- doc/theses/thierry/figures/monitor.fig	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,101 +1,0 @@
-#FIG 3.2  Produced by xfig version 3.2.5c
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 2700.000 1500 2400 1200 2700 1500 3000
-5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 3600.000 1500 3300 1200 3600 1500 3900
-6 4200 1200 4500 1500
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1350 105 105 4350 1350 4455 1455
-4 1 -1 0 0 0 10 0.0000 2 105 90 4350 1410 d\001
--6
-6 4200 900 4500 1200
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1050 105 105 4350 1050 4455 1155
-4 1 -1 0 0 0 10 0.0000 2 105 90 4350 1110 b\001
--6
-6 2400 1500 2700 1800
-1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 1650 105 105 2550 1650 2655 1650
-4 1 -1 0 0 0 10 0.0000 2 105 90 2550 1710 b\001
--6
-6 2400 1800 2700 2100
-1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 1950 105 105 2550 1950 2655 1950
-4 1 -1 0 0 0 10 0.0000 2 75 75 2550 1995 a\001
--6
-6 3300 1500 3600 1800
-1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 1650 105 105 3450 1650 3555 1650
-4 1 -1 0 0 0 10 0.0000 2 105 90 3450 1710 d\001
--6
-6 1350 4650 5325 4950
-1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 1500 4800 80 80 1500 4800 1580 4880
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2850 4800 105 105 2850 4800 2955 4905
-1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 4350 4800 105 105 4350 4800 4455 4905
-4 0 -1 0 0 0 12 0.0000 2 180 765 4575 4875 duplicate\001
-4 0 -1 0 0 0 12 0.0000 2 135 1035 3075 4875 blocked task\001
-4 0 -1 0 0 0 12 0.0000 2 135 870 1650 4875 active task\001
--6
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1650 2850 105 105 1650 2850 1755 2955
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1950 2850 105 105 1950 2850 2055 2955
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4950 3150 105 105 4950 3150 5055 3255
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5250 3150 105 105 5250 3150 5355 3255
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1950 105 105 4350 1950 4455 2055
-1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1650 105 105 4350 1650 4455 1755
-1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3450 3825 80 80 3450 3825 3530 3905
-1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 1950 105 105 3450 1950 3555 1950
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
-	 2400 2100 2625 2250
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
-	 3300 2100 3525 2250
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
-	 4200 2100 4425 2250
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 5
-	 1500 2400 2100 2400 2100 2100 2400 2100 2400 1500
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
-	 1500 3000 2100 3000 2100 3300 1500 3300
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
-	 1500 2700 2100 2700 2250 2925
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
-	 2100 2400 1950 2625
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
-	 1500 3600 2100 3600 2250 3825
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
-	 2100 3300 1950 3525
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
-	 1500 3900 2100 3900 2100 4200 3300 4200
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
-	 4800 3000 4650 3225
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
-	 3300 4200 3525 4350
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
-	 3600 1500 3600 2100 4200 2100 4200 900
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
-	 2700 1500 2700 2100 3300 2100 3300 1500
-2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
-	 3600 4200 4800 4200 4800 3300 5400 3300 5400 3000 4800 3000
-	 4800 2100 4500 2100 4500 900
-2 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
-	 4200 3450 4200 2550 2700 2550 2700 3450 4200 3450
-4 1 -1 0 0 0 10 0.0000 2 75 75 4350 1995 a\001
-4 1 -1 0 0 0 10 0.0000 2 75 75 4350 1695 c\001
-4 1 -1 0 0 0 12 0.0000 2 135 315 3450 4575 exit\001
-4 1 -1 0 0 0 12 0.0000 2 135 135 1650 2325 A\001
-4 1 -1 0 0 0 12 0.0000 2 135 795 1650 4125 condition\001
-4 1 -1 0 0 0 12 0.0000 2 135 135 1650 4350 B\001
-4 0 -1 0 0 0 12 0.0000 2 135 420 4950 2925 stack\001
-4 0 -1 0 0 0 12 0.0000 2 180 750 4950 2475 acceptor/\001
-4 0 -1 0 0 0 12 0.0000 2 180 750 4950 2700 signalled\001
-4 1 -1 0 0 0 12 0.0000 2 135 795 1650 2100 condition\001
-4 1 -1 0 0 0 12 0.0000 2 135 135 2550 1425 X\001
-4 1 -1 0 0 0 12 0.0000 2 135 135 3450 1425 Y\001
-4 1 -1 0 0 0 12 0.0000 2 165 420 4350 600 entry\001
-4 1 -1 0 0 0 12 0.0000 2 135 495 4350 825 queue\001
-4 0 -1 0 0 0 12 0.0000 2 135 525 4650 1650 arrival\001
-4 0 -1 0 0 0 12 0.0000 2 135 630 4650 1425 order of\001
-4 1 -1 0 0 0 12 0.0000 2 135 525 3450 2925 shared\001
-4 1 -1 0 0 0 12 0.0000 2 135 735 3450 3225 variables\001
-4 1 -1 0 0 0 12 0.0000 2 120 510 3000 975 mutex\001
-4 1 -1 0 0 0 10 0.0000 2 75 75 3450 1995 c\001
-4 1 -1 0 0 0 12 0.0000 2 135 570 3000 1200 queues\001
Index: doc/theses/thierry/figures/monitor_structs.fig
===================================================================
--- doc/theses/thierry/figures/monitor_structs.fig	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,71 +1,0 @@
-#FIG 3.2  Produced by xfig version 3.2.5c
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 1500 1200 2100 1200 2100 1500 1500 1500 1500 1200
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 1500 1500 2100 1500 2100 1800 1500 1800 1500 1500
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 3000 1200 3300 1200 3300 1500 3000 1500 3000 1200
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 3000 1500 3300 1500 3300 1800 3000 1800 3000 1500
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 3000 1800 3300 1800 3300 2100 3000 2100 3000 1800
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 3000 2100 3300 2100 3300 2400 3000 2400 3000 2100
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 1500 900 2100 900 2100 1200 1500 1200 1500 900
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2
-	1 1 1.00 90.00 120.00
-	5 1 1.00 45.00 90.00
-	 1800 1050 4050 1050
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 5100 900 5700 900 5700 1800 5100 1800 5100 900
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 6900 1500 7500 1500 7500 2400 6900 2400 6900 1500
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 6000 1200 6600 1200 6600 2100 6000 2100 6000 1200
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 7800 1800 8400 1800 8400 2700 7800 2700 7800 1800
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2
-	1 1 1.00 90.00 120.00
-	5 1 1.00 45.00 90.00
-	 1800 1350 3000 1350
-3 2 0 3 0 7 50 -1 -1 0.000 1 0 0 10
-	 4275 900 4050 975 4350 1050 4050 1125 4350 1200 4050 1275
-	 4350 1350 4050 1425 4350 1500 4125 1575
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-3 2 0 1 0 7 50 -1 -1 0.000 0 1 1 3
-	1 1 1.00 90.00 120.00
-	5 1 1.00 30.00 90.00
-	 3150 1950 4875 2400 6900 1650
-	 0.000 -1.000 0.000
-3 2 0 1 0 7 50 -1 -1 0.000 0 1 1 3
-	1 1 1.00 90.00 120.00
-	5 1 1.00 60.00 90.00
-	 3150 1350 4200 1650 5100 1050
-	 0.000 -1.000 0.000
-3 2 0 1 0 7 50 -1 -1 0.000 0 1 1 3
-	1 1 1.00 90.00 120.00
-	5 1 1.00 60.00 90.00
-	 3150 1650 4575 2025 6000 1350
-	 0.000 -1.000 0.000
-3 2 0 1 0 7 50 -1 -1 0.000 0 1 1 3
-	1 1 1.00 90.00 120.00
-	5 1 1.00 60.00 90.00
-	 3150 2250 5175 2775 7800 1950
-	 0.000 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 120 705 3000 675 Condition\001
-4 0 0 50 -1 0 11 0.0000 2 120 630 3000 885 Criterion\001
-4 0 0 50 -1 0 11 0.0000 2 120 705 1425 675 Condition\001
-4 0 0 50 -1 0 11 0.0000 2 120 390 1425 825 Node\001
-4 0 0 50 -1 0 11 0.0000 2 120 660 6225 675 Monitors\001
-4 0 0 50 -1 0 11 0.0000 2 165 555 3900 675 Waiting\001
-4 0 0 50 -1 0 11 0.0000 2 120 495 3900 825 Thread\001
Index: doc/theses/thierry/figures/system.fig
===================================================================
--- doc/theses/thierry/figures/system.fig	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,166 +1,0 @@
-#FIG 3.2  Produced by xfig version 3.2.5c
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-6 5175 2700 6150 3737
-3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
-	 5475 2702 5625 2777 5325 2852 5625 2927 5325 3002 5625 3077
-	 5325 3152 5625 3227 5325 3302 5475 3377
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 120 885 5175 3737 Processor N\001
-4 0 0 50 -1 0 11 0.0000 2 120 975 5175 3527 PThread N+2\001
--6
-6 3300 2700 4140 3737
-3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
-	 3600 2702 3750 2777 3450 2852 3750 2927 3450 3002 3750 3077
-	 3450 3152 3750 3227 3450 3302 3600 3377
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 120 840 3300 3737 Processor 0\001
-4 0 0 50 -1 0 11 0.0000 2 120 735 3300 3527 PThread 2\001
--6
-6 600 2700 1725 3737
-3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
-	 900 2702 1050 2777 750 2852 1050 2927 750 3002 1050 3077
-	 750 3152 1050 3227 750 3302 900 3377
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 120 1125 600 3737 Main Processor\001
-4 0 0 50 -1 0 11 0.0000 2 120 735 600 3527 PThread 0\001
--6
-6 2100 2700 2835 3737
-3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
-	 2400 2702 2550 2777 2250 2852 2550 2927 2250 3002 2550 3077
-	 2250 3152 2550 3227 2250 3302 2400 3377
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 120 450 2100 3737 Alarm\001
-4 0 0 50 -1 0 11 0.0000 2 120 735 2100 3527 PThread 1\001
--6
-6 600 6301 1290 7367
-3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
-	 900 6302 1050 6377 750 6452 1050 6527 750 6602 1050 6677
-	 750 6752 1050 6827 750 6902 900 6977
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 150 690 600 7337 int main()\001
-4 0 0 50 -1 0 11 0.0000 2 120 570 600 7127 thread 0\001
--6
-6 1635 6300 2205 7336
-3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
-	 1935 6301 2085 6376 1785 6451 2085 6526 1785 6601 2085 6676
-	 1785 6751 2085 6826 1785 6901 1935 6976
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 120 570 1635 7126 thread 1\001
--6
-6 2475 6300 3045 7336
-3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
-	 2775 6301 2925 6376 2625 6451 2925 6526 2625 6601 2925 6676
-	 2625 6751 2925 6826 2625 6901 2775 6976
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 120 570 2475 7126 thread 2\001
--6
-6 3300 6300 3870 7336
-3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
-	 3600 6301 3750 6376 3450 6451 3750 6526 3450 6601 3750 6676
-	 3450 6751 3750 6826 3450 6901 3600 6976
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 120 570 3300 7126 thread 3\001
--6
-6 5325 6300 5970 7336
-3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
-	 5625 6301 5775 6376 5475 6451 5775 6526 5475 6601 5775 6676
-	 5475 6751 5775 6826 5475 6901 5625 6976
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 120 645 5325 7126 thread M\001
--6
-6 4125 6300 4695 7336
-3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
-	 4425 6301 4575 6376 4275 6451 4575 6526 4275 6601 4575 6676
-	 4275 6751 4575 6826 4275 6901 4425 6976
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 120 570 4125 7126 thread 4\001
--6
-6 6975 4050 9525 7875
-2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
-	 7125 5400 7575 5400 7575 5850 7125 5850 7125 5400
-2 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5
-	 7125 4200 7575 4200 7575 4650 7125 4650 7125 4200
-2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
-	 7125 4800 7575 4800 7575 5250 7125 5250 7125 4800
-2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
-	 6975 4050 9525 4050 9525 7875 6975 7875 6975 4050
-3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
-	 7350 6900 7500 6975 7200 7050 7500 7125 7200 7200 7500 7275
-	 7200 7350 7500 7425 7200 7500 7350 7575
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
-	 7350 6000 7500 6075 7200 6150 7500 6225 7200 6300 7500 6375
-	 7200 6450 7500 6525 7200 6600 7350 6675
-	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-	 -1.000 0.000
-4 0 0 50 -1 0 11 0.0000 2 120 945 7725 4500 Pthread stack\001
-4 0 0 50 -1 0 11 0.0000 2 150 1530 7725 5100 Pthread stack (stolen)\001
-4 0 0 50 -1 0 11 0.0000 2 120 540 7725 6375 Pthread\001
-4 0 0 50 -1 0 11 0.0000 2 150 1065 7725 7275 $\\CFA$ thread\001
-4 0 0 50 -1 0 11 0.0000 2 150 990 7725 5700 $\\CFA$ stack\001
--6
-1 2 0 1 0 7 50 -1 -1 0.000 1 3.1416 3150 5250 750 450 2400 4800 3900 5700
-2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
-	 1200 3900 2475 5025
-2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
-	 3600 3900 3450 4800
-2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
-	 5550 3900 3825 5025
-2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
-	 900 6225 2400 5400
-2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
-	 2100 6225 2625 5550
-2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
-	 2850 6225 3000 5700
-2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
-	 3600 6225 3375 5700
-2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
-	 4350 6300 3675 5625
-2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
-	 5625 6225 3900 5400
-2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
-	 525 975 1275 975 1275 2625 525 2625 525 975
-2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
-	 3225 975 3975 975 3975 2625 3225 2625 3225 975
-2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
-	 5100 975 5850 975 5850 2625 5100 2625 5100 975
-2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
-	 525 7425 1275 7425 1275 9075 525 9075 525 7425
-2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
-	 1575 7425 2325 7425 2325 9075 1575 9075 1575 7425
-2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
-	 2400 7425 3150 7425 3150 9075 2400 9075 2400 7425
-2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
-	 3225 7425 3975 7425 3975 9075 3225 9075 3225 7425
-2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
-	 4050 7425 4800 7425 4800 9075 4050 9075 4050 7425
-2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
-	 5250 7425 6000 7425 6000 9075 5250 9075 5250 7425
-2 1 1 8 0 7 50 -1 -1 4.000 0 0 -1 1 0 2
-	1 1 2.00 180.00 75.00
-	 2400 3900 2775 4800
-2 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5
-	 2025 2625 2775 2625 2775 975 2025 975 2025 2625
-4 0 0 50 -1 0 18 0.0000 2 30 225 4500 3150 ...\001
-4 0 0 50 -1 0 18 0.0000 2 30 225 3750 4500 ...\001
-4 0 0 50 -1 0 11 0.0000 2 120 705 2775 5325 Scheduler\001
-4 0 0 50 -1 0 18 0.0000 2 30 225 4950 6600 ...\001
-4 0 0 50 -1 0 18 0.0000 2 30 225 4200 5850 ...\001
Index: doc/theses/thierry/notes/cor-thread-traits.c
===================================================================
--- doc/theses/thierry/notes/cor-thread-traits.c	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,90 +1,0 @@
-//-----------------------------------------------------------------------------
-// Coroutine trait
-// Anything that implements this trait can be resumed.
-// Anything that is resumed is a coroutine.
-trait is_coroutine(dtype T) {
-      void main(T* this);
-      coroutine_handle* get_handle(T* this);
-}
-
-//-----------------------------------------------------------------------------
-forall(dtype T | {coroutine_handle* T.c})
-coroutine_handle* get_handle(T* this) {
-	return this->c
-}
-
-//-----------------------------------------------------------------------------
-struct myCoroutine {
-	int bla;
-	coroutine_handle c;
-};
-
-void main(myCoroutine* this) {
-	sout | this->bla | endl;
-}
-
-void foo() {
-	//Run the coroutine
-	myCoroutine myc;
-	resume(myc);
-}
-
-//-----------------------------------------------------------------------------
-// Thread trait
-// Alternative 1
-trait is_thread(dtype T) { 
-      void main(T* this);
-      thread_handle* get_handle(T* this);
-	thread T;
-};
-
-//-----------------------------------------------------------------------------
-forall(dtype T | {thread_handle* T.t})
-thread_handle* get_handle(T* this) {
-	return this->t
-}
-
-//-----------------------------------------------------------------------------
-thread myThread {
-	int bla;
-	thread_handle c;
-};
-
-void main(myThread* this) {
-	sout | this->bla | endl;
-}
-
-void foo() {
-	//Run the thread
-	myThread myc;
-}
-
-//-----------------------------------------------------------------------------
-// Thread trait
-// Alternative 2
-trait is_thread(dtype T) {
-      void main(T* this);
-      thread_handle* get_handle(T* this);
-	
-};
-
-//-----------------------------------------------------------------------------
-forall(dtype T | {thread_handle* T.t})
-thread_handle* get_handle(T* this) {
-	return this->t
-}
-
-//-----------------------------------------------------------------------------
-struct myThread {
-	int bla;
-	thread_handle c;
-};
-
-void main(myThread* this) {
-	sout | this->bla | endl;
-}
-
-void foo() {
-	//Run the thread
-	thread(myThread) myc;
-}
Index: doc/theses/thierry/notes/lit-review.md
===================================================================
--- doc/theses/thierry/notes/lit-review.md	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,25 +1,0 @@
-lit review :
-
-Lister77 : nested monitor calls
-	- explains the problem
-	- no solution
-	- Lister : An implementation of monitors.
-	- Lister : Hierarchical monitors.
-
-Haddon77 : Nested monitor calls
-	- monitors should be release before acquiring a new one.
-
-Horst Wettstein : The problem of nested monitor calls revisited
-	- Solves nested monitor by allowing barging
-
-David L. Parnas : The non problem of nesied monitor calls
-	- not an actual problem in real life
-
-M. Joseph and VoR. Prasad : More on nested monitor call
-	- WTF... don't use monitors, use pure classes instead, whatever that is
-
-Joseph et al, 1978). 
-
-Toby bloom : Evaluating Synchronization Mechanisms
-	- Methods to evaluate concurrency
-
Index: doc/theses/thierry/notes/notes.md
===================================================================
--- doc/theses/thierry/notes/notes.md	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,14 +1,0 @@
-Internal scheduling notes.
-
-Internal scheduling requires a stack or queue to make sense.
-We also need a stack of "monitor contexts" to be able to restuore stuff.
-
-Multi scheduling try 1 
- - adding threads to many monitors and synching the monitors
- - Too hard
-
-Multi scheduling try 2
- - using a leader when in a group
- - it's hard but doable to manage who is the leader and keep the current context
- - basically __monitor_guard_t always saves an restore the leader and current context
- 
Index: doc/theses/thierry/style/cfa-format.tex
===================================================================
--- doc/theses/thierry/style/cfa-format.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,279 +1,0 @@
-\usepackage[usenames,dvipsnames]{xcolor}
-\usepackage{listings}
-\usepackage{inconsolata}
-
-\definecolor{basicCol}{HTML}{000000}
-\definecolor{commentCol}{HTML}{000000}
-\definecolor{stringCol}{HTML}{000000}
-\definecolor{keywordCol}{HTML}{000000}
-\definecolor{identifierCol}{HTML}{000000}
-
-% from https://gist.github.com/nikolajquorning/92bbbeef32e1dd80105c9bf2daceb89a
-\lstdefinelanguage{sml} {
-  morekeywords= {
-    EQUAL, GREATER, LESS, NONE, SOME, abstraction, abstype, and, andalso, array, as, before, bool, case, char, datatype, do, else, end, eqtype, exception, exn, false, fn, fun, functor, handle, if, in, include, infix, infixr, int, let, list, local, nil, nonfix, not, o, of, op, open, option, orelse, overload, print, raise, real, rec, ref, sharing, sig, signature, string, struct, structure, substring, then, true, type, unit, val, vector, where, while, with, withtype, word
-  },
-  morestring=[b]",
-  morecomment=[s]{(*}{*)},
-}
-
-\lstdefinelanguage{D}{
-  % Keywords
-  morekeywords=[1]{
-    abstract, alias, align, auto, body, break, cast, catch, class, const,
-    continue, debug, delegate, delete, deprecated, do, else, enum, export,
-    false, final, finally, for, foreach, foreach_reverse, function, goto, if,
-    immutable, import, in, inout, interface, invariant, is, lazy, macro, mixin,
-    module, new, nothrow, null, out, override, package, pragma, private,
-    protected, public, pure, ref, return, shared, static, struct, super,
-    switch, synchronized, template, this, throw, true, try, typedef, typeid,
-    typeof, union, unittest, volatile, while, with
-  },
-  % Special identifiers, common functions
-  morekeywords=[2]{enforce},
-  % Ugly identifiers
-  morekeywords=[3]{
-    __DATE__, __EOF__, __FILE__, __LINE__, __TIMESTAMP__, __TIME__, __VENDOR__,
-    __VERSION__, __ctfe, __gshared, __monitor, __thread, __vptr, _argptr,
-    _arguments, _ctor, _dtor
-  },
-  % Basic types
-  morekeywords=[4]{
-     byte, ubyte, short, ushort, int, uint, long, ulong, cent, ucent, void,
-     bool, bit, float, double, real, ushort, int, uint, long, ulong, float,
-     char, wchar, dchar, string, wstring, dstring, ireal, ifloat, idouble,
-     creal, cfloat, cdouble, size_t, ptrdiff_t, sizediff_t, equals_t, hash_t
-  },
-  % Strings
-  morestring=[b]{"},
-  morestring=[b]{'},
-  morestring=[b]{`},
-  % Comments
-  comment=[l]{//},
-  morecomment=[s]{/*}{*/},
-  morecomment=[s][\color{blue}]{/**}{*/},
-  morecomment=[n]{/+}{+/},
-  morecomment=[n][\color{blue}]{/++}{+/},
-  % Options
-  sensitive=true
-}
-
-\lstdefinelanguage{rust}{
-  % Keywords
-  morekeywords=[1]{
-    abstract, alignof, as, become, box,
-    break, const, continue, crate, do,
-    else, enum, extern, false, final,
-    fn, for, if, impl, in,
-    let, loop, macro, match, mod,
-    move, mut, offsetof, override, priv,
-    proc, pub, pure, ref, return,
-    Self, self, sizeof, static, struct,
-    super, trait, true,  type, typeof,
-    unsafe, unsized, use, virtual, where,
-    while, yield
-  },
-  % Strings
-  morestring=[b]{"},
-  % Comments
-  comment=[l]{//},
-  morecomment=[s]{/*}{*/},
-  % Options
-  sensitive=true
-}
-
-\lstdefinelanguage{pseudo}{
-	morekeywords={string,uint,int,bool,float},%
-	sensitive=true,%
-	morecomment=[l]{//},%
-	morecomment=[s]{/*}{*/},%
-	morestring=[b]',%
-	morestring=[b]",%
-	morestring=[s]{`}{`},%
-}%
-
-\newcommand{\KWC}{K-W C\xspace}
-
-\lstdefinestyle{pseudoStyle}{
-  escapeinside={@@},
-  basicstyle=\linespread{0.9}\sf\footnotesize,		% reduce line spacing and use sans-serif font
-  keywordstyle=\bfseries\color{blue},
-  keywordstyle=[2]\bfseries\color{Plum},
-  commentstyle=\itshape\color{OliveGreen},		    % green and italic comments
-  identifierstyle=\color{identifierCol},
-  stringstyle=\sf\color{Mahogany},			          % use sanserif font
-  mathescape=true,
-  columns=fixed,
-  aboveskip=4pt,                                  % spacing above/below code block
-  belowskip=3pt,
-  keepspaces=true,
-  tabsize=4,
-  % frame=lines,
-  literate=,
-  showlines=true,                                 % show blank lines at end of code
-  showspaces=false,
-  showstringspaces=false,
-  escapechar=\$,
-  xleftmargin=\parindentlnth,                     % indent code to paragraph indentation
-  moredelim=[is][\color{red}\bfseries]{**R**}{**R**},    % red highlighting
-  % moredelim=* detects keywords, comments, strings, and other delimiters and applies their formatting
-  % moredelim=** allows cumulative application
-}
-
-\lstdefinestyle{defaultStyle}{
-  escapeinside={@@},
-  basicstyle=\linespread{0.9}\tt\footnotesize,		% reduce line spacing and use typewriter font
-  keywordstyle=\bfseries\color{blue},
-  keywordstyle=[2]\bfseries\color{Plum},
-  commentstyle=\itshape\color{OliveGreen},		    % green and italic comments
-  identifierstyle=\color{identifierCol},
-  stringstyle=\sf\color{Mahogany},			          % use sanserif font
-  mathescape=true,
-  columns=fixed,
-  aboveskip=4pt,                                  % spacing above/below code block
-  belowskip=3pt,
-  keepspaces=true,
-  tabsize=4,
-  % frame=lines,
-  literate=,
-  showlines=true,                                 % show blank lines at end of code
-  showspaces=false,
-  showstringspaces=false,
-  escapechar=\$,
-  xleftmargin=\parindentlnth,                     % indent code to paragraph indentation
-  moredelim=[is][\color{red}\bfseries]{**R**}{**R**},    % red highlighting
-  % moredelim=* detects keywords, comments, strings, and other delimiters and applies their formatting
-  % moredelim=** allows cumulative application
-}
-
-\lstdefinestyle{cfaStyle}{
-  escapeinside={@@},
-  basicstyle=\linespread{0.9}\tt\footnotesize,		% reduce line spacing and use typewriter font
-  keywordstyle=\bfseries\color{blue},
-  keywordstyle=[2]\bfseries\color{Plum},
-  commentstyle=\sf\itshape\color{OliveGreen},		  % green and italic comments
-  identifierstyle=\color{identifierCol},
-  stringstyle=\sf\color{Mahogany},			          % use sanserif font
-  mathescape=true,
-  columns=fixed,
-  aboveskip=4pt,                                  % spacing above/below code block
-  belowskip=3pt,
-  keepspaces=true,
-  tabsize=4,
-  % frame=lines,
-  literate=,
-  showlines=true,                                 % show blank lines at end of code
-  showspaces=false,
-  showstringspaces=false,
-  escapechar=\$,
-  xleftmargin=\parindentlnth,                     % indent code to paragraph indentation
-  moredelim=[is][\color{red}\bfseries]{**R**}{**R**},    % red highlighting
-  morekeywords=[2]{accept, signal, signal_block, wait, waitfor},
-}
-
-\lstMakeShortInline[basewidth=0.5em,breaklines=true,basicstyle=\normalsize\ttfamily\color{basicCol}]@  % single-character for \lstinline
-
-\lstnewenvironment{ccode}[1][]{
-  \lstset{
-    language = C,
-    style=defaultStyle,
-    captionpos=b,
-    #1
-  }
-}{}
-
-\lstnewenvironment{cfacode}[1][]{
-  \lstset{
-    language = CFA,
-    style=cfaStyle,
-    captionpos=b,
-    #1
-  }
-}{}
-
-\lstnewenvironment{pseudo}[1][]{
-  \lstset{
-    language = pseudo,
-    style=pseudoStyle,
-    captionpos=b,
-    #1
-  }
-}{}
-
-\lstnewenvironment{cppcode}[1][]{
-  \lstset{
-    language = c++,
-    style=defaultStyle,
-    captionpos=b,
-    #1
-  }
-}{}
-
-\lstnewenvironment{ucppcode}[1][]{
-  \lstset{
-    language = c++,
-    style=defaultStyle,
-    captionpos=b,
-    #1
-  }
-}{}
-
-\lstnewenvironment{javacode}[1][]{
-  \lstset{
-    language = java,
-    style=defaultStyle,
-    captionpos=b,
-    #1
-  }
-}{}
-
-\lstnewenvironment{scalacode}[1][]{
-  \lstset{
-    language = scala,
-    style=defaultStyle,
-    captionpos=b,
-    #1
-  }
-}{}
-
-\lstnewenvironment{smlcode}[1][]{
-  \lstset{
-    language = sml,
-    style=defaultStyle,
-    captionpos=b,
-    #1
-  }
-}{}
-
-\lstnewenvironment{dcode}[1][]{
-  \lstset{
-    language = D,
-    style=defaultStyle,
-    captionpos=b,
-    #1
-  }
-}{}
-
-\lstnewenvironment{rustcode}[1][]{
-  \lstset{
-    language = rust,
-    style=defaultStyle,
-    captionpos=b,
-    #1
-  }
-}{}
-
-\lstnewenvironment{gocode}[1][]{
-  \lstset{
-    language = Golang,
-    style=defaultStyle,
-    captionpos=b,
-    #1
-  }
-}{}
-
-\newcommand{\zero}{\lstinline{zero_t}\xspace}
-\newcommand{\one}{\lstinline{one_t}\xspace}
-\newcommand{\ateq}{\lstinline{\@=}\xspace}
-\newcommand{\code}[1]{\lstinline[language=CFA,style=cfaStyle]{#1}}
-\newcommand{\pscode}[1]{\lstinline[language=pseudo,style=pseudoStyle]{#1}}
Index: doc/theses/thierry/style/style.tex
===================================================================
--- doc/theses/thierry/style/style.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,12 +1,0 @@
-\input{common}                                          % bespoke macros used in the document
-\input{cfa-format}
-
-% \CFADefaultStyle
-
-% \lstset{
-% morekeywords=[2]{nomutex,mutex,thread,wait,wait_release,signal,signal_block,accept,monitor,suspend,resume,coroutine},
-% keywordstyle=[2]\color{blue},				% second set of keywords for concurrency
-% basicstyle=\linespread{0.9}\tt\small,		% reduce line spacing and use typewriter font
-% stringstyle=\sf\color{Mahogany},			% use sanserif font
-% commentstyle=\itshape\color{OliveGreen},		% green and italic comments
-% }%
Index: doc/theses/thierry/text/basics.tex
===================================================================
--- doc/theses/thierry/text/basics.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,497 +1,0 @@
-% ======================================================================
-% ======================================================================
-\chapter{Concurrency Basics}\label{basics}
-% ======================================================================
-% ======================================================================
-Before any detailed discussion of the concurrency and parallelism in \CFA, it is important to describe the basics of concurrency and how they are expressed in \CFA user code.
-
-\section{Basics of concurrency}
-At its core, concurrency is based on having multiple call-stacks and scheduling among threads of execution executing on these stacks. Concurrency without parallelism only requires having multiple call stacks (or contexts) for a single thread of execution.
-
-Execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining. Execution with a single thread and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread's perspective) across the stacks is called concurrency.
-
-Therefore, a minimal concurrency system can be achieved by creating coroutines (see Section \ref{coroutine}), which instead of context-switching among each other, always ask an oracle where to context-switch next. While coroutines can execute on the caller's stack-frame, stack-full coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model (a.k.a., non-preemptive scheduling). The oracle/scheduler can either be a stack-less or stack-full entity and correspondingly require one or two context-switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption.
-
-A scheduler introduces order of execution uncertainty, while preemption introduces uncertainty about where context switches occur. Mutual exclusion and synchronization are ways of limiting non-determinism in a concurrent system. Now it is important to understand that uncertainty is desirable; uncertainty can be used by runtime systems to significantly increase performance and is often the basis of giving a user the illusion that tasks are running in parallel. Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows.
-
-\section{\protect\CFA's Thread Building Blocks}
-One of the important features that are missing in C is threading\footnote{While the C11 standard defines a ``threads.h'' header, it is minimal and defined as optional. As such, library support for threading is far from widespread. At the time of writing the thesis, neither \texttt{gcc} nor \texttt{clang} support ``threads.h'' in their respective standard libraries.}. On modern architectures, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write efficient concurrent programs to take advantage of parallelism. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay.
-
-\section{Coroutines: A Stepping Stone}\label{coroutine}
-While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. \textbf{Coroutine}s are generalized routines which have predefined points where execution is suspended and can be resumed at a later time. Therefore, they need to deal with context switches and other context-management operations. This proposal includes coroutines both as an intermediate step for the implementation of threads, and a first-class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolves around two features: independent call-stacks and \code{suspend}/\code{resume}.
-
-\begin{table}
-\begin{center}
-\begin{tabular}{c @{\hskip 0.025in}|@{\hskip 0.025in} c @{\hskip 0.025in}|@{\hskip 0.025in} c}
-\begin{ccode}[tabsize=2]
-//Using callbacks
-void fibonacci_func(
-	int n,
-	void (*callback)(int)
-) {
-	int first = 0;
-	int second = 1;
-	int next, i;
-	for(i = 0; i < n; i++)
-	{
-		if(i <= 1)
-			next = i;
-		else {
-			next = f1 + f2;
-			f1 = f2;
-			f2 = next;
-		}
-		callback(next);
-	}
-}
-
-int main() {
-	void print_fib(int n) {
-		printf("%d\n", n);
-	}
-
-	fibonacci_func(
-		10, print_fib
-	);
-
-
-
-}
-\end{ccode}&\begin{ccode}[tabsize=2]
-//Using output array
-void fibonacci_array(
-	int n,
-	int* array
-) {
-	int f1 = 0; int f2 = 1;
-	int next, i;
-	for(i = 0; i < n; i++)
-	{
-		if(i <= 1)
-			next = i;
-		else {
-			next = f1 + f2;
-			f1 = f2;
-			f2 = next;
-		}
-		array[i] = next;
-	}
-}
-
-
-int main() {
-	int a[10];
-
-	fibonacci_func(
-		10, a
-	);
-
-	for(int i=0;i<10;i++){
-		printf("%d\n", a[i]);
-	}
-
-}
-\end{ccode}&\begin{ccode}[tabsize=2]
-//Using external state
-typedef struct {
-	int f1, f2;
-} Iterator_t;
-
-int fibonacci_state(
-	Iterator_t* it
-) {
-	int f;
-	f = it->f1 + it->f2;
-	it->f2 = it->f1;
-	it->f1 = max(f,1);
-	return f;
-}
-
-
-
-
-
-
-
-int main() {
-	Iterator_t it={0,0};
-
-	for(int i=0;i<10;i++){
-		printf("%d\n",
-			fibonacci_state(
-				&it
-			);
-		);
-	}
-
-}
-\end{ccode}
-\end{tabular}
-\end{center}
-\caption{Different implementations of a Fibonacci sequence generator in C.}
-\label{lst:fibonacci-c}
-\end{table}
-
-A good example of a problem made easier with coroutines is generators, e.g., generating the Fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Listing \ref{lst:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approaches suffer from strong coupling. The left and centre approaches require that the generator have knowledge of how the sequence is used, while the rightmost approach requires holding internal state between calls on behalf of the generator and makes it much harder to handle corner cases like the Fibonacci seed.
-
-Listing \ref{lst:fibonacci-cfa} is an example of a solution to the Fibonacci problem using \CFA coroutines, where the coroutine stack holds sufficient state for the next generation. This solution has the advantage of having very strong decoupling between how the sequence is generated and how it is used. Indeed, this version is as easy to use as the \code{fibonacci_state} solution, while the implementation is very similar to the \code{fibonacci_func} example.
-
-\begin{figure}
-\begin{cfacode}[caption={Implementation of Fibonacci using coroutines},label={lst:fibonacci-cfa}]
-coroutine Fibonacci {
-	int fn; //used for communication
-};
-
-void ?{}(Fibonacci& this) { //constructor
-	this.fn = 0;
-}
-
-//main automatically called on first resume
-void main(Fibonacci& this) with (this) {
-	int fn1, fn2; 		//retained between resumes
-	fn  = 0;
-	fn1 = fn;
-	suspend(this); 		//return to last resume
-
-	fn  = 1;
-	fn2 = fn1;
-	fn1 = fn;
-	suspend(this); 		//return to last resume
-
-	for ( ;; ) {
-		fn  = fn1 + fn2;
-		fn2 = fn1;
-		fn1 = fn;
-		suspend(this); 	//return to last resume
-	}
-}
-
-int next(Fibonacci& this) {
-	resume(this); //transfer to last suspend
-	return this.fn;
-}
-
-void main() { //regular program main
-	Fibonacci f1, f2;
-	for ( int i = 1; i <= 10; i += 1 ) {
-		sout | next( f1 ) | next( f2 ) | endl;
-	}
-}
-\end{cfacode}
-\end{figure}
-
-Listing \ref{lst:fmt-line} shows the \code{Format} coroutine for restructuring text into groups of character blocks of fixed size. The example takes advantage of resuming coroutines in the constructor to simplify the code and highlights the idea that interesting control flow can occur in the constructor.
-
-\begin{figure}
-\begin{cfacode}[tabsize=3,caption={Formatting text into lines of 5 blocks of 4 characters.},label={lst:fmt-line}]
-//format characters into blocks of 4 and groups of 5 blocks per line
-coroutine Format {
-	char ch;									//used for communication
-	int g, b;								//global because used in destructor
-};
-
-void  ?{}(Format& fmt) {
-	resume( fmt );  						//prime (start) coroutine
-}
-
-void ^?{}(Format& fmt) with fmt {
-	if ( fmt.g != 0 || fmt.b != 0 )
-	sout | endl;
-}
-
-void main(Format& fmt) with fmt {
-	for ( ;; ) {							//for as many characters
-		for(g = 0; g < 5; g++) {		//groups of 5 blocks
-			for(b = 0; b < 4; fb++) {	//blocks of 4 characters
-				suspend();
-				sout | ch;					//print character
-			}
-			sout | "  ";					//print block separator
-		}
-		sout | endl;						//print group separator
-	}
-}
-
-void prt(Format & fmt, char ch) {
-	fmt.ch = ch;
-	resume(fmt);
-}
-
-int main() {
-	Format fmt;
-	char ch;
-	Eof: for ( ;; ) {						//read until end of file
-		sin | ch;							//read one character
-		if(eof(sin)) break Eof;			//eof ?
-		prt(fmt, ch);						//push character for formatting
-	}
-}
-\end{cfacode}
-\end{figure}
-
-\subsection{Construction}
-One important design challenge for implementing coroutines and threads (shown in section \ref{threads}) is that the runtime system needs to run code after the user-constructor runs to connect the fully constructed object into the system. In the case of coroutines, this challenge is simpler since there is no non-determinism from preemption or scheduling. However, the underlying challenge remains the same for coroutines and threads.
-
-The runtime system needs to create the coroutine's stack and, more importantly, prepare it for the first resumption. The timing of the creation is non-trivial since users expect both to have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. There are several solutions to this problem but the chosen option effectively forces the design of the coroutine.
-
-Furthermore, \CFA faces an extra challenge as polymorphic routines create invisible thunks when cast to non-polymorphic routines and these thunks have function scope. For example, the following code, while looking benign, can run into undefined behaviour because of thunks:
-
-\begin{cfacode}
-//async: Runs function asynchronously on another thread
-forall(otype T)
-extern void async(void (*func)(T*), T* obj);
-
-forall(otype T)
-void noop(T*) {}
-
-void bar() {
-	int a;
-	async(noop, &a); //start thread running noop with argument a
-}
-\end{cfacode}
-
-The generated C code\footnote{Code trimmed down for brevity} creates a local thunk to hold type information:
-
-\begin{ccode}
-extern void async(/* omitted */, void (*func)(void*), void* obj);
-
-void noop(/* omitted */, void* obj){}
-
-void bar(){
-	int a;
-	void _thunk0(int* _p0){
-		/* omitted */
-		noop(/* omitted */, _p0);
-	}
-	/* omitted */
-	async(/* omitted */, ((void (*)(void*))(&_thunk0)), (&a));
-}
-\end{ccode}
-The problem in this example is a storage management issue: the function pointer \code{_thunk0} is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes undefined behaviour; i.e., the stack-based thunk being destroyed before it can be used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that nested routines cannot be passed outside of the declaration scope. The case of coroutines and threads is simply an extension of this problem to multiple call stacks.
-
-\subsection{Alternative: Composition}
-One solution to this challenge is to use composition/containment, where coroutine fields are added to manage the coroutine.
-
-\begin{cfacode}
-struct Fibonacci {
-	int fn; //used for communication
-	coroutine c; //composition
-};
-
-void FibMain(void*) {
-	//...
-}
-
-void ?{}(Fibonacci& this) {
-	this.fn = 0;
-	//Call constructor to initialize coroutine
-	(this.c){myMain};
-}
-\end{cfacode}
-The downside of this approach is that users need to correctly construct the coroutine handle before using it. Like any other objects, the user must carefully choose construction order to prevent usage of objects not yet constructed. However, in the case of coroutines, users must also pass to the coroutine information about the coroutine main, like in the previous example. This opens the door for user errors and requires extra runtime storage to pass at runtime information that can be known statically.
-
-\subsection{Alternative: Reserved keyword}
-The next alternative is to use language support to annotate coroutines as follows:
-
-\begin{cfacode}
-coroutine Fibonacci {
-	int fn; //used for communication
-};
-\end{cfacode}
-The \code{coroutine} keyword means the compiler can find and inject code where needed. The downside of this approach is that it makes coroutine a special case in the language. Users wanting to extend coroutines or build their own for various reasons can only do so in ways offered by the language. Furthermore, implementing coroutines without language support also displays the power of the programming language used. While this is ultimately the option used for idiomatic \CFA code, coroutines and threads can still be constructed by users without using the language support. The reserved keywords are only present to improve ease of use for the common cases.
-
-\subsection{Alternative: Lambda Objects}
-
-For coroutines as for threads, many implementations are based on routine pointers or function objects~\cite{Butenhof97, ANSI14:C++, MS:VisualC++, BoostCoroutines15}. For example, Boost implements coroutines in terms of four functor object types:
-\begin{cfacode}
-asymmetric_coroutine<>::pull_type
-asymmetric_coroutine<>::push_type
-symmetric_coroutine<>::call_type
-symmetric_coroutine<>::yield_type
-\end{cfacode}
-Often, the canonical threading paradigm in languages is based on function pointers, \texttt{pthread} being one of the most well-known examples. The main problem of this approach is that the thread usage is limited to a generic handle that must otherwise be wrapped in a custom type. Since the custom type is simple to write in \CFA and solves several issues, added support for routine/lambda based coroutines adds very little.
-
-A variation of this would be to use a simple function pointer in the same way \texttt{pthread} does for threads:
-\begin{cfacode}
-void foo( coroutine_t cid, void* arg ) {
-	int* value = (int*)arg;
-	//Coroutine body
-}
-
-int main() {
-	int value = 0;
-	coroutine_t cid = coroutine_create( &foo, (void*)&value );
-	coroutine_resume( &cid );
-}
-\end{cfacode}
-This semantics is more common for thread interfaces but coroutines work equally well. As discussed in section \ref{threads}, this approach is superseded by static approaches in terms of expressivity.
-
-\subsection{Alternative: Trait-Based Coroutines}
-
-Finally, the underlying approach, which is the one closest to \CFA idioms, is to use trait-based lazy coroutines. This approach defines a coroutine as anything that satisfies the trait \code{is_coroutine} (as defined below) and is used as a coroutine.
-
-\begin{cfacode}
-trait is_coroutine(dtype T) {
-      void main(T& this);
-      coroutine_desc* get_coroutine(T& this);
-};
-
-forall( dtype T | is_coroutine(T) ) void suspend(T&);
-forall( dtype T | is_coroutine(T) ) void resume (T&);
-\end{cfacode}
-This ensures that an object is not a coroutine until \code{resume} is called on the object. Correspondingly, any object that is passed to \code{resume} is a coroutine since it must satisfy the \code{is_coroutine} trait to compile. The advantage of this approach is that users can easily create different types of coroutines, for example, changing the memory layout of a coroutine is trivial when implementing the \code{get_coroutine} routine. The \CFA keyword \code{coroutine} simply has the effect of implementing the getter and forward declarations required for users to implement the main routine.
-
-\begin{center}
-\begin{tabular}{c c c}
-\begin{cfacode}[tabsize=3]
-coroutine MyCoroutine {
-	int someValue;
-};
-\end{cfacode} & == & \begin{cfacode}[tabsize=3]
-struct MyCoroutine {
-	int someValue;
-	coroutine_desc __cor;
-};
-
-static inline
-coroutine_desc* get_coroutine(
-	struct MyCoroutine& this
-) {
-	return &this.__cor;
-}
-
-void main(struct MyCoroutine* this);
-\end{cfacode}
-\end{tabular}
-\end{center}
-
-The combination of these two approaches allows users new to coroutining and concurrency to have an easy and concise specification, while more advanced users have tighter control on memory layout and initialization.
-
-\section{Thread Interface}\label{threads}
-The basic building blocks of multithreading in \CFA are \glspl{cfathread}. Both user and kernel threads are supported, where user threads are the concurrency mechanism and kernel threads are the parallel mechanism. User threads offer a flexible and lightweight interface. A thread can be declared using a struct declaration \code{thread} as follows:
-
-\begin{cfacode}
-thread foo {};
-\end{cfacode}
-
-As for coroutines, the keyword is a thin wrapper around a \CFA trait:
-
-\begin{cfacode}
-trait is_thread(dtype T) {
-      void ^?{}(T & mutex this);
-      void main(T & this);
-      thread_desc* get_thread(T & this);
-};
-\end{cfacode}
-
-Obviously, for this thread implementation to be useful it must run some user code. Several other threading interfaces use a function-pointer representation as the interface of threads (for example \Csharp~\cite{Csharp} and Scala~\cite{Scala}). However, this proposal considers that statically tying a \code{main} routine to a thread supersedes this approach. Since the \code{main} routine is already a special routine in \CFA (where the program begins), it is a natural extension of the semantics to use overloading to declare mains for different threads (the normal main being the main of the initial thread). As such the \code{main} routine of a thread can be defined as
-\begin{cfacode}
-thread foo {};
-
-void main(foo & this) {
-	sout | "Hello World!" | endl;
-}
-\end{cfacode}
-
-In this example, threads of type \code{foo} start execution in the \code{void main(foo &)} routine, which prints \code{"Hello World!"}. While this thesis encourages this approach to enforce strongly typed programming, users may prefer to use the routine-based thread semantics for the sake of simplicity. With the static semantics it is trivial to write a thread type that takes a function pointer as a parameter and executes it on its stack asynchronously.
-\begin{cfacode}
-typedef void (*voidFunc)(int);
-
-thread FuncRunner {
-	voidFunc func;
-	int arg;
-};
-
-void ?{}(FuncRunner & this, voidFunc inFunc, int arg) {
-	this.func = inFunc;
-	this.arg  = arg;
-}
-
-void main(FuncRunner & this) {
-	//thread starts here and runs the function
-	this.func( this.arg );
-}
-
-void hello(/*unused*/ int) {
-	sout | "Hello World!" | endl;
-}
-
-int main() {
-	FuncRunner f = {hello, 42};
-	return 0?
-}
-\end{cfacode}
-
-A consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \acrshort{api}.
-
-Of course, for threads to be useful, it must be possible to start and stop threads and wait for them to complete execution. While using an \acrshort{api} such as \code{fork} and \code{join} is relatively common in the literature, such an interface is unnecessary. Indeed, the simplest approach is to use \acrshort{raii} principles and have threads \code{fork} after the constructor has completed and \code{join} before the destructor runs.
-\begin{cfacode}
-thread World;
-
-void main(World & this) {
-	sout | "World!" | endl;
-}
-
-void main() {
-	World w;
-	//Thread forks here
-
-	//Printing "Hello " and "World!" are run concurrently
-	sout | "Hello " | endl;
-
-	//Implicit join at end of scope
-}
-\end{cfacode}
-
-This semantic has several advantages over explicit semantics: a thread is always started and stopped exactly once, users cannot make any programming errors, and it naturally scales to multiple threads meaning basic synchronization is very simple.
-
-\begin{cfacode}
-thread MyThread {
-	//...
-};
-
-//main
-void main(MyThread& this) {
-	//...
-}
-
-void foo() {
-	MyThread thrds[10];
-	//Start 10 threads at the beginning of the scope
-
-	DoStuff();
-
-	//Wait for the 10 threads to finish
-}
-\end{cfacode}
-
-However, one of the drawbacks of this approach is that threads always form a tree where nodes must always outlive their children, i.e., they are always destroyed in the opposite order of construction because of C scoping rules. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created.
-
-\begin{cfacode}
-thread MyThread {
-	//...
-};
-
-void main(MyThread& this) {
-	//...
-}
-
-void foo() {
-	MyThread* long_lived;
-	{
-		//Start a thread at the beginning of the scope
-		MyThread short_lived;
-
-		//create another thread that will outlive the thread in this scope
-		long_lived = new MyThread;
-
-		DoStuff();
-
-		//Wait for the thread short_lived to finish
-	}
-	DoMoreStuff();
-
-	//Now wait for the long_lived to finish
-	delete long_lived;
-}
-\end{cfacode}
Index: doc/theses/thierry/text/cforall.tex
===================================================================
--- doc/theses/thierry/text/cforall.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,167 +1,0 @@
-% ======================================================================
-% ======================================================================
-\chapter{\CFA Overview}
-% ======================================================================
-% ======================================================================
-
-The following is a quick introduction to the \CFA language, specifically tailored to the features needed to support concurrency.
-
-\CFA is an extension of ISO-C and therefore supports all of the same paradigms as C. It is a non-object-oriented system-language, meaning most of the major abstractions have either no runtime overhead or can be opted out easily. Like C, the basics of \CFA revolve around structures and routines, which are thin abstractions over machine code. The vast majority of the code produced by the \CFA translator respects memory layouts and calling conventions laid out by C. Interestingly, while \CFA is not an object-oriented language, lacking the concept of a receiver (e.g., {\tt this}), it does have some notion of objects\footnote{C defines the term objects as : ``region of data storage in the execution environment, the contents of which can represent
-values''~\cite[3.15]{C11}}, most importantly construction and destruction of objects. Most of the following code examples can be found on the \CFA website~\cite{www-cfa}.
-
-% ======================================================================
-\section{References}
-
-Like \CC, \CFA introduces rebind-able references providing multiple dereferencing as an alternative to pointers. With regard to concurrency, the semantic differences between pointers and references are not particularly relevant, but since this document uses mostly references, here is a quick overview of the semantics:
-\begin{cfacode}
-int x, *p1 = &x, **p2 = &p1, ***p3 = &p2,
-	&r1 = x,    &&r2 = r1,   &&&r3 = r2;
-***p3 = 3;							//change x
-r3    = 3;							//change x, ***r3
-**p3  = ...;						//change p1
-*p3   = ...;						//change p2
-int y, z, & ar[3] = {x, y, z};		//initialize array of references
-typeof( ar[1]) p;					//is int, referenced object type
-typeof(&ar[1]) q;					//is int &, reference type
-sizeof( ar[1]) == sizeof(int);		//is true, referenced object size
-sizeof(&ar[1]) == sizeof(int *);	//is true, reference size
-\end{cfacode}
-The important take away from this code example is that a reference offers a handle to an object, much like a pointer, but which is automatically dereferenced for convenience.
-
-% ======================================================================
-\section{Overloading}
-
-Another important feature of \CFA is function overloading as in Java and \CC, where routines with the same name are selected based on the number and type of the arguments. As well, \CFA uses the return type as part of the selection criteria, as in Ada~\cite{Ada}. For routines with multiple parameters and returns, the selection is complex.
-\begin{cfacode}
-//selection based on type and number of parameters
-void f(void);			//(1)
-void f(char);			//(2)
-void f(int, double);	//(3)
-f();					//select (1)
-f('a');					//select (2)
-f(3, 5.2);				//select (3)
-
-//selection based on  type and number of returns
-char   f(int);			//(1)
-double f(int);			//(2)
-char   c = f(3);		//select (1)
-double d = f(4);		//select (2)
-\end{cfacode}
-This feature is particularly important for concurrency since the runtime system relies on creating different types to represent concurrency objects. Therefore, overloading is necessary to prevent the need for long prefixes and other naming conventions that prevent name clashes. As seen in chapter \ref{basics}, routine \code{main} is an example that benefits from overloading.
-
-% ======================================================================
-\section{Operators}
-Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation appear, e.g.:
-\begin{cfacode}
-int ++? (int op);              		//unary prefix increment
-int ?++ (int op);              		//unary postfix increment
-int ?+? (int op1, int op2);    		//binary plus
-int ?<=?(int op1, int op2);   		//binary less than
-int ?=? (int & op1, int op2);  		//binary assignment
-int ?+=?(int & op1, int op2); 		//binary plus-assignment
-
-struct S {int i, j;};
-S ?+?(S op1, S op2) {				//add two structures
-	return (S){op1.i + op2.i, op1.j + op2.j};
-}
-S s1 = {1, 2}, s2 = {2, 3}, s3;
-s3 = s1 + s2;						//compute sum: s3 == {2, 5}
-\end{cfacode}
-While concurrency does not use operator overloading directly, this feature is more important as an introduction for the syntax of constructors.
-
-% ======================================================================
-\section{Constructors/Destructors}
-Object lifetime is often a challenge in concurrency. \CFA uses the approach of giving concurrent meaning to object lifetime as a means of synchronization and/or mutual exclusion. Since \CFA relies heavily on the lifetime of objects, constructors and destructors are a core feature required for concurrency and parallelism. \CFA uses the following syntax for constructors and destructors:
-\begin{cfacode}
-struct S {
-	size_t size;
-	int * ia;
-};
-void ?{}(S & s, int asize) {	//constructor operator
-	s.size = asize;				//initialize fields
-	s.ia = calloc(size, sizeof(S));
-}
-void ^?{}(S & s) {				//destructor operator
-	free(ia);					//de-initialization fields
-}
-int main() {
-	S x = {10}, y = {100};		//implicit calls: ?{}(x, 10), ?{}(y, 100)
-	...							//use x and y
-	^x{};  ^y{};				//explicit calls to de-initialize
-	x{20};  y{200};				//explicit calls to reinitialize
-	...							//reuse x and y
-}								//implicit calls: ^?{}(y), ^?{}(x)
-\end{cfacode}
-The language guarantees that every object and all their fields are constructed. Like \CC, construction of an object is automatically done on allocation and destruction of the object is done on deallocation. Allocation and deallocation can occur on the stack or on the heap.
-\begin{cfacode}
-{
-	struct S s = {10};	//allocation, call constructor
-	...
-}						//deallocation, call destructor
-struct S * s = new();	//allocation, call constructor
-...
-delete(s);				//deallocation, call destructor
-\end{cfacode}
-Note that like \CC, \CFA introduces \code{new} and \code{delete}, which behave like \code{malloc} and \code{free} in addition to constructing and destructing objects, after calling \code{malloc} and before calling \code{free}, respectively.
-
-% ======================================================================
-\section{Parametric Polymorphism}
-\label{s:ParametricPolymorphism}
-Routines in \CFA can also be reused for multiple types. This capability is done using the \code{forall} clauses, which allow separately compiled routines to support generic usage over multiple types. For example, the following sum function works for any type that supports construction from 0 and addition:
-\begin{cfacode}
-//constraint type, 0 and +
-forall(otype T | { void ?{}(T *, zero_t); T ?+?(T, T); })
-T sum(T a[ ], size_t size) {
-	T total = 0;				//construct T from 0
-	for(size_t i = 0; i < size; i++)
-		total = total + a[i];	//select appropriate +
-	return total;
-}
-
-S sa[5];
-int i = sum(sa, 5);				//use S's 0 construction and +
-\end{cfacode}
-
-Since writing constraints on types can become cumbersome for more constrained functions, \CFA also has the concept of traits. Traits are named collections of constraints that can be used both instead of and in addition to regular constraints:
-\begin{cfacode}
-trait summable( otype T ) {
-	void ?{}(T *, zero_t);		//constructor from 0 literal
-	T ?+?(T, T);				//assortment of additions
-	T ?+=?(T *, T);
-	T ++?(T *);
-	T ?++(T *);
-};
-forall( otype T | summable(T) )	//use trait
-T sum(T a[], size_t size);
-\end{cfacode}
-
-Note that the type used for assertions can be either an \code{otype} or a \code{dtype}. Types declared as \code{otype} refer to ``complete'' objects, i.e., objects with a size, a default constructor, a copy constructor, a destructor and an assignment operator. Using \code{dtype}, on the other hand, has none of these assumptions but is extremely restrictive: it only guarantees the object is addressable.
-
-% ======================================================================
-\section{with Clause/Statement}
-Since \CFA lacks the concept of a receiver, certain functions end up needing to repeat variable names often. To remove this inconvenience, \CFA provides the \code{with} statement, which opens an aggregate scope making its fields directly accessible (like Pascal).
-\begin{cfacode}
-struct S { int i, j; };
-int mem(S & this) with (this)		//with clause
-	i = 1;							//this->i
-	j = 2;							//this->j
-}
-int foo() {
-	struct S1 { ... } s1;
-	struct S2 { ... } s2;
-	with (s1) 						//with statement
-	{
-		//access fields of s1 without qualification
-		with (s2)					//nesting
-		{
-			//access fields of s1 and s2 without qualification
-		}
-	}
-	with (s1, s2) 					//scopes open in parallel
-	{
-		//access fields of s1 and s2 without qualification
-	}
-}
-\end{cfacode}
-
-For more information on \CFA see \cite{cforall-ug,rob-thesis,www-cfa}.
Index: doc/theses/thierry/text/concurrency.tex
===================================================================
--- doc/theses/thierry/text/concurrency.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,998 +1,0 @@
-% ======================================================================
-% ======================================================================
-\chapter{Concurrency}
-% ======================================================================
-% ======================================================================
-Several tools can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared state, some languages and libraries simply disallow mutable shared state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely related to networking concepts (channels~\cite{CSP,Go} for example). However, in languages that use routine calls as their core abstraction mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine calls). This distinction in turn means that, in order to be effective, programmers need to learn two sets of design patterns. While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
-
-Approaches based on shared memory are more closely related to non-concurrent paradigms since they often rely on basic constructs like routine calls and shared objects. At the lowest level, concurrent paradigms are implemented as atomic operations and locks. Many such mechanisms have been proposed, including semaphores~\cite{Dijkstra68b} and path expressions~\cite{Campbell74}. However, for productivity reasons it is desirable to have a higher-level construct be the core concurrency paradigm~\cite{HPP:Study}.
-
-An approach that is worth mentioning because it is gaining in popularity is transactional memory~\cite{Herlihy93}. While this approach is even pursued by system languages like \CC~\cite{Cpp-Transactions}, the performance and feature set is currently too restrictive to be the main concurrency paradigm for system languages, which is why it was rejected as the core paradigm for concurrency in \CFA.
-
-One of the most natural, elegant, and efficient mechanisms for synchronization and communication, especially for shared-memory systems, is the \emph{monitor}. Monitors were first proposed by Brinch Hansen~\cite{Hansen73} and later described and extended by C.A.R.~Hoare~\cite{Hoare74}. Many programming languages---e.g., Concurrent Pascal~\cite{ConcurrentPascal}, Mesa~\cite{Mesa}, Modula~\cite{Modula-2}, Turing~\cite{Turing:old}, Modula-3~\cite{Modula-3}, NeWS~\cite{NeWS}, Emerald~\cite{Emerald}, \uC~\cite{Buhr92a} and Java~\cite{Java}---provide monitors as explicit language constructs. In addition, operating-system kernels and device drivers have a monitor-like structure, although they often use lower-level primitives such as semaphores or locks to simulate monitors. For these reasons, this project proposes monitors as the core concurrency construct.
-
-\section{Basics}
-Non-determinism requires concurrent systems to offer support for mutual-exclusion and synchronization. Mutual-exclusion is the concept that only a fixed number of threads can access a critical section at any given time, where a critical section is a group of instructions on an associated portion of data that requires the restricted access. On the other hand, synchronization enforces relative ordering of execution and synchronization tools provide numerous mechanisms to establish timing relationships among threads.
-
-\subsection{Mutual-Exclusion}
-As mentioned above, mutual-exclusion is the guarantee that only a fixed number of threads can enter a critical section at once. However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use. Methods range from low-level locks, which are fast and flexible but require significant attention to be correct, to higher-level concurrency techniques, which sacrifice some performance in order to improve ease of use. Ease of use comes by either guaranteeing some problems cannot occur (e.g., being deadlock free) or by offering a more explicit coupling between data and corresponding critical section. For example, the \CC \code{std::atomic<T>} offers an easy way to express mutual-exclusion on a restricted set of operations (e.g., reading/writing large types atomically). Another challenge with low-level locks is composability. Locks have restricted composability because it takes careful organizing for multiple locks to be used while preventing deadlocks. Easing composability is another feature higher-level mutual-exclusion mechanisms often offer.
-
-\subsection{Synchronization}
-As with mutual-exclusion, low-level synchronization primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanisms often simplify usage by adding either better coupling between synchronization and data (e.g., message passing) or offering a simpler solution to otherwise involved challenges. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronization happens within a critical section, where threads must acquire mutual-exclusion in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property is called \textbf{barging}. For example, where event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. The classic example is the thread that finishes using a resource and unblocks a thread waiting to use the resource, but the unblocked thread must compete to acquire the resource. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use flag variables to detect barging threads are said to be using barging avoidance, while algorithms that baton-pass locks~\cite{Andrews89} between threads instead of releasing the locks are said to be using barging prevention.
-
-% ======================================================================
-% ======================================================================
-\section{Monitors}
-% ======================================================================
-% ======================================================================
-A \textbf{monitor} is a set of routines that ensure mutual-exclusion when accessing shared state. More precisely, a monitor is a programming technique that associates mutual-exclusion to routine scopes, as opposed to mutex locks, where mutual-exclusion is defined by lock/release calls independently of any scoping of the calling routine. This strong association eases readability and maintainability, at the cost of flexibility. Note that both monitors and mutex locks require an abstract handle to identify them. This concept is generally associated with object-oriented languages like Java~\cite{Java} or \uC~\cite{uC++book} but does not strictly require OO semantics. The only requirement is the ability to declare a handle to a shared object and a set of routines that act on it:
-\begin{cfacode}
-typedef /*some monitor type*/ monitor;
-int f(monitor & m);
-
-int main() {
-	monitor m;  //Handle m
-	f(m);       //Routine using handle
-}
-\end{cfacode}
-
-% ======================================================================
-% ======================================================================
-\subsection{Call Semantics} \label{call}
-% ======================================================================
-% ======================================================================
-The above monitor example displays some of the intrinsic characteristics. First, it is necessary to use pass-by-reference over pass-by-value for monitor routines. This semantics is important, because at their core, monitors are implicit mutual-exclusion objects (locks), and these objects cannot be copied. Therefore, monitors are non-copy-able objects (\code{dtype}).
-
-Another aspect to consider is when a monitor acquires its mutual exclusion. For example, a monitor may need to be passed through multiple helper routines that do not acquire the monitor mutual-exclusion on entry. Passthrough can occur for generic helper routines (\code{swap}, \code{sort}, etc.) or specific helper routines like the following to implement an atomic counter:
-
-\begin{cfacode}
-monitor counter_t { /*...see section $\ref{data}$...*/ };
-
-void ?{}(counter_t & nomutex this); //constructor
-size_t ++?(counter_t & mutex this); //increment
-
-//need for mutex is platform dependent
-void ?{}(size_t * this, counter_t & mutex cnt); //conversion
-\end{cfacode}
-This counter is used as follows:
-\begin{center}
-\begin{tabular}{c @{\hskip 0.35in} c @{\hskip 0.35in} c}
-\begin{cfacode}
-//shared counter
-counter_t cnt1, cnt2;
-
-//multiple threads access counter
-thread 1 : cnt1++; cnt2++;
-thread 2 : cnt1++; cnt2++;
-thread 3 : cnt1++; cnt2++;
-	...
-thread N : cnt1++; cnt2++;
-\end{cfacode}
-\end{tabular}
-\end{center}
-Notice how the counter is used without any explicit synchronization and yet supports thread-safe semantics for both reading and writing, which is similar in usage to the \CC template \code{std::atomic}.
-
-Here, the constructor (\code{?\{\}}) uses the \code{nomutex} keyword to signify that it does not acquire the monitor mutual-exclusion when constructing. This semantics is because an object not yet con\-structed should never be shared and therefore does not require mutual exclusion. Furthermore, it allows the implementation greater freedom when it initializes the monitor locking. The prefix increment operator uses \code{mutex} to protect the incrementing process from race conditions. Finally, there is a conversion operator from \code{counter_t} to \code{size_t}. This conversion may or may not require the \code{mutex} keyword depending on whether or not reading a \code{size_t} is an atomic operation.
-
-For maximum usability, monitors use \gls{multi-acq} semantics, which means a single thread can acquire the same monitor multiple times without deadlock. For example, listing \ref{fig:search} uses recursion and \gls{multi-acq} to print values inside a binary tree.
-\begin{figure}
-\begin{cfacode}[caption={Recursive printing algorithm using \gls{multi-acq}.},label={fig:search}]
-monitor printer { ... };
-struct tree {
-	tree * left, right;
-	char * value;
-};
-void print(printer & mutex p, char * v);
-
-void print(printer & mutex p, tree * t) {
-	print(p, t->value);
-	print(p, t->left );
-	print(p, t->right);
-}
-\end{cfacode}
-\end{figure}
-
-Having both \code{mutex} and \code{nomutex} keywords can be redundant, depending on the meaning of a routine having neither of these keywords. For example, it is reasonable that it should default to the safest option (\code{mutex}) when given a routine without qualifiers \code{void foo(counter_t & this)}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. On the other hand, \code{nomutex} is the ``normal'' parameter behaviour; it effectively states explicitly that ``this routine is not special''. Another alternative is making exactly one of these keywords mandatory, which provides the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}.
-
-The next semantic decision is to establish when \code{mutex} may be used as a type qualifier. Consider the following declarations:
-\begin{cfacode}
-int f1(monitor & mutex m);
-int f2(const monitor & mutex m);
-int f3(monitor ** mutex m);
-int f4(monitor * mutex m []);
-int f5(graph(monitor *) & mutex m);
-\end{cfacode}
-The problem is to identify which object(s) should be acquired. Furthermore, each object needs to be acquired only once. In the case of simple routines like \code{f1} and \code{f2} it is easy to identify an exhaustive list of objects to acquire on entry. Adding indirections (\code{f3}) still allows the compiler and programmer to identify which object is acquired. However, adding in arrays (\code{f4}) makes it much harder. Array lengths are not necessarily known in C, and even then, making sure objects are only acquired once becomes non-trivial. This problem can be extended to absurd limits like \code{f5}, which uses a graph of monitors. To make the issue tractable, this project imposes the requirement that a routine may only acquire one monitor per parameter and it must be the type of the parameter with at most one level of indirection (ignoring potential qualifiers). Also note that while routine \code{f3} can be supported, meaning that monitor \code{**m} is acquired, passing an array to this routine would be type-safe and yet result in undefined behaviour because only the first element of the array is acquired. However, this ambiguity is part of the C type-system with respect to arrays. For this reason, \code{mutex} is disallowed in the context where arrays may be passed:
-\begin{cfacode}
-int f1(monitor & mutex m);    //Okay : recommended case
-int f2(monitor * mutex m);    //Not Okay : Could be an array
-int f3(monitor mutex m []);  //Not Okay : Array of unknown length
-int f4(monitor ** mutex m);   //Not Okay : Could be an array
-int f5(monitor * mutex m []); //Not Okay : Array of unknown length
-\end{cfacode}
-Note that not all array functions are actually distinct in the type system. However, even if the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic.
-
-Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to specify the object that acquires mutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls.
-\begin{cfacode}
-int f(MonitorA & mutex a, MonitorB & mutex b);
-
-MonitorA a;
-MonitorB b;
-f(a,b);
-\end{cfacode}
-While OO monitors could be extended with a mutex qualifier for multiple-monitor calls, no example of this feature could be found. The capability to acquire multiple locks before entering a critical section is called \emph{\gls{bulk-acq}}. In practice, writing multi-locking routines that do not lead to deadlocks is tricky. Having language support for such a feature is therefore a significant asset for \CFA. In the case presented above, \CFA guarantees that the order of acquisition is consistent across calls to different routines using the same monitors as arguments. This consistent ordering means acquiring multiple monitors is safe from deadlock when using \gls{bulk-acq}. However, users can still force the acquiring order. For example, notice which routines use \code{mutex}/\code{nomutex} and how this affects acquiring order:
-\begin{cfacode}
-void foo(A& mutex a, B& mutex b) { //acquire a & b
-	...
-}
-
-void bar(A& mutex a, B& /*nomutex*/ b) { //acquire a
-	... foo(a, b); ... //acquire b
-}
-
-void baz(A& /*nomutex*/ a, B& mutex b) { //acquire b
-	... foo(a, b); ... //acquire a
-}
-\end{cfacode}
-The \gls{multi-acq} monitor lock allows a monitor lock to be acquired by either \code{bar} or \code{baz} and acquired again in \code{foo}. In the calls to \code{bar} and \code{baz} the monitors are acquired in opposite order.
-
-However, such use leads to lock acquiring order problems. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle difference means that calling these routines concurrently may lead to deadlock and is therefore undefined behaviour. As shown~\cite{Lister77}, solving this problem requires:
-\begin{enumerate}
-	\item Dynamically tracking the monitor-call order.
-	\item Implementing rollback semantics.
-\end{enumerate}
-While the first requirement is already a significant constraint on the system, implementing a general rollback semantics in a C-like language is still prohibitively complex~\cite{Dice10}. In \CFA, users simply need to be careful when acquiring multiple monitors at the same time or only use \gls{bulk-acq} of all the monitors. While \CFA provides only a partial solution, most systems provide no solution and the \CFA partial solution handles many useful cases.
-
-For example, \gls{multi-acq} and \gls{bulk-acq} can be used together in interesting ways:
-\begin{cfacode}
-monitor bank { ... };
-
-void deposit( bank & mutex b, int deposit );
-
-void transfer( bank & mutex mybank, bank & mutex yourbank, int me2you) {
-	deposit( mybank, -me2you );
-	deposit( yourbank, me2you );
-}
-\end{cfacode}
-This example shows a trivial solution to the bank-account transfer problem~\cite{BankTransfer}. Without \gls{multi-acq} and \gls{bulk-acq}, the solution to this problem is much more involved and requires careful engineering.
-
-\subsection{\code{mutex} statement} \label{mutex-stmt}
-
-The call semantics discussed above have one software engineering issue: only a routine can acquire the mutual-exclusion of a set of monitors. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem~\cite{2FTwoHardThings}. Table \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitors is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
-
-\begin{table}
-\begin{center}
-\begin{tabular}{|c|c|}
-function call & \code{mutex} statement \\
-\hline
-\begin{cfacode}[tabsize=3]
-monitor M {};
-void foo( M & mutex m1, M & mutex m2 ) {
-	//critical section
-}
-
-void bar( M & m1, M & m2 ) {
-	foo( m1, m2 );
-}
-\end{cfacode}&\begin{cfacode}[tabsize=3]
-monitor M {};
-void bar( M & m1, M & m2 ) {
-	mutex(m1, m2) {
-		//critical section
-	}
-}
-
-
-\end{cfacode}
-\end{tabular}
-\end{center}
-\caption{Regular call semantics vs. \code{mutex} statement}
-\label{lst:mutex-stmt}
-\end{table}
-
-% ======================================================================
-% ======================================================================
-\subsection{Data semantics} \label{data}
-% ======================================================================
-% ======================================================================
-Once the call semantics are established, the next step is to establish data semantics. Indeed, until now a monitor is used simply as a generic handle but in most cases monitors contain shared data. This data should be intrinsic to the monitor declaration to prevent any accidental use of data without its appropriate protection. For example, here is a complete version of the counter shown in section \ref{call}:
-\begin{cfacode}
-monitor counter_t {
-	int value;
-};
-
-void ?{}(counter_t & this) {
-	this.cnt = 0;
-}
-
-int ?++(counter_t & mutex this) {
-	return ++this.value;
-}
-
-//need for mutex is platform dependent here
-void ?{}(int * this, counter_t & mutex cnt) {
-	*this = (int)cnt;
-}
-\end{cfacode}
-
-Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the \code{monitor} keyword. The monitor trait is:
-\begin{cfacode}
-trait is_monitor(dtype T) {
-	monitor_desc * get_monitor( T & );
-	void ^?{}( T & mutex );
-};
-\end{cfacode}
-Note that the destructor of a monitor must be a \code{mutex} routine to prevent deallocation while a thread is accessing the monitor. As with any object, calls to a monitor, using \code{mutex} or otherwise, are undefined behaviour after the destructor has run.
-
-% ======================================================================
-% ======================================================================
-\section{Internal Scheduling} \label{intsched}
-% ======================================================================
-% ======================================================================
-In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronization. With monitors, this capability is generally achieved with internal or external scheduling as in~\cite{Hoare74}. With \textbf{scheduling} loosely defined as deciding which thread acquires the critical section next, \textbf{internal scheduling} means making the decision from inside the critical section (i.e., with access to the shared state), while \textbf{external scheduling} means making the decision when entering the critical section (i.e., without access to the shared state). Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
-
-First, here is a simple example of internal scheduling:
-
-\begin{cfacode}
-monitor A {
-	condition e;
-}
-
-void foo(A& mutex a1, A& mutex a2) {
-	...
-	//Wait for cooperation from bar()
-	wait(a1.e);
-	...
-}
-
-void bar(A& mutex a1, A& mutex a2) {
-	//Provide cooperation for foo()
-	...
-	//Unblock foo
-	signal(a1.e);
-}
-\end{cfacode}
-There are two details to note here. First, \code{signal} is a delayed operation; it only unblocks the waiting thread when it reaches the end of the critical section. This semantics is needed to respect mutual-exclusion, i.e., the signaller and signalled thread cannot be in the monitor simultaneously. The alternative is to return immediately after the call to \code{signal}, which is significantly more restrictive. Second, in \CFA, while it is common to store a \code{condition} as a field of the monitor, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, ensuring a basic ordering.
-
-An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to be the next thread to acquire the monitor (unless some other thread waited on the same condition). This guarantee offers the benefit of not having to loop around waits to recheck that a condition is met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency.
-
-% ======================================================================
-% ======================================================================
-\subsection{Internal Scheduling - Multi-Monitor}
-% ======================================================================
-% ======================================================================
-It is easy to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling are done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use the implicit condition variable as a parameter and explicitly name the monitors (A and B) associated with the condition. Note that in \CFA, condition variables are tied to a \emph{group} of monitors on first use (called branding), which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors. The example below shows the simple case of having two threads (one for each column) and a single monitor A.
-
-\begin{multicols}{2}
-thread 1
-\begin{pseudo}
-acquire A
-	wait A
-release A
-\end{pseudo}
-
-\columnbreak
-
-thread 2
-\begin{pseudo}
-acquire A
-	signal A
-release A
-\end{pseudo}
-\end{multicols}
-One thread acquires before waiting (atomically blocking and releasing A) and the other acquires before signalling. It is important to note here that both \code{wait} and \code{signal} must be called with the proper monitor(s) already acquired. This semantic is a logical requirement for barging prevention.
-
-A direct extension of the previous example is a \gls{bulk-acq} version:
-\begin{multicols}{2}
-\begin{pseudo}
-acquire A & B
-	wait A & B
-release A & B
-\end{pseudo}
-\columnbreak
-\begin{pseudo}
-acquire A & B
-	signal A & B
-release A & B
-\end{pseudo}
-\end{multicols}
-\noindent This version uses \gls{bulk-acq} (denoted using the {\sf\&} symbol), but the presence of multiple monitors does not add a particularly new meaning. Synchronization happens between the two threads in exactly the same way and order. The only difference is that mutual exclusion covers a group of monitors. On the implementation side, handling multiple monitors does add a degree of complexity as the next few examples demonstrate.
-
-While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well-known deadlock problem is the Nested Monitor Problem~\cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem:
-\begin{multicols}{2}
-\begin{pseudo}
-acquire A
-	acquire B
-		wait B
-	release B
-release A
-\end{pseudo}
-
-\columnbreak
-
-\begin{pseudo}
-acquire A
-	acquire B
-		signal B
-	release B
-release A
-\end{pseudo}
-\end{multicols}
-\noindent The \code{wait} only releases monitor \code{B} so the signalling thread cannot acquire monitor \code{A} to get to the \code{signal}. Attempting release of all acquired monitors at the \code{wait} introduces a different set of problems, such as releasing monitor \code{C}, which has nothing to do with the \code{signal}.
-
-However, for monitors as for locks, it is possible to write a program using nesting without encountering any problems if nesting is done correctly. For example, the next pseudo-code snippet acquires monitors {\sf A} then {\sf B} before waiting, while only acquiring {\sf B} when signalling, effectively avoiding the Nested Monitor Problem~\cite{Lister77}.
-
-\begin{multicols}{2}
-\begin{pseudo}
-acquire A
-	acquire B
-		wait B
-	release B
-release A
-\end{pseudo}
-
-\columnbreak
-
-\begin{pseudo}
-
-acquire B
-	signal B
-release B
-
-\end{pseudo}
-\end{multicols}
-
-\noindent However, this simple refactoring may not be possible, forcing more complex restructuring.
-
-% ======================================================================
-% ======================================================================
-\subsection{Internal Scheduling - In Depth}
-% ======================================================================
-% ======================================================================
-
-A larger example is presented to show complex issues for \gls{bulk-acq} and its implementation options are analyzed. Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code to implement the pseudo-code in listing \ref{lst:int-bulk-pseudo}. For the purpose of translating the given pseudo-code into \CFA-code, any method of introducing a monitor is acceptable, e.g., \code{mutex} parameters, global variables, pointer parameters, or using locals with the \code{mutex} statement.
-
-\begin{figure}[!t]
-\begin{multicols}{2}
-Waiting thread
-\begin{pseudo}[numbers=left]
-acquire A
-	//Code Section 1
-	acquire A & B
-		//Code Section 2
-		wait A & B
-		//Code Section 3
-	release A & B
-	//Code Section 4
-release A
-\end{pseudo}
-\columnbreak
-Signalling thread
-\begin{pseudo}[numbers=left, firstnumber=10,escapechar=|]
-acquire A
-	//Code Section 5
-	acquire A & B
-		//Code Section 6
-		|\label{line:signal1}|signal A & B
-		//Code Section 7
-	|\label{line:releaseFirst}|release A & B
-	//Code Section 8
-|\label{line:lastRelease}|release A
-\end{pseudo}
-\end{multicols}
-\begin{cfacode}[caption={Internal scheduling with \gls{bulk-acq}},label={lst:int-bulk-pseudo}]
-\end{cfacode}
-\begin{center}
-\begin{cfacode}[xleftmargin=.4\textwidth]
-monitor A a;
-monitor B b;
-condition c;
-\end{cfacode}
-\end{center}
-\begin{multicols}{2}
-Waiting thread
-\begin{cfacode}
-mutex(a) {
-	//Code Section 1
-	mutex(a, b) {
-		//Code Section 2
-		wait(c);
-		//Code Section 3
-	}
-	//Code Section 4
-}
-\end{cfacode}
-\columnbreak
-Signalling thread
-\begin{cfacode}
-mutex(a) {
-	//Code Section 5
-	mutex(a, b) {
-		//Code Section 6
-		signal(c);
-		//Code Section 7
-	}
-	//Code Section 8
-}
-\end{cfacode}
-\end{multicols}
-\begin{cfacode}[caption={Equivalent \CFA code for listing \ref{lst:int-bulk-pseudo}},label={lst:int-bulk-cfa}]
-\end{cfacode}
-\begin{multicols}{2}
-Waiter
-\begin{pseudo}[numbers=left]
-acquire A
-	acquire A & B
-		wait A & B
-	release A & B
-release A
-\end{pseudo}
-
-\columnbreak
-
-Signaller
-\begin{pseudo}[numbers=left, firstnumber=6,escapechar=|]
-acquire A
-	acquire A & B
-		signal A & B
-	release A & B
-	|\label{line:secret}|//Secretly keep B here
-release A
-//Wakeup waiter and transfer A & B
-\end{pseudo}
-\end{multicols}
-\begin{cfacode}[caption={Listing \ref{lst:int-bulk-pseudo}, with delayed signalling comments},label={lst:int-secret}]
-\end{cfacode}
-\end{figure}
-
-The complexity begins at code sections 4 and 8 in listing \ref{lst:int-bulk-pseudo}, which are where the existing semantics of internal scheduling needs to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired, which is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should ``release \code{A & B}'' (listing \ref{lst:int-bulk-pseudo} line \ref{line:releaseFirst}), it must actually transfer ownership of monitor \code{B} to the waiting thread. This ownership transfer is required in order to prevent barging into \code{B} by another thread, since both the signalling and signalled threads still need monitor \code{A}. There are three options:
-
-\subsubsection{Delaying Signals}
-The obvious solution to the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that that moment is when the last lock is no longer needed, because this semantics fits most closely to the behaviour of single-monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from multiple objects to a single group of objects, effectively making the existing single-monitor semantic viable by simply changing monitors to monitor groups. This solution releases the monitors once every monitor in a group can be released. However, since some monitors are never released (e.g., the monitor of a thread), this interpretation means a group might never be released. A more interesting interpretation is to transfer the group until all its monitors are released, which means the group is not passed further and a thread can retain its locks.
-
-However, listing \ref{lst:int-secret} shows this solution can become much more complicated depending on what is executed while secretly holding B at line \ref{line:secret}, while avoiding the need to transfer ownership of a subset of the condition monitors. Listing \ref{lst:dependency} shows a slightly different example where a third thread is waiting on monitor \code{A}, using a different condition variable. Because the third thread is signalled when secretly holding \code{B}, the goal  becomes unreachable. Depending on the order of signals (listing \ref{lst:dependency} line \ref{line:signal-ab} and \ref{line:signal-a}) two cases can happen:
-
-\paragraph{Case 1: thread $\alpha$ goes first.} In this case, the problem is that monitor \code{A} needs to be passed to thread $\beta$ when thread $\alpha$ is done with it.
-\paragraph{Case 2: thread $\beta$ goes first.} In this case, the problem is that monitor \code{B} needs to be retained and passed to thread $\alpha$ along with monitor \code{A}, which can be done directly or possibly using thread $\beta$ as an intermediate.
-\\
-
-Note that ordering is not determined by a race condition but by whether signalled threads are enqueued in FIFO or FILO order. However, regardless of the answer, users can move line \ref{line:signal-a} before line \ref{line:signal-ab} and get the reverse effect for listing \ref{lst:dependency}.
-
-In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means knowing when to release a group becomes complex and inefficient (see next section) and therefore effectively precludes this approach.
-
-\subsubsection{Dependency graphs}
-
-
-\begin{figure}
-\begin{multicols}{3}
-Thread $\alpha$
-\begin{pseudo}[numbers=left, firstnumber=1]
-acquire A
-	acquire A & B
-		wait A & B
-	release A & B
-release A
-\end{pseudo}
-\columnbreak
-Thread $\gamma$
-\begin{pseudo}[numbers=left, firstnumber=6, escapechar=|]
-acquire A
-	acquire A & B
-		|\label{line:signal-ab}|signal A & B
-	|\label{line:release-ab}|release A & B
-	|\label{line:signal-a}|signal A
-|\label{line:release-a}|release A
-\end{pseudo}
-\columnbreak
-Thread $\beta$
-\begin{pseudo}[numbers=left, firstnumber=12, escapechar=|]
-acquire A
-	wait A
-|\label{line:release-aa}|release A
-\end{pseudo}
-\end{multicols}
-\begin{cfacode}[caption={Pseudo-code for the three thread example.},label={lst:dependency}]
-\end{cfacode}
-\begin{center}
-\input{dependency}
-\end{center}
-\caption{Dependency graph of the statements in listing \ref{lst:dependency}}
-\label{fig:dependency}
-\end{figure}
-
-In listing \ref{lst:int-bulk-pseudo}, there is a solution that satisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases \code{A & B} and then the waiter transfers ownership of \code{A} back to the signaller when it releases it, then the problem is solved (\code{B} is no longer in use at this point). Dynamically finding the correct order is therefore the second possible solution. The problem is effectively resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and has a super-linear complexity. This complexity can be seen in listing \ref{lst:explosion}, which is just a direct extension to three monitors, requires at least three ownership transfers and has multiple solutions. Furthermore, the presence of multiple solutions for ownership transfer can cause deadlock problems if a specific solution is not consistently picked, in the same way that multiple lock-acquisition orders can cause deadlocks.
-\begin{figure}
-\begin{multicols}{2}
-\begin{pseudo}
-acquire A
-	acquire B
-		acquire C
-			wait A & B & C
-		release C
-	release B
-release A
-\end{pseudo}
-
-\columnbreak
-
-\begin{pseudo}
-acquire A
-	acquire B
-		acquire C
-			signal A & B & C
-		release C
-	release B
-release A
-\end{pseudo}
-\end{multicols}
-\begin{cfacode}[caption={Extension to three monitors of listing \ref{lst:int-bulk-pseudo}},label={lst:explosion}]
-\end{cfacode}
-\end{figure}
-
-Given the three threads example in listing \ref{lst:dependency}, figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows the dependency of that statement (e.g., $\alpha1$ must happen before $\alpha2$). The extra challenge is that this dependency graph is effectively post-mortem, but the runtime system needs to be able to build and solve these graphs as the dependencies unfold. Resolving dependency graphs being a complex and expensive endeavour, this solution is not the preferred one.
-
-\subsubsection{Partial Signalling} \label{partial-sig}
-Finally, the solution that is chosen for \CFA is to use partial signalling. Again using listing \ref{lst:int-bulk-pseudo}, the partial signalling solution transfers ownership of monitor \code{B} at line \ref{line:signal1} to the waiter but does not wake the waiting thread since the signaller is still using monitor \code{A}. Only when it reaches line \ref{line:lastRelease} does it actually wake up the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency-graph solving algorithm, which is why it was chosen. Furthermore, after being fully implemented, this solution does not appear to have any significant downsides.
-
-Using partial signalling, listing \ref{lst:dependency} can be solved easily:
-\begin{itemize}
-	\item When thread $\gamma$ reaches line \ref{line:release-ab} it transfers monitor \code{B} to thread $\alpha$ and continues to hold monitor \code{A}.
-	\item When thread $\gamma$ reaches line \ref{line:release-a}  it transfers monitor \code{A} to thread $\beta$  and wakes it up.
-	\item When thread $\beta$  reaches line \ref{line:release-aa} it transfers monitor \code{A} to thread $\alpha$ and wakes it up.
-\end{itemize}
-
-% ======================================================================
-% ======================================================================
-\subsection{Signalling: Now or Later}
-% ======================================================================
-% ======================================================================
-\begin{table}
-\begin{tabular}{|c|c|}
-\code{signal} & \code{signal_block} \\
-\hline
-\begin{cfacode}[tabsize=3]
-monitor DatingService
-{
-	//compatibility codes
-	enum{ CCodes = 20 };
-
-	int girlPhoneNo;
-	int boyPhoneNo;
-};
-
-condition girls[CCodes];
-condition boys [CCodes];
-condition exchange;
-
-int girl(int phoneNo, int ccode)
-{
-	//no compatible boy ?
-	if(empty(boys[ccode]))
-	{
-		//wait for boy
-		wait(girls[ccode]);
-
-		//make phone number available
-		girlPhoneNo = phoneNo;
-
-		//wake boy from chair
-		signal(exchange);
-	}
-	else
-	{
-		//make phone number available
-		girlPhoneNo = phoneNo;
-
-		//wake boy
-		signal(boys[ccode]);
-
-		//sit in chair
-		wait(exchange);
-	}
-	return boyPhoneNo;
-}
-
-int boy(int phoneNo, int ccode)
-{
-	//same as above
-	//with boy/girl interchanged
-}
-\end{cfacode}&\begin{cfacode}[tabsize=3]
-monitor DatingService
-{
-	//compatibility codes
-	enum{ CCodes = 20 };
-
-	int girlPhoneNo;
-	int boyPhoneNo;
-};
-
-condition girls[CCodes];
-condition boys [CCodes];
-//exchange is not needed
-
-int girl(int phoneNo, int ccode)
-{
-	//no compatible boy ?
-	if(empty(boys[ccode]))
-	{
-		//wait for boy
-		wait(girls[ccode]);
-
-		//make phone number available
-		girlPhoneNo = phoneNo;
-
-		//wake boy from chair
-		signal(exchange);
-	}
-	else
-	{
-		//make phone number available
-		girlPhoneNo = phoneNo;
-
-		//wake boy
-		signal_block(boys[ccode]);
-
-		//second handshake unnecessary
-
-	}
-	return boyPhoneNo;
-}
-
-int boy(int phoneNo, int ccode)
-{
-	//same as above
-	//with boy/girl interchanged
-}
-\end{cfacode}
-\end{tabular}
-\caption{Dating service example using \code{signal} and \code{signal_block}. }
-\label{tbl:datingservice}
-\end{table}
-An important note is that, until now, signalling a monitor was a delayed operation. The ownership of the monitor is transferred only when the monitor would have otherwise been released, not at the point of the \code{signal} statement. However, in some cases, it may be more convenient for users to immediately transfer ownership to the thread that is waiting for cooperation, which is achieved using the \code{signal_block} routine.
-
-The example in table \ref{tbl:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits; this behaviour requires additional synchronization when a two-way handshake is needed. To avoid this explicit synchronization, the \code{condition} type offers the \code{signal_block} routine, which handles the two-way handshake as shown in the example. This feature removes the need for a second condition variable and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the front end and the back end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call.
-
-% ======================================================================
-% ======================================================================
-\section{External scheduling} \label{extsched}
-% ======================================================================
-% ======================================================================
-An alternative to internal scheduling is external scheduling (see Table~\ref{tbl:sched}).
-\begin{table}
-\begin{tabular}{|c|c|c|}
-Internal Scheduling & External Scheduling & Go\\
-\hline
-\begin{ucppcode}[tabsize=3]
-_Monitor Semaphore {
-	condition c;
-	bool inUse;
-public:
-	void P() {
-		if(inUse)
-			wait(c);
-		inUse = true;
-	}
-	void V() {
-		inUse = false;
-		signal(c);
-	}
-}
-\end{ucppcode}&\begin{ucppcode}[tabsize=3]
-_Monitor Semaphore {
-
-	bool inUse;
-public:
-	void P() {
-		if(inUse)
-			_Accept(V);
-		inUse = true;
-	}
-	void V() {
-		inUse = false;
-
-	}
-}
-\end{ucppcode}&\begin{gocode}[tabsize=3]
-type MySem struct {
-	inUse bool
-	c     chan bool
-}
-
-// acquire
-func (s MySem) P() {
-	if s.inUse {
-		select {
-		case <-s.c:
-		}
-	}
-	s.inUse = true
-}
-
-// release
-func (s MySem) V() {
-	s.inUse = false
-
-	//This actually deadlocks
-	//when single thread
-	s.c <- false
-}
-\end{gocode}
-\end{tabular}
-\caption{Different forms of scheduling.}
-\label{tbl:sched}
-\end{table}
-This method is more constrained and explicit, which helps users reduce the non-deterministic nature of concurrency. Indeed, as the following examples demonstrate, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. External scheduling can generally be done either in terms of control flow (e.g., Ada with \code{accept}, \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). Of course, both of these paradigms have their own strengths and weaknesses, but for this project, control-flow semantics was chosen to stay consistent with the rest of the language's semantics. Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multiple-monitor routines. The previous example shows a simple use of \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s.
-
-For the \code{P} member above using internal scheduling, the call to \code{wait} only guarantees that \code{V} is the last routine to access the monitor, allowing a third routine, say \code{isInUse()}, to acquire mutual exclusion several times while routine \code{P} is waiting. On the other hand, external scheduling guarantees that while routine \code{P} is waiting, no other routine than \code{V} can acquire the monitor.
-
-% ======================================================================
-% ======================================================================
-\subsection{Loose Object Definitions}
-% ======================================================================
-% ======================================================================
-In \uC, a monitor class declaration includes an exhaustive list of monitor operations. Since \CFA is not object oriented, monitors become both more difficult to implement and less clear for a user:
-
-\begin{cfacode}
-monitor A {};
-
-void f(A & mutex a);
-void g(A & mutex a) {
-	waitfor(f); //Obvious which f() to wait for
-}
-
-void f(A & mutex a, int); //New different F added in scope
-void h(A & mutex a) {
-	waitfor(f); //Less obvious which f() to wait for
-}
-\end{cfacode}
-
-Furthermore, external scheduling is an example where implementation constraints become visible from the interface. Here is the pseudo-code for the entering phase of a monitor:
-\begin{center}
-\begin{tabular}{l}
-\begin{pseudo}
-	if monitor is free
-		enter
-	elif already own the monitor
-		continue
-	elif monitor accepts me
-		enter
-	else
-		block
-\end{pseudo}
-\end{tabular}
-\end{center}
-For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. Indeed, monitors are often expressed as an entry queue and some acceptor queue as in Figure~\ref{fig:ClassicalMonitor}.
-
-\begin{figure}
-\centering
-\subfloat[Classical Monitor] {
-\label{fig:ClassicalMonitor}
-{\resizebox{0.45\textwidth}{!}{\input{monitor}}}
-}% subfloat
-\qquad
-\subfloat[\Gls{bulk-acq} Monitor] {
-\label{fig:BulkMonitor}
-{\resizebox{0.45\textwidth}{!}{\input{ext_monitor}}}
-}% subfloat
-\caption{External Scheduling Monitor}
-\end{figure}
-
-There are other alternatives to these pictures, but in the case of the left picture, implementing a fast accept check is relatively easy. Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (e.g., 128) of mutex members. This approach requires a unique dense ordering of routines with an upper-bound and that ordering must be consistent across translation units. For OO languages these constraints are common, since objects only offer adding member routines consistently across translation units via inheritance. However, in \CFA users can extend objects with mutex routines that are only visible in certain translation units. This means that establishing a program-wide dense-ordering among mutex routines can only be done in the program linking phase, and still could have issues when using dynamically shared objects.
-
-The alternative is to alter the implementation as in Figure~\ref{fig:BulkMonitor}.
-Here, the mutex routine called is associated with a thread on the entry queue while a list of acceptable routines is kept separate. Generating a mask dynamically means that the storage for the mask information can vary between calls to \code{waitfor}, allowing for more flexibility and extensions. Storing an array of accepted function pointers replaces the single instruction bitmask comparison with dereferencing a pointer followed by a linear search. Furthermore, supporting nested external scheduling (e.g., listing \ref{lst:nest-ext}) may now require additional searches for the \code{waitfor} statement to check if a routine is already queued.
-
-\begin{figure}
-\begin{cfacode}[caption={Example of nested external scheduling},label={lst:nest-ext}]
-monitor M {};
-void foo( M & mutex a ) {}
-void bar( M & mutex b ) {
-	//Nested in the waitfor(bar, c) call
-	waitfor(foo, b);
-}
-void baz( M & mutex c ) {
-	waitfor(bar, c);
-}
-
-\end{cfacode}
-\end{figure}
-
-Note that in the right picture, tasks need to always keep track of the monitors associated with mutex routines, and the routine mask needs to have both a function pointer and a set of monitors, as is discussed in the next section. These details are omitted from the picture for the sake of simplicity.
-
-At this point, a decision must be made between flexibility and performance. Many design decisions in \CFA achieve both flexibility and performance, for example polymorphic routines add significant flexibility but inlining them means the optimizer can easily remove any runtime cost. Here, however, the cost of flexibility cannot be trivially removed. In the end, the most flexible approach has been chosen since it allows users to write programs that would otherwise be  hard to write. This decision is based on the assumption that writing fast but inflexible locks is closer to a solved problem than writing locks that are as flexible as external scheduling in \CFA.
-
-% ======================================================================
-% ======================================================================
-\subsection{Multi-Monitor Scheduling}
-% ======================================================================
-% ======================================================================
-
-External scheduling, like internal scheduling, becomes significantly more complex when introducing multi-monitor syntax. Even in the simplest possible case, some new semantics needs to be established:
-\begin{cfacode}
-monitor M {};
-
-void f(M & mutex a);
-
-void g(M & mutex b, M & mutex c) {
-	waitfor(f); //two monitors M => unknown which to pass to f(M & mutex)
-}
-\end{cfacode}
-The obvious solution is to specify the correct monitor as follows:
-
-\begin{cfacode}
-monitor M {};
-
-void f(M & mutex a);
-
-void g(M & mutex a, M & mutex b) {
-	//wait for call to f with argument b
-	waitfor(f, b);
-}
-\end{cfacode}
-This syntax is unambiguous. Both locks are acquired and kept by \code{g}. When routine \code{f} is called, the lock for monitor \code{b} is temporarily transferred from \code{g} to \code{f} (while \code{g} still holds lock \code{a}). This behaviour can be extended to the multi-monitor \code{waitfor} statement as follows.
-
-\begin{cfacode}
-monitor M {};
-
-void f(M & mutex a, M & mutex b);
-
-void g(M & mutex a, M & mutex b) {
-	//wait for call to f with arguments a and b
-	waitfor(f, a, b);
-}
-\end{cfacode}
-
-Note that the set of monitors passed to the \code{waitfor} statement must be entirely contained in the set of monitors already acquired in the routine. \code{waitfor} used in any other context is undefined behaviour.
-
-An important behaviour to note is when a set of monitors only match partially:
-
-\begin{cfacode}
-mutex struct A {};
-
-mutex struct B {};
-
-void g(A & mutex a, B & mutex b) {
-	waitfor(f, a, b);
-}
-
-A a1, a2;
-B b;
-
-void foo() {
-	g(a1, b); //block on accept
-}
-
-void bar() {
-	f(a2, b); //fulfill cooperation
-}
-\end{cfacode}
-While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets do not wake up the waiting thread. It is also important to note that in the case of external scheduling the order of parameters is irrelevant; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are indistinguishable waiting conditions.
-
-% ======================================================================
-% ======================================================================
-\subsection{\code{waitfor} Semantics}
-% ======================================================================
-% ======================================================================
-
-Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted because the compiler validates at compile time the validity of the function type and the parameters used with the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the waitfor statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading, but overloading is possible.
-\begin{figure}
-\begin{cfacode}[caption={Various correct and incorrect uses of the waitfor statement},label={lst:waitfor}]
-monitor A{};
-monitor B{};
-
-void f1( A & mutex );
-void f2( A & mutex, B & mutex );
-void f3( A & mutex, int );
-void f4( A & mutex, int );
-void f4( A & mutex, double );
-
-void foo( A & mutex a1, A & mutex a2, B & mutex b1, B & b2 ) {
-	A * ap = & a1;
-	void (*fp)( A & mutex ) = f1;
-
-	waitfor(f1, a1);     //Correct : 1 monitor case
-	waitfor(f2, a1, b1); //Correct : 2 monitor case
-	waitfor(f3, a1);     //Correct : non-mutex arguments are ignored
-	waitfor(f1, *ap);    //Correct : expression as argument
-
-	waitfor(f1, a1, b1); //Incorrect : Too many mutex arguments
-	waitfor(f2, a1);     //Incorrect : Too few mutex arguments
-	waitfor(f2, a1, a2); //Incorrect : Mutex arguments don't match
-	waitfor(f1, 1);      //Incorrect : 1 not a mutex argument
-	waitfor(f9, a1);     //Incorrect : f9 function does not exist
-	waitfor(*fp, a1 );   //Incorrect : fp not an identifier
-	waitfor(f4, a1);     //Incorrect : f4 ambiguous
-
-	waitfor(f2, a1, b2); //Undefined behaviour : b2 not mutex
-}
-\end{cfacode}
-\end{figure}
-
-Finally, for added flexibility, \CFA supports constructing a complex \code{waitfor} statement using the \code{or}, \code{timeout} and \code{else} clauses. Indeed, multiple \code{waitfor} clauses can be chained together using \code{or}; this chain forms a single statement that uses baton pass to any function that fits one of the function+monitor sets passed in. To enable users to tell which accepted function executed, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement, which is executed after the clause is triggered. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, which checks whether a matching function call has already arrived and otherwise continues. Any and all of these clauses can be preceded by a \code{when} condition to dynamically toggle the accept clauses on or off based on some current state. Listing \ref{lst:waitfor2} demonstrates several complex masks and some incorrect ones.
-
-\begin{figure}
-\begin{cfacode}[caption={Various correct and incorrect uses of the or, else, and timeout clause around a waitfor statement},label={lst:waitfor2}]
-monitor A{};
-
-void f1( A & mutex );
-void f2( A & mutex );
-
-void foo( A & mutex a, bool b, int t ) {
-	//Correct : blocking case
-	waitfor(f1, a);
-
-	//Correct : block with statement
-	waitfor(f1, a) {
-		sout | "f1" | endl;
-	}
-
-	//Correct : block waiting for f1 or f2
-	waitfor(f1, a) {
-		sout | "f1" | endl;
-	} or waitfor(f2, a) {
-		sout | "f2" | endl;
-	}
-
-	//Correct : non-blocking case
-	waitfor(f1, a); or else;
-
-	//Correct : non-blocking case
-	waitfor(f1, a) {
-		sout | "blocked" | endl;
-	} or else {
-		sout | "didn't block" | endl;
-	}
-
-	//Correct : block at most 10 seconds
-	waitfor(f1, a) {
-		sout | "blocked" | endl;
-	} or timeout( 10`s) {
-		sout | "didn't block" | endl;
-	}
-
-	//Correct : block only if b == true
-	//if b == false, don't even make the call
-	when(b) waitfor(f1, a);
-
-	//Correct : block only if b == true
-	//if b == false, make non-blocking call
-	waitfor(f1, a); or when(!b) else;
-
-	//Correct : block only if t > 1
-	waitfor(f1, a); or when(t > 1) timeout(t); or else;
-
-	//Incorrect : timeout clause is dead code
-	waitfor(f1, a); or timeout(t); or else;
-
-	//Incorrect : order must be
-	//waitfor [or waitfor... [or timeout] [or else]]
-	timeout(t); or waitfor(f1, a); or else;
-}
-\end{cfacode}
-\end{figure}
-
-% ======================================================================
-% ======================================================================
-\subsection{Waiting For The Destructor}
-% ======================================================================
-% ======================================================================
-An interesting use for the \code{waitfor} statement is destructor semantics. Indeed, the \code{waitfor} statement can accept any \code{mutex} routine, which includes the destructor (see section \ref{data}). However, with the semantics discussed until now, waiting for the destructor does not make any sense, since using an object after its destructor is called is undefined behaviour. The simplest approach is to disallow \code{waitfor} on a destructor. However, a more expressive approach is to flip ordering of execution when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current \code{mutex} routine, similarly to how a condition is signalled.
-\begin{figure}
-\begin{cfacode}[caption={Example of an executor which executes actions in series until the destructor is called.},label={lst:dtor-order}]
-monitor Executer {};
-struct  Action;
-
-void ^?{}   (Executer & mutex this);
-void execute(Executer & mutex this, const Action & );
-void run    (Executer & mutex this) {
-	while(true) {
-		   waitfor(execute, this);
-		or waitfor(^?{}   , this) {
-			break;
-		}
-	}
-}
-\end{cfacode}
-\end{figure}
-For example, listing \ref{lst:dtor-order} shows an example of an executor with an infinite loop, which waits for the destructor to break out of this loop. Switching the semantic meaning introduces an idiomatic way to terminate a task and/or wait for its termination via destruction.
Index: doc/theses/thierry/text/frontpgs.tex
===================================================================
--- doc/theses/thierry/text/frontpgs.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,165 +1,0 @@
-% T I T L E   P A G E
-% -------------------
-% Last updated May 24, 2011, by Stephen Carr, IST-Client Services
-% The title page is counted as page `i' but we need to suppress the
-% page number.  We also don't want any headers or footers.
-\pagestyle{empty}
-\pagenumbering{roman}
-
-% The contents of the title page are specified in the "titlepage"
-% environment.
-\begin{titlepage}
-        \begin{center}
-        \vspace*{1.0cm}
-
-        \Huge
-        {\bf Concurrency in \CFA}
-
-        \vspace*{1.0cm}
-
-        \normalsize
-        by \\
-
-        \vspace*{1.0cm}
-
-        \Large
-        Thierry Delisle \\
-
-        \vspace*{3.0cm}
-
-        \normalsize
-        A thesis \\
-        presented to the University of Waterloo \\
-        in fulfillment of the \\
-        thesis requirement for the degree of \\
-        Master of Mathematics \\
-        in \\
-        Computer Science \\
-
-        \vspace*{2.0cm}
-
-        Waterloo, Ontario, Canada, 2018 \\
-
-        \vspace*{1.0cm}
-
-        \copyright\ Thierry Delisle 2018 \\
-        \end{center}
-\end{titlepage}
-
-% The rest of the front pages should contain no headers and be numbered using Roman numerals starting with `ii'
-\pagestyle{plain}
-\setcounter{page}{2}
-
-\cleardoublepage % Ends the current page and causes all figures and tables that have so far appeared in the input to be printed.
-% In a two-sided printing style, it also makes the next page a right-hand (odd-numbered) page, producing a blank page if necessary.
-
-
-
-% D E C L A R A T I O N   P A G E
-% -------------------------------
-  % The following is the sample Declaration Page as provided by the GSO
-  % December 13th, 2006.  It is designed for an electronic thesis.
-  \noindent
-%I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
-
-  \bigskip
-
-  \noindent
-
-  I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
-
-
-  I understand that my thesis may be made electronically available to the public.
-
-\cleardoublepage
-%\newpage
-
-% A B S T R A C T
-% ---------------
-
-\begin{center}\textbf{Abstract}\end{center}
-
-\CFA is a modern, non-object-oriented extension of the C programming language. This thesis serves as a definition and an implementation for the concurrency and parallelism \CFA offers. These features are created from scratch due to the lack of concurrency in ISO C. Lightweight threads are introduced into the language. In addition, monitors are introduced as a high-level tool for control-flow based synchronization and mutual-exclusion. The main contributions of this thesis are two-fold: it extends the existing semantics of monitors introduced by~\cite{Hoare74} to handle monitors in groups and also details the engineering effort needed to introduce these features as core language features. Indeed, these features are added with respect to expectations of C programmers, and integrate with the \CFA type-system and other language features.
-
-
-\cleardoublepage
-%\newpage
-
-% A C K N O W L E D G E M E N T S
-% -------------------------------
-
-\begin{center}\textbf{Acknowledgements}\end{center}
-
-I would like to thank my supervisor, Professor Peter Buhr, for his guidance through my degree as well as the editing of this document.
-
-I would like to thank Professors Martin Karsten and Gregor Richards, for reading my thesis and providing helpful feedback.
-
-Thanks to Aaron Moss, Rob Schluntz and Andrew Beach for their work on the \CFA project as well as all the discussions which have helped me concretize the ideas in this thesis.
-
-Finally, I acknowledge that this has been possible thanks to the financial help offered by the David R. Cheriton School of Computer Science and the corporate partnership with Huawei Ltd.
-
-\cleardoublepage
-%\newpage
-
-% % D E D I C A T I O N
-% % -------------------
-
-% \begin{center}\textbf{Dedication}\end{center}
-
-% % This is dedicated to the one I love.
-% TODO
-% \cleardoublepage
-% %\newpage
-
-% T A B L E   O F   C O N T E N T S
-% ---------------------------------
-\renewcommand\contentsname{Table of Contents}
-\tableofcontents
-\cleardoublepage
-\phantomsection
-%\newpage
-
-% L I S T   O F   T A B L E S
-% ---------------------------
-\addcontentsline{toc}{chapter}{List of Tables}
-\listoftables
-\cleardoublepage
-\phantomsection		% allows hyperref to link to the correct page
-%\newpage
-
-% L I S T   O F   F I G U R E S
-% -----------------------------
-\addcontentsline{toc}{chapter}{List of Figures}
-\listoffigures
-\cleardoublepage
-\phantomsection		% allows hyperref to link to the correct page
-%\newpage
-
-% L I S T   O F   L I S T I N G S
-% -----------------------------
-\addcontentsline{toc}{chapter}{List of Listings}
-\lstlistoflistings
-\cleardoublepage
-\phantomsection		% allows hyperref to link to the correct page
-%\newpage
-
-% L I S T   O F   S Y M B O L S
-% -----------------------------
-% To include a Nomenclature section
-% \addcontentsline{toc}{chapter}{\textbf{Nomenclature}}
-% \renewcommand{\nomname}{Nomenclature}
-% \printglossary
-% \cleardoublepage
-% \phantomsection % allows hyperref to link to the correct page
-% \newpage
-
-% L I S T   O F   A C R O N Y M S
-% -----------------------------
-\addcontentsline{toc}{chapter}{List of Acronyms}
-\printglossary[type=\acronymtype,title={List of Acronyms}]
-\cleardoublepage
-\phantomsection		% allows hyperref to link to the correct page
-
-% Change page numbering back to Arabic numerals
-\pagenumbering{arabic}
-
Index: doc/theses/thierry/text/future.tex
===================================================================
--- doc/theses/thierry/text/future.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,112 +1,0 @@
-
-\chapter{Conclusion}
-This thesis has achieved a minimal concurrency \acrshort{api} that is simple, efficient and usable as the basis for higher-level features. The approach presented is based on a lightweight thread-system for parallelism, which sits on top of clusters of processors. This M:N model is judged both to be more efficient and to allow more flexibility for users. Furthermore, this document introduces monitors as the main concurrency tool for users. This thesis also offers a novel approach allowing multiple monitors to be accessed simultaneously without running into the Nested Monitor Problem~\cite{Lister77}. It also offers a full implementation of the concurrency runtime written entirely in \CFA, effectively the largest \CFA code base to date.
-
-
-% ======================================================================
-% ======================================================================
-\section{Future Work}
-% ======================================================================
-% ======================================================================
-
-\subsection{Performance} \label{futur:perf}
-This thesis presents a first implementation of the \CFA concurrency runtime. Therefore, there is still significant work to improve performance. Many of the data structures and algorithms may change in the future to more efficient versions. For example, the number of monitors in a single \gls{bulk-acq} is only bounded by the stack size, which is probably unnecessarily generous. It may be possible that limiting the number helps increase performance. However, it is not obvious that the benefit would be significant.
-
-\subsection{Flexible Scheduling} \label{futur:sched}
-An important part of concurrency is scheduling. Different scheduling algorithms can affect performance (both in terms of average and variation). However, no single scheduler is optimal for all workloads and therefore there is value in being able to change the scheduler for given programs. One solution is to offer various tweaking options to users, allowing the scheduler to be adjusted to the requirements of the workload. However, in order to be truly flexible, it would be interesting to allow users to add arbitrary data and arbitrary scheduling algorithms. For example, a web server could attach Type-of-Service information to threads and have a ``ToS aware'' scheduling algorithm tailored to this specific web server. This path of flexible schedulers will be explored for \CFA.
-
-\subsection{Non-Blocking I/O} \label{futur:nbio}
-While most of the parallelism tools are aimed at data parallelism and control-flow parallelism, many modern workloads are not bound on computation but on IO operations, a common case being web servers and XaaS (anything as a service). These types of workloads often require significant engineering around amortizing costs of blocking IO operations. At its core, non-blocking I/O is an operating system level feature that allows queuing IO operations (e.g., network operations) and registering for notifications instead of waiting for requests to complete. In this context, the role of the language is to make non-blocking I/O easily available and with low overhead. The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises, which can be seen in frameworks like Node.js~\cite{NodeJs} for JavaScript, Spring MVC~\cite{SpringMVC} for Java and Django~\cite{Django} for Python. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear.
-
-\subsection{Other Concurrency Tools} \label{futur:tools}
-While monitors offer a flexible and powerful concurrent core for \CFA, other concurrency tools are also necessary for a complete multi-paradigm concurrency package. Examples of such tools can include simple locks and condition variables, futures and promises~\cite{promises}, executors and actors. These additional features are useful when monitors offer a level of abstraction that is inadequate for certain tasks.
-
-\subsection{Implicit Threading} \label{futur:implcit}
-Simpler applications can benefit greatly from having implicit parallelism. That is, parallelism that does not rely on the user to write concurrency. This type of parallelism can be achieved both at the language level and at the library level. The canonical example of implicit parallelism is parallel for loops, which are the simplest example of a divide-and-conquer algorithm~\cite{uC++book}. Table \ref{lst:parfor} shows three different code examples that accomplish point-wise sums of large arrays. Note that none of these examples explicitly declare any concurrency or parallelism objects.
-
-\begin{table}
-\begin{center}
-\begin{tabular}[t]{|c|c|c|}
-Sequential & Library Parallel & Language Parallel \\
-\begin{cfacode}[tabsize=3]
-void big_sum(
-	int* a, int* b,
-	int* o,
-	size_t len)
-{
-	for(
-		int i = 0;
-		i < len;
-		++i )
-	{
-		o[i]=a[i]+b[i];
-	}
-}
-
-
-
-
-
-int* a[10000];
-int* b[10000];
-int* c[10000];
-//... fill in a & b
-big_sum(a,b,c,10000);
-\end{cfacode} &\begin{cfacode}[tabsize=3]
-void big_sum(
-	int* a, int* b,
-	int* o,
-	size_t len)
-{
-	range ar(a, a+len);
-	range br(b, b+len);
-	range or(o, o+len);
-	parfor( ai, bi, oi,
-	[](	int* ai,
-		int* bi,
-		int* oi)
-	{
-		oi=ai+bi;
-	});
-}
-
-
-int* a[10000];
-int* b[10000];
-int* c[10000];
-//... fill in a & b
-big_sum(a,b,c,10000);
-\end{cfacode}&\begin{cfacode}[tabsize=3]
-void big_sum(
-	int* a, int* b,
-	int* o,
-	size_t len)
-{
-	parfor (ai,bi,oi)
-	    in (a, b, o )
-	{
-		oi = ai + bi;
-	}
-}
-
-
-
-
-
-
-
-int* a[10000];
-int* b[10000];
-int* c[10000];
-//... fill in a & b
-big_sum(a,b,c,10000);
-\end{cfacode}
-\end{tabular}
-\end{center}
-\caption{For loop to sum numbers: Sequential, using library parallelism and language parallelism.}
-\label{lst:parfor}
-\end{table}
-
-Implicit parallelism is a restrictive solution and therefore has its limitations. However, it is a quick and simple approach to parallelism, which may very well be sufficient for smaller applications and reduces the amount of boilerplate needed to start benefiting from parallelism in modern CPUs.
-
-
Index: doc/theses/thierry/text/internals.tex
===================================================================
--- doc/theses/thierry/text/internals.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,326 +1,0 @@
-
-\chapter{Behind the Scenes}
-There are several challenges specific to \CFA when implementing concurrency. These challenges are a direct result of \gls{bulk-acq} and loose object definitions. These two constraints are the root cause of most design decisions in the implementation. Furthermore, to avoid contention from dynamically allocating memory in a concurrent environment, the internal-scheduling design is (almost) entirely free of mallocs. This approach avoids the chicken and egg problem~\cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal means that memory management is a constant concern in the design of the system.
-
-The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues and all queues are designed with intrusive nodes, where each node has pre-allocated link fields for chaining, to avoid the need for memory allocation. Since several concurrency operations can use an unbounded amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient. The only way to use a variable amount of memory without requiring memory allocation is to pre-allocate large buffers of memory eagerly and store the information in these buffers. Conveniently, the call stack fits that description and is easy to use, which is why it is used heavily in the implementation of internal scheduling, particularly variable-length arrays. Since stack allocation is based on scopes, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable-length array. The threads and the condition both have a fixed amount of memory, while \code{mutex} routines and blocking calls allow for an unbounded amount, within the stack size.
-
-Note that since the major contributions of this thesis are extending monitor semantics to \gls{bulk-acq} and loose object definitions, any challenges that do not result from these characteristics of \CFA are considered solved problems and are therefore not discussed.
-
-% ======================================================================
-% ======================================================================
-\section{Mutex Routines}
-% ======================================================================
-% ======================================================================
-
-The first step towards the monitor implementation is simple \code{mutex} routines. In the single monitor case, mutual-exclusion is done using the entry/exit procedure in listing \ref{lst:entry1}. The entry/exit procedures do not have to be extended to support multiple monitors. Indeed it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlock~\cite{Havender68}. In \CFA, ordering of monitor acquisition relies on memory ordering. This approach is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is undefined behaviour. When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values. This array persists for the entire duration of the mutual-exclusion and its ordering is reused extensively.
-\begin{figure}
-\begin{multicols}{2}
-Entry
-\begin{pseudo}
-if monitor is free
-	enter
-elif already own the monitor
-	continue
-else
-	block
-increment recursions
-\end{pseudo}
-\columnbreak
-Exit
-\begin{pseudo}
-decrement recursion
-if recursion == 0
-	if entry queue not empty
-		wake-up thread
-\end{pseudo}
-\end{multicols}
-\begin{pseudo}[caption={Initial entry and exit routine for monitors},label={lst:entry1}]
-\end{pseudo}
-\end{figure}
-
-\subsection{Details: Interaction with polymorphism}
-Depending on the choice of semantics for when monitor locks are acquired, interaction between monitors and \CFA's concept of polymorphism can be more complex to support. However, it is shown that entry-point locking solves most of the issues.
-
-First of all, interaction between \code{otype} polymorphism (see Section~\ref{s:ParametricPolymorphism}) and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. It is important to present the difference between the two acquiring options: \glspl{callsite-locking} and entry-point locking, i.e., acquiring the monitors before making a mutex routine-call or as the first operation of the mutex routine-call. For example:
-\begin{table}[H]
-\begin{center}
-\begin{tabular}{|c|c|c|}
-Mutex & \gls{callsite-locking} & \gls{entry-point-locking} \\
-call & pseudo-code & pseudo-code \\
-\hline
-\begin{cfacode}[tabsize=3]
-void foo(monitor& mutex a){
-
-	//Do Work
-	//...
-
-}
-
-void main() {
-	monitor a;
-
-	foo(a);
-
-}
-\end{cfacode} & \begin{pseudo}[tabsize=3]
-foo(& a) {
-
-	//Do Work
-	//...
-
-}
-
-main() {
-	monitor a;
-	acquire(a);
-	foo(a);
-	release(a);
-}
-\end{pseudo} & \begin{pseudo}[tabsize=3]
-foo(& a) {
-	acquire(a);
-	//Do Work
-	//...
-	release(a);
-}
-
-main() {
-	monitor a;
-
-	foo(a);
-
-}
-\end{pseudo}
-\end{tabular}
-\end{center}
-\caption{Call-site vs entry-point locking for mutex calls}
-\label{tbl:locking-site}
-\end{table}
-
-Note the \code{mutex} keyword relies on the type system, which means that in cases where a generic monitor-routine is desired, writing the mutex routine is possible with the proper trait, e.g.:
-\begin{cfacode}
-//Incorrect: T may not be monitor
-forall(dtype T)
-void foo(T * mutex t);
-
-//Correct: this function only works on monitors (any monitor)
-forall(dtype T | is_monitor(T))
-void bar(T * mutex t);
-\end{cfacode}
-
-Both entry point and \gls{callsite-locking} are feasible implementations. The current \CFA implementation uses entry-point locking because it requires less work when using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. It is harder to use \gls{raii} for call-site locking, as it does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e., the function body. For example, the monitor call can appear in the middle of an expression. Furthermore, entry-point locking requires less code generation since any useful routine is called multiple times but there is only one entry point for many call sites.
-
-% ======================================================================
-% ======================================================================
-\section{Threading} \label{impl:thread}
-% ======================================================================
-% ======================================================================
-
-Figure \ref{fig:system1} shows a high-level picture of the \CFA runtime system with regard to concurrency. Each component of the picture is explained in detail in the following sections.
-
-\begin{figure}
-\begin{center}
-{\resizebox{\textwidth}{!}{\input{system.pstex_t}}}
-\end{center}
-\caption{Overview of the entire system}
-\label{fig:system1}
-\end{figure}
-
-\subsection{Processors}
-Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically \texttt{pthread}s in the current implementation of \CFA. Indeed, any parallelism must go through operating-system libraries. However, \glspl{uthread} are still the main source of concurrency, processors are simply the underlying source of parallelism. Indeed, processor \glspl{kthread} simply fetch a \gls{uthread} from the scheduler and run it; they are effectively executers for user-threads. The main benefit of this approach is that it offers a well-defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics.
-
-\subsection{Stack Management}
-One of the challenges of this system is to reduce the footprint as much as possible. Specifically, all \texttt{pthread}s created also have a stack created with them, which should be used as much as possible. Normally, coroutines also create their own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the \gls{kthread} stack, effectively stealing the processor stack. The exception to this rule is the Main Processor, i.e., the initial \gls{kthread} that is given to any program. In order to respect C user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor, which can grow very large.
-
-\subsection{Context Switching}
-As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading, because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that the instruction pointer can be left untouched since the context-switch is always inside the same function. Threads, however, do not context-switch between each other directly. They context-switch to the scheduler. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen. Obviously, this doubles the context-switch cost because threads must context-switch to an intermediate stack. The alternative 1-step context-switch uses the stack of the ``from'' thread to schedule and then context-switches directly to the ``to'' thread. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). Additionally, for users in need of optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch (akin to the Microsoft \code{SwitchToFiber}~\cite{switchToWindows} routine). This option is not currently present in \CFA, but the changes required to add it are strictly additive.
-
-\subsection{Preemption} \label{preemption}
-Finally, an important aspect for any complete threading system is preemption. As mentioned in chapter \ref{basics}, preemption introduces an extra degree of uncertainty, which enables users to have multiple threads interleave transparently, rather than having to cooperate among threads for proper scheduling and CPU distribution. Indeed, preemption is desirable because it adds a degree of isolation among threads. In a fully cooperative system, any thread that runs a long loop can starve other threads, while in a preemptive system, starvation can still occur but it does not rely on every thread having to yield or block on a regular basis, which significantly reduces the programmer's burden. Obviously, preemption is not optimal for every workload. However, any preemptive system can become a cooperative system by making the time slices extremely large. Therefore, \CFA uses a preemptive threading system.
-
-Preemption in \CFA\footnote{Note that the implementation of preemption is strongly tied with the underlying threading system. For this reason, only the Linux implementation is covered; \CFA does not run on Windows at the time of writing.} is based on kernel timers, which are used to run a discrete-event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption, it inserts the time in a sorted order and sets a kernel timer for the closest one, effectively stepping through preemption events on each signal sent by the timer. These timers use the Linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread. This results in an implementation problem, because when delivering signals to a process, the kernel can deliver the signal to any kernel thread for which the signal is not blocked, i.e.:
-\begin{quote}
-A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked. If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which to deliver the signal.
-SIGNAL(7) - Linux Programmer's Manual
-\end{quote}
-For the sake of simplicity, and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every kernel thread except one.
-
-Now because of how involuntary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread. Hence, involuntary context-switching is done by sending signal {\tt SIGUSR1} to the corresponding proces\-sor and having the thread yield from inside the signal handler. This approach effectively context-switches away from the signal handler back to the kernel and the signal handler frame is eventually unwound when the thread is scheduled again. As a result, a signal handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal handlers save and restore signal masks because user-thread migration can cause a signal mask to migrate from one kernel thread to another. This behaviour is only a problem if all kernel threads, among which a user thread can migrate, differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions.}. However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel-alarm thread can cause deadlocks. For this reason, the alarm thread is in a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). This unblocking is also done using {\tt SIGALRM}, but sent through the \code{pthread_sigqueue}. Indeed, \code{sigwaitinfo} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel.
-
-\subsection{Scheduler}
-Finally, an aspect that was not mentioned yet is the scheduling algorithm. Currently, the \CFA scheduler uses a single ready queue for all processors, which is the simplest approach to scheduling. Further discussion on scheduling is present in section \ref{futur:sched}.
-
-% ======================================================================
-% ======================================================================
-\section{Internal Scheduling} \label{impl:intsched}
-% ======================================================================
-% ======================================================================
-The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:ClassicalMonitor} for convenience):
-
-\begin{figure}[H]
-\begin{center}
-{\resizebox{0.4\textwidth}{!}{\input{monitor}}}
-\end{center}
-\caption{Traditional illustration of a monitor}
-\end{figure}
-
-This picture has several components, the two most important being the entry queue and the AS-stack. The entry queue is an (almost) FIFO list where threads waiting to enter are parked, while the acceptor/signaller (AS) stack is a FILO list used for threads that have been signalled or otherwise marked as running next.
-
-For \CFA, this picture does not have support for blocking multiple monitors on a single condition. To support \gls{bulk-acq} two changes to this picture are required. First, it is no longer helpful to attach the condition to \emph{a single} monitor. Secondly, the thread waiting on the condition has to be separated across multiple monitors, seen in figure \ref{fig:monitor_cfa}.
-
-\begin{figure}[H]
-\begin{center}
-{\resizebox{0.8\textwidth}{!}{\input{int_monitor}}}
-\end{center}
-\caption{Illustration of \CFA Monitor}
-\label{fig:monitor_cfa}
-\end{figure}
-
-This picture and the proper entry and exit algorithms (see listing \ref{lst:entry2}) are the fundamental implementation of internal scheduling. Note that when a thread is moved from the condition to the AS-stack, it is conceptually split into N pieces, where N is the number of monitors specified in the parameter list. The thread is woken up when all the pieces have been popped from the AS-stacks and made active. In this picture, the threads are split into halves but this is only because there are two monitors. For a specific signalling operation every monitor needs a piece of the thread on its AS-stack.
-
-\begin{figure}[b]
-\begin{multicols}{2}
-Entry
-\begin{pseudo}
-if monitor is free
-	enter
-elif already own the monitor
-	continue
-else
-	block
-increment recursion
-
-\end{pseudo}
-\columnbreak
-Exit
-\begin{pseudo}
-decrement recursion
-if recursion == 0
-	if signal_stack not empty
-		set_owner to thread
-		if all monitors ready
-			wake-up thread
-
-	if entry queue not empty
-		wake-up thread
-\end{pseudo}
-\end{multicols}
-\begin{pseudo}[caption={Entry and exit routine for monitors with internal scheduling},label={lst:entry2}]
-\end{pseudo}
-\end{figure}
-
-The solution discussed in section \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe and prevents any potential barging. The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the call stack of the \code{wait} and \code{signal_block} routines.
-
-\begin{figure}[H]
-\begin{center}
-{\resizebox{0.8\textwidth}{!}{\input{monitor_structs.pstex_t}}}
-\end{center}
-\caption{Data structures involved in internal/external scheduling}
-\label{fig:structs}
-\end{figure}
-
-Figure \ref{fig:structs} shows a high-level representation of these data structures. The main idea behind them is that a thread cannot contain an arbitrary number of intrusive ``next'' pointers for linking onto monitors. The \code{condition node} is the data structure that is queued onto a condition variable and, when signalled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. Once all the criteria have been popped from their respective AS-stacks, the thread is woken up, which is what is shown in listing \ref{lst:entry2}.
-
-% ======================================================================
-% ======================================================================
-\section{External Scheduling}
-% ======================================================================
-% ======================================================================
-Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. For internal scheduling, these queues are part of condition variables, which are still unique for a given scheduling operation (i.e., no signal statement uses multiple conditions). However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired. These monitors are the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. This requires an algorithm to choose which monitor holds the relevant queue. It is also important that said algorithm be independent of the order in which users list parameters. The proposed algorithm is to fall back on monitor lock ordering (sorting by address) and specify that the monitor that is acquired first is the one with the relevant waiting queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint.
-
-This algorithm choice has two consequences:
-\begin{itemize}
-	\item The queue of the monitor with the lowest address is no longer a true FIFO queue because threads can be moved to the front of the queue. These queues need to contain a set of monitors for each of the waiting threads. Therefore, another thread whose set contains the same lowest address monitor but different lower priority monitors may arrive first but enter the critical section after a thread with the correct pairing.
-	\item The queue of the lowest priority monitor is both required and potentially unused. Indeed, since it is not known at compile time which monitor is the monitor which has the lowest address, every monitor needs to have the correct queues even though it is possible that some queues go unused for the entire duration of the program, for example if a monitor is only used in a specific pair.
-\end{itemize}
-Therefore, the following modifications need to be made to support external scheduling:
-\begin{itemize}
-	\item The threads waiting on the entry queue need to keep track of which routine they are trying to enter, and using which set of monitors. The \code{mutex} routine already has all the required information on its stack, so the thread only needs to keep a pointer to that information.
-	\item The monitors need to keep a mask of acceptable routines. This mask contains for each acceptable routine, a routine pointer and an array of monitors to go with it. It also needs storage to keep track of which routine was accepted. Since this information is not specific to any monitor, the monitors actually contain a pointer to an integer on the stack of the waiting thread. Note that if a thread has acquired two monitors but executes a \code{waitfor} with only one monitor as a parameter, setting the mask of acceptable routines to both monitors will not cause any problems since the extra monitor will not change ownership regardless. This becomes relevant when \code{when} clauses affect the number of monitors passed to a \code{waitfor} statement.
-	\item The entry/exit routines need to be updated as shown in listing \ref{lst:entry3}.
-\end{itemize}
-
-\subsection{External Scheduling - Destructors}
-Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine. This routine is needed because of the storage requirements of the call order inversion. Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. For regular \code{waitfor} statements, the call stack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed onto the AS-stack for later. The \code{waitfor} semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor}.
-
-\begin{figure}
-\begin{multicols}{2}
-Entry
-\begin{pseudo}
-if monitor is free
-	enter
-elif already own the monitor
-	continue
-elif matches waitfor mask
-	push criteria to AS-stack
-	continue
-else
-	block
-increment recursion
-\end{pseudo}
-\columnbreak
-Exit
-\begin{pseudo}
-decrement recursion
-if recursion == 0
-	if signal_stack not empty
-		set_owner to thread
-		if all monitors ready
-			wake-up thread
-		endif
-	endif
-
-	if entry queue not empty
-		wake-up thread
-	endif
-\end{pseudo}
-\end{multicols}
-\begin{pseudo}[caption={Entry and exit routine for monitors with internal scheduling and external scheduling},label={lst:entry3}]
-\end{pseudo}
-\end{figure}
-
-\begin{figure}
-\begin{multicols}{2}
-Destructor Entry
-\begin{pseudo}
-if monitor is free
-	enter
-elif already own the monitor
-	increment recursion
-	return
-create wait context
-if matches waitfor mask
-	reset mask
-	push self to AS-stack
-	baton pass
-else
-	wait
-increment recursion
-\end{pseudo}
-\columnbreak
-Waitfor
-\begin{pseudo}
-if matching thread is already there
-	if found destructor
-		push destructor to AS-stack
-		unlock all monitors
-	else
-		push self to AS-stack
-		baton pass
-	endif
-	return
-endif
-if non-blocking
-	Unlock all monitors
-	Return
-endif
-
-push self to AS-stack
-set waitfor mask
-block
-return
-\end{pseudo}
-\end{multicols}
-\begin{pseudo}[caption={Pseudo code for the \code{waitfor} routine and the \code{mutex} entry routine for destructors},label={lst:entry-dtor}]
-\end{pseudo}
-\end{figure}
Index: doc/theses/thierry/text/intro.tex
===================================================================
--- doc/theses/thierry/text/intro.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,8 +1,0 @@
-% ======================================================================
-\chapter{Introduction}
-% ======================================================================
-This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, called \CFA. Furthermore, the proposed \acrshort{api} doubles as an early definition of the \CFA language and library. This thesis also provides an implementation of the concurrency library for \CFA as well as all the required language features added to the source-to-source translator.
-
-There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the programmer. While these two concepts are often combined, they are in fact distinct, requiring different tools~\cite{Buhr05a}. Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization.
-
-In the context of this thesis, a \textbf{thread} is a fundamental unit of execution that runs a sequence of code, generally on a program stack. Having multiple simultaneous threads gives rise to concurrency and generally requires some kind of locking mechanism to ensure proper execution. Correspondingly, \textbf{concurrency} is defined as the concepts and challenges that occur when multiple independent (sharing memory, timing dependencies, etc.) concurrent threads are introduced. Accordingly, \textbf{locking} (and by extension locks) are defined as a mechanism that prevents the progress of certain threads in order to avoid problems due to concurrency. Finally, in this thesis \textbf{parallelism} is distinct from concurrency and is defined as running multiple threads simultaneously. More precisely, parallelism implies \emph{actual} simultaneous execution as opposed to concurrency which only requires \emph{apparent} simultaneous execution. As such, parallelism is only observable in the differences in performance or, more generally, differences in timing.
Index: doc/theses/thierry/text/parallelism.tex
===================================================================
--- doc/theses/thierry/text/parallelism.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,39 +1,0 @@
-% ######     #    ######     #    #       #       ####### #       ###  #####  #     #
-% #     #   # #   #     #   # #   #       #       #       #        #  #     # ##   ##
-% #     #  #   #  #     #  #   #  #       #       #       #        #  #       # # # #
-% ######  #     # ######  #     # #       #       #####   #        #   #####  #  #  #
-% #       ####### #   #   ####### #       #       #       #        #        # #     #
-% #       #     # #    #  #     # #       #       #       #        #  #     # #     #
-% #       #     # #     # #     # ####### ####### ####### ####### ###  #####  #     #
-\chapter{Parallelism}
-Historically, computer performance was about processor speeds and instruction counts. However, with heat dissipation being a direct consequence of speed increase, parallelism has become the new source for increased performance~\cite{Sutter05, Sutter05b}. In this decade, it is no longer reasonable to create a high-performance application without caring about parallelism. Indeed, parallelism is an important aspect of performance and more specifically throughput and hardware utilization. The lowest-level approach of parallelism is to use \glspl{kthread} in combination with semantics like \code{fork}, \code{join}, etc. However, since these have significant costs and limitations, \glspl{kthread} are now mostly used as an implementation tool rather than a user oriented one. There are several alternatives to solve these issues that all have strengths and weaknesses. While there are many variations of the presented paradigms, most of these variations do not actually change the guarantees or the semantics, they simply move costs in order to achieve better performance for certain workloads.
-
-\section{Paradigms}
-\subsection{User-Level Threads}
-A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provides but can be used on a much larger scale. This approach is the most powerful solution as it allows all the features of multithreading, while removing several of the more expensive costs of kernel threads. The downside is that almost none of the low-level threading problems are hidden; users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong guarantees, but the parallelism toolkit offers very little to reduce complexity in itself.
-
-Examples of languages that support \glspl{uthread} are Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
-
-\subsection{Fibers : User-Level Threads Without Preemption} \label{fibers}
-A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantic differences with \glspl{uthread}. The significant difference between \glspl{uthread} and \glspl{fiber} is the lack of \gls{preemption} in the latter. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths, but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.
-
-An example of a language that uses fibers is Go~\cite{Go}.
-
-\subsection{Jobs and Thread Pools}
-An approach on the opposite end of the spectrum is to base parallelism on \glspl{pool}. Indeed, \glspl{pool} offer limited flexibility but at the benefit of a simpler user interface. In \gls{pool} based systems, users express parallelism as units of work, called jobs, and a dependency graph (either explicit or implicit) that ties them together. This approach means users need not worry about concurrency but significantly limits the interaction that can occur among jobs. Indeed, any \gls{job} that blocks also blocks the underlying worker, which effectively means the CPU utilization, and therefore throughput, suffers noticeably. It can be argued that a solution to this problem is to use more workers than available cores. However, unless the number of jobs and the number of workers are comparable, having a significant number of blocked jobs always results in idle cores.
-
-The gold standard of this implementation is Intel's TBB library~\cite{TBB}.
-
-\subsection{Paradigm Performance}
-While the choice between the three paradigms listed above may have significant performance implications, it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantees equivalent performance across paradigms and that the \gls{pool}-based system has the best efficiency thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilization, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large enough, the paradigm choice is largely amortized by the actual work done.
-
-\section{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel}
-A \gls{cfacluster} is a group of \glspl{kthread} executed in isolation. \Glspl{uthread} are scheduled on the \glspl{kthread} of a given \gls{cfacluster}, allowing organization between \glspl{uthread} and \glspl{kthread}. It is important that \glspl{kthread} belonging to the same \gls{cfacluster} have homogeneous settings, otherwise migrating a \gls{uthread} from one \gls{kthread} to the other can cause issues. A \gls{cfacluster} also offers a pluggable scheduler that can optimize the workload generated by the \glspl{uthread}.
-
-\Glspl{cfacluster} have not been fully implemented in the context of this thesis. Currently \CFA only supports one \gls{cfacluster}, the initial one.
-
-\subsection{Future Work: Machine Setup}\label{machine}
-While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, a system using \acrshort{numa} configurations may benefit from users being able to tie clusters and/or kernel threads to certain CPU cores. OS support for CPU affinity is now common~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
-
-\subsection{Paradigms}\label{cfaparadigms}
-Given these building blocks, it is possible to reproduce all three of the popular paradigms. Indeed, \glspl{uthread} is the default paradigm in \CFA. However, disabling \gls{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policies can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application. Finally, it is possible to build executors for thread pools from \glspl{uthread} or \glspl{fiber}, which includes specialized jobs like actors~\cite{Actors}.
Index: doc/theses/thierry/text/results.tex
===================================================================
--- doc/theses/thierry/text/results.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,332 +1,0 @@
-% ======================================================================
-% ======================================================================
-\chapter{Performance Results} \label{results}
-% ======================================================================
-% ======================================================================
-\section{Machine Setup}
-Table \ref{tab:machine} shows the characteristics of the machine used to run the benchmarks. All tests were made on this machine.
-\begin{table}[H]
-\begin{center}
-\begin{tabular}{| l | r | l | r |}
-\hline
-Architecture		& x86\_64 			& NUMA node(s) 	& 8 \\
-\hline
-CPU op-mode(s)		& 32-bit, 64-bit 		& Model name 	& AMD Opteron\texttrademark  Processor 6380 \\
-\hline
-Byte Order			& Little Endian 		& CPU Freq 		& 2.5\si{\giga\hertz} \\
-\hline
-CPU(s)			& 64 				& L1d cache 	& \SI{16}{\kibi\byte} \\
-\hline
-Thread(s) per core	& 2 				& L1i cache 	& \SI{64}{\kibi\byte} \\
-\hline
-Core(s) per socket	& 8 				& L2 cache 		& \SI{2048}{\kibi\byte} \\
-\hline
-Socket(s)			& 4 				& L3 cache 		& \SI{6144}{\kibi\byte} \\
-\hline
-\hline
-Operating system		& Ubuntu 16.04.3 LTS	& Kernel		& Linux 4.4-97-generic \\
-\hline
-Compiler			& GCC 6.3 		& Translator	& CFA 1 \\
-\hline
-Java version		& OpenJDK-9 		& Go version	& 1.9.2 \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Machine setup used for the tests}
-\label{tab:machine}
-\end{table}
-
-\section{Micro Benchmarks}
-All benchmarks are run using the same harness to produce the results, seen as the \code{BENCH()} macro in the following examples. This macro uses the following logic to benchmark the code:
-\begin{pseudo}
-#define BENCH(run, result) \
-	before = gettime(); \
-	run; \
-	after  = gettime(); \
-	result = (after - before) / N;
-\end{pseudo}
-The method used to get time is \code{clock_gettime(CLOCK_THREAD_CPUTIME_ID);}. Each benchmark is using many iterations of a simple call to measure the cost of the call. The specific number of iterations depends on the specific benchmark.
-
-\subsection{Context-Switching}
-The first interesting benchmark is to measure how long context-switches take. The simplest approach to do this is to yield on a thread, which executes a 2-step context switch. Yielding causes the thread to context-switch to the scheduler and back, more precisely: from the \gls{uthread} to the \gls{kthread} then from the \gls{kthread} back to the same \gls{uthread} (or a different one in the general case). In order to make the comparison fair, coroutines also execute a 2-step context-switch by resuming another coroutine which does nothing but suspend in a tight loop, which is a resume/suspend cycle instead of a yield. Listing \ref{lst:ctx-switch} shows the code for coroutines and threads with the results in table \ref{tab:ctx-switch}. All omitted tests are functionally identical to one of these tests. The difference between coroutines and threads can be attributed to the cost of scheduling.
-\begin{figure}
-\begin{multicols}{2}
-\CFA Coroutines
-\begin{cfacode}
-coroutine GreatSuspender {};
-void main(GreatSuspender& this) {
-	while(true) { suspend(); }
-}
-int main() {
-	GreatSuspender s;
-	resume(s);
-	BENCH(
-		for(size_t i=0; i<n; i++) {
-			resume(s);
-		},
-		result
-	)
-	printf("%llu\n", result);
-}
-\end{cfacode}
-\columnbreak
-\CFA Threads
-\begin{cfacode}
-
-
-
-
-int main() {
-
-
-	BENCH(
-		for(size_t i=0; i<n; i++) {
-			yield();
-		},
-		result
-	)
-	printf("%llu\n", result);
-}
-\end{cfacode}
-\end{multicols}
-\begin{cfacode}[caption={\CFA benchmark code used to measure context-switches for coroutines and threads.},label={lst:ctx-switch}]
-\end{cfacode}
-\end{figure}
-
-\begin{table}
-\begin{center}
-\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
-\cline{2-4}
-\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
-\hline
-Kernel Thread	& 241.5	& 243.86	& 5.08 \\
-\CFA Coroutine	& 38		& 38		& 0    \\
-\CFA Thread		& 103		& 102.96	& 2.96 \\
-\uC Coroutine	& 46		& 45.86	& 0.35 \\
-\uC Thread		& 98		& 99.11	& 1.42 \\
-Goroutine		& 150		& 149.96	& 3.16 \\
-Java Thread		& 289		& 290.68	& 8.72 \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Context Switch comparison. All numbers are in nanoseconds(\si{\nano\second})}
-\label{tab:ctx-switch}
-\end{table}
-
-\subsection{Mutual-Exclusion}
-The next interesting benchmark is to measure the overhead to enter/leave a critical-section. For monitors, the simplest approach is to measure how long it takes to enter and leave a monitor routine. Listing \ref{lst:mutex} shows the code for \CFA. To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a \code{pthread_mutex} lock are also measured. The results are shown in table \ref{tab:mutex}.
-
-\begin{figure}
-\begin{cfacode}[caption={\CFA benchmark code used to measure mutex routines.},label={lst:mutex}]
-monitor M {};
-void __attribute__((noinline)) call( M & mutex m /*, m2, m3, m4*/ ) {}
-
-int main() {
-	M m/*, m2, m3, m4*/;
-	BENCH(
-		for(size_t i=0; i<n; i++) {
-			call(m/*, m2, m3, m4*/);
-		},
-		result
-	)
-	printf("%llu\n", result);
-}
-\end{cfacode}
-\end{figure}
-
-\begin{table}
-\begin{center}
-\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
-\cline{2-4}
-\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
-\hline
-C routine						& 2		& 2		& 0    \\
-FetchAdd + FetchSub				& 26		& 26		& 0    \\
-Pthreads Mutex Lock				& 31		& 31.86	& 0.99 \\
-\uC \code{monitor} member routine		& 30		& 30		& 0    \\
-\CFA \code{mutex} routine, 1 argument	& 41		& 41.57	& 0.9  \\
-\CFA \code{mutex} routine, 2 argument	& 76		& 76.96	& 1.57 \\
-\CFA \code{mutex} routine, 4 argument	& 145		& 146.68	& 3.85 \\
-Java synchronized routine			& 27		& 28.57	& 2.6  \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Mutex routine comparison. All numbers are in nanoseconds(\si{\nano\second})}
-\label{tab:mutex}
-\end{table}
-
-\subsection{Internal Scheduling}
-The internal-scheduling benchmark measures the cost of waiting on and signalling a condition variable. Listing \ref{lst:int-sched} shows the code for \CFA, with results in table \ref{tab:int-sched}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests.
-
-\begin{figure}
-\begin{cfacode}[caption={Benchmark code for internal scheduling},label={lst:int-sched}]
-volatile int go = 0;
-condition c;
-monitor M {};
-M m1;
-
-void __attribute__((noinline)) do_call( M & mutex a1 ) { signal(c); }
-
-thread T {};
-void ^?{}( T & mutex this ) {}
-void main( T & this ) {
-	while(go == 0) { yield(); }
-	while(go == 1) { do_call(m1); }
-}
-int  __attribute__((noinline)) do_wait( M & mutex a1 ) {
-	go = 1;
-	BENCH(
-		for(size_t i=0; i<n; i++) {
-			wait(c);
-		},
-		result
-	)
-	printf("%llu\n", result);
-	go = 0;
-	return 0;
-}
-int main() {
-	T t;
-	return do_wait(m1);
-}
-\end{cfacode}
-\end{figure}
-
-\begin{table}
-\begin{center}
-\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
-\cline{2-4}
-\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
-\hline
-Pthreads Condition Variable			& 5902.5	& 6093.29 	& 714.78 \\
-\uC \code{signal}					& 322		& 323 	& 3.36   \\
-\CFA \code{signal}, 1 \code{monitor}	& 352.5	& 353.11	& 3.66   \\
-\CFA \code{signal}, 2 \code{monitor}	& 430		& 430.29	& 8.97   \\
-\CFA \code{signal}, 4 \code{monitor}	& 594.5	& 606.57	& 18.33  \\
-Java \code{notify}				& 13831.5	& 15698.21	& 4782.3 \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Internal scheduling comparison. All numbers are in nanoseconds(\si{\nano\second})}
-\label{tab:int-sched}
-\end{table}
-
-\subsection{External Scheduling}
-The external-scheduling benchmark measures the cost of the \code{waitfor} statement (\code{_Accept} in \uC). Listing \ref{lst:ext-sched} shows the code for \CFA, with results in table \ref{tab:ext-sched}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests.
-
-\begin{figure}
-\begin{cfacode}[caption={Benchmark code for external scheduling},label={lst:ext-sched}]
-volatile int go = 0;
-monitor M {};
-M m1;
-thread T {};
-
-void __attribute__((noinline)) do_call( M & mutex a1 ) {}
-
-void ^?{}( T & mutex this ) {}
-void main( T & this ) {
-	while(go == 0) { yield(); }
-	while(go == 1) { do_call(m1); }
-}
-int  __attribute__((noinline)) do_wait( M & mutex a1 ) {
-	go = 1;
-	BENCH(
-		for(size_t i=0; i<n; i++) {
-			waitfor(do_call, a1);
-		},
-		result
-	)
-	printf("%llu\n", result);
-	go = 0;
-	return 0;
-}
-int main() {
-	T t;
-	return do_wait(m1);
-}
-\end{cfacode}
-\end{figure}
-
-\begin{table}
-\begin{center}
-\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
-\cline{2-4}
-\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
-\hline
-\uC \code{Accept}					& 350		& 350.61	& 3.11  \\
-\CFA \code{waitfor}, 1 \code{monitor}	& 358.5	& 358.36	& 3.82  \\
-\CFA \code{waitfor}, 2 \code{monitor}	& 422		& 426.79	& 7.95  \\
-\CFA \code{waitfor}, 4 \code{monitor}	& 579.5	& 585.46	& 11.25 \\
-\hline
-\end{tabular}
-\end{center}
-\caption{External scheduling comparison. All numbers are in nanoseconds(\si{\nano\second})}
-\label{tab:ext-sched}
-\end{table}
-
-\subsection{Object Creation}
-Finally, the last benchmark measures the cost of creation for concurrent objects. Listing \ref{lst:creation} shows the code for \texttt{pthread}s and \CFA threads, with results shown in table \ref{tab:creation}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests. The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine, the creation cost is very low.
-
-\begin{figure}
-\begin{center}
-\texttt{pthread}
-\begin{ccode}
-int main() {
-	BENCH(
-		for(size_t i=0; i<n; i++) {
-			pthread_t thread;
-			if(pthread_create(&thread,NULL,foo,NULL)<0) {
-				perror( "failure" );
-				return 1;
-			}
-
-			if(pthread_join(thread, NULL)<0) {
-				perror( "failure" );
-				return 1;
-			}
-		},
-		result
-	)
-	printf("%llu\n", result);
-}
-\end{ccode}
-
-
-
-\CFA Threads
-\begin{cfacode}
-int main() {
-	BENCH(
-		for(size_t i=0; i<n; i++) {
-			MyThread m;
-		},
-		result
-	)
-	printf("%llu\n", result);
-}
-\end{cfacode}
-\end{center}
-\begin{cfacode}[caption={Benchmark code for \texttt{pthread}s and \CFA to measure object creation},label={lst:creation}]
-\end{cfacode}
-\end{figure}
-
-\begin{table}
-\begin{center}
-\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
-\cline{2-4}
-\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
-\hline
-Pthreads			& 26996	& 26984.71	& 156.6  \\
-\CFA Coroutine Lazy	& 6		& 5.71	& 0.45   \\
-\CFA Coroutine Eager	& 708		& 706.68	& 4.82   \\
-\CFA Thread			& 1173.5	& 1176.18	& 15.18  \\
-\uC Coroutine		& 109		& 107.46	& 1.74   \\
-\uC Thread			& 526		& 530.89	& 9.73   \\
-Goroutine			& 2520.5	& 2530.93	& 61.56  \\
-Java Thread			& 91114.5	& 92272.79	& 961.58 \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Creation comparison. All numbers are in nanoseconds(\si{\nano\second}).}
-\label{tab:creation}
-\end{table}
Index: doc/theses/thierry/text/together.tex
===================================================================
--- doc/theses/thierry/text/together.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,140 +1,0 @@
-% ======================================================================
-% ======================================================================
-\chapter{Putting It All Together}
-% ======================================================================
-% ======================================================================
-
-
-\section{Threads As Monitors}
-As it was subtly alluded in section \ref{threads}, \code{thread}s in \CFA are in fact monitors, which means that all monitor features are available when using threads. For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine:
-\begin{figure}[H]
-\begin{cfacode}[caption={Toy simulator using \code{thread}s and \code{monitor}s.},label={lst:engine-v1}]
-// Visualization declaration
-thread Renderer {} renderer;
-Frame * simulate( Simulator & this );
-
-// Simulation declaration
-thread Simulator{} simulator;
-void render( Renderer & this );
-
-// Blocking call used as communication
-void draw( Renderer & mutex this, Frame * frame );
-
-// Simulation loop
-void main( Simulator & this ) {
-	while( true ) {
-		Frame * frame = simulate( this );
-		draw( renderer, frame );
-	}
-}
-
-// Rendering loop
-void main( Renderer & this ) {
-	while( true ) {
-		waitfor( draw, this );
-		render( this );
-	}
-}
-\end{cfacode}
-\end{figure}
-One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner:
-\begin{figure}[H]
-\begin{cfacode}[caption={Same toy simulator with proper termination condition.},label={lst:engine-v2}]
-// Visualization declaration
-thread Renderer {} renderer;
-Frame * simulate( Simulator & this );
-
-// Simulation declaration
-thread Simulator{} simulator;
-void render( Renderer & this );
-
-// Blocking call used as communication
-void draw( Renderer & mutex this, Frame * frame );
-
-// Simulation loop
-void main( Simulator & this ) {
-	while( true ) {
-		Frame * frame = simulate( this );
-		draw( renderer, frame );
-
-		// Exit main loop after the last frame
-		if( frame->is_last ) break;
-	}
-}
-
-// Rendering loop
-void main( Renderer & this ) {
-	while( true ) {
-		   waitfor( draw, this );
-		or waitfor( ^?{}, this ) {
-			// Add an exit condition
-			break;
-		}
-
-		render( this );
-	}
-}
-
-// Call destructor for simulator once simulator finishes
-// Call destructor for renderer to signify shutdown
-\end{cfacode}
-\end{figure}
-
-\section{Fibers \& Threads}
-As mentioned in section \ref{preemption}, \CFA uses preemptive threads by default but can use fibers on demand. Currently, using fibers is done by adding the following line of code to the program~:
-\begin{cfacode}
-unsigned int default_preemption() {
-	return 0;
-}
-\end{cfacode}
-This function is called by the kernel to fetch the default preemption rate, where 0 signifies an infinite time-slice, i.e., no preemption. However, once clusters are fully implemented, it will be possible to create fibers and \glspl{uthread} in the same system, as in listing \ref{lst:fiber-uthread}
-\begin{figure}
-\begin{cfacode}[caption={Using fibers and \glspl{uthread} side-by-side in \CFA},label={lst:fiber-uthread}]
-//Cluster forward declaration
-struct cluster;
-
-//Processor forward declaration
-struct processor;
-
-//Construct clusters with a preemption rate
-void ?{}(cluster& this, unsigned int rate);
-//Construct processor and add it to cluster
-void ?{}(processor& this, cluster& cluster);
-//Construct thread and schedule it on cluster
-void ?{}(thread& this, cluster& cluster);
-
-//Declare two clusters
-cluster thread_cluster = { 10`ms };			//Preempt every 10 ms
-cluster fibers_cluster = { 0 };				//Never preempt
-
-//Construct 4 processors
-processor processors[4] = {
-	//2 for the thread cluster
-	thread_cluster;
-	thread_cluster;
-	//2 for the fibers cluster
-	fibers_cluster;
-	fibers_cluster;
-};
-
-//Declares thread
-thread UThread {};
-void ?{}(UThread& this) {
-	//Construct underlying thread to automatically
-	//be scheduled on the thread cluster
-	(this){ thread_cluster }
-}
-
-void main(UThread & this);
-
-//Declares fibers
-thread Fiber {};
-void ?{}(Fiber& this) {
-	//Construct underlying thread to automatically
-	//be scheduled on the fiber cluster
-	(this.__thread){ fibers_cluster }
-}
-
-void main(Fiber & this);
-\end{cfacode}
-\end{figure}
Index: doc/theses/thierry/thePlan.md
===================================================================
--- doc/theses/thierry/thePlan.md	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,26 +1,0 @@
-_Phase 1_ : Prototype
-done - Threads.
-done - Main thread is a cfa thread.
-done - SimpleBlockingLock.
-done - Synchronisation points in thread destructors.
-done - Processors & SpinLock.
-
-_Phase 2_ : Minimum Viable Product
-done - Monitor type and enter/leave mutex member routines
-done - Multi monitors calls,
-done - Monitors as a language feature (not calling enter/leave by hand)
-
-_Phase 3_ : Monitor features
-Internal scheduling
-External scheduling
-
-_Phase 4_ : Kernel features
-Preemption
-Detach thread
-Cluster migration
-
-_Phase 5_ : Performance
-Proper scheduler
-...
-
-
Index: doc/theses/thierry/thesis.tex
===================================================================
--- doc/theses/thierry/thesis.tex	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,146 +1,0 @@
-% requires tex packages: texlive-base texlive-latex-base tex-common texlive-humanities texlive-latex-extra texlive-fonts-recommended
-
-% inline code �...� (copyright symbol) emacs: C-q M-)
-% red highlighting �...� (registered trademark symbol) emacs: C-q M-.
-% blue highlighting �...� (sharp s symbol) emacs: C-q M-_
-% green highlighting �...� (cent symbol) emacs: C-q M-"
-% LaTex escape �...� (section symbol) emacs: C-q M-'
-% keyword escape �...� (pilcrow symbol) emacs: C-q M-^
-% math escape $...$ (dollar symbol)
-
-\documentclass[letterpaper,12pt,titlepage,oneside,final]{book}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-% Latex packages used in the document.
-\usepackage[T1]{fontenc}					% allow Latin1 (extended ASCII) characters
-\usepackage{textcomp}
-\usepackage[latin1]{inputenc}
-\usepackage{fullpage,times,comment}
-\usepackage{epic,eepic}
-\usepackage{upquote}						% switch curled `'" to straight
-\usepackage{dirtytalk}
-\usepackage{calc}
-\usepackage{xspace}
-\usepackage[labelformat=simple]{subfig}
-\renewcommand{\thesubfigure}{(\alph{subfigure})}
-\usepackage{graphicx}
-\usepackage{tabularx}
-\usepackage{multicol}
-\usepackage[acronym]{glossaries}
-\usepackage{varioref}
-\usepackage{listings}						% format program code
-\usepackage[flushmargin]{footmisc}				% support label/reference in footnote
-\usepackage{latexsym}						% \Box glyph
-\usepackage{mathptmx}						% better math font with "times"
-\usepackage[usenames]{color}
-\usepackage[pagewise]{lineno}
-\renewcommand{\linenumberfont}{\scriptsize\sffamily}
-\usepackage{fancyhdr}
-\usepackage{float}
-\usepackage{siunitx}
-\sisetup{ binary-units=true }
-\input{style}							% bespoke macros used in the document
-\usepackage{url}
-\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
-\usepackage{breakurl}
-\urlstyle{rm}
-
-\usepackage{tikz}
-\def\checkmark{\tikz\fill[scale=0.4](0,.35) -- (.25,0) -- (1,.7) -- (.25,.15) -- cycle;}
-
-\setlength{\topmargin}{-0.45in}				% move running title into header
-\setlength{\headsep}{0.25in}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-% Names used in the document.
-
-\newcommand{\Version}{1.0.0}
-\newcommand{\CS}{C\raisebox{-0.9ex}{\large$^\sharp$}\xspace}
-
-\newcommand{\Textbf}[2][red]{{\color{#1}{\textbf{#2}}}}
-\newcommand{\Emph}[2][red]{{\color{#1}\textbf{\emph{#2}}}}
-\newcommand{\R}[1]{\Textbf{#1}}
-\newcommand{\B}[1]{{\Textbf[blue]{#1}}}
-\newcommand{\G}[1]{{\Textbf[OliveGreen]{#1}}}
-\newcommand{\uC}{$\mu$\CC}
-\newcommand{\cit}{\textsuperscript{[Citation Needed]}\xspace}
-\newcommand{\TODO}{{\Textbf{TODO}}}
-
-\input{glossary}
-
-\newsavebox{\LstBox}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\setcounter{secnumdepth}{2}                           % number subsubsections
-\setcounter{tocdepth}{2}                              % subsubsections in table of contents
-% \linenumbers                                       	% comment out to turn off line numbering
-\makeindex
-\pagestyle{fancy}
-\fancyhf{}
-\cfoot{\thepage}
-\rfoot{v\input{version}}
-
-
-
-%======================================================================
-%   L O G I C A L    D O C U M E N T -- the content of your thesis
-%======================================================================
-\begin{document}
-
-% For a large document, it is a good idea to divide your thesis
-% into several files, each one containing one chapter.
-% To illustrate this idea, the "front pages" (i.e., title page,
-% declaration, borrowers' page, abstract, acknowledgements,
-% dedication, table of contents, list of tables, list of figures,
-% nomenclature) are contained within the file "thesis-frontpgs.tex" which is
-% included into the document by the following statement.
-%----------------------------------------------------------------------
-% FRONT MATERIAL
-%----------------------------------------------------------------------
-\input{frontpgs}
-
-%----------------------------------------------------------------------
-% MAIN BODY
-%----------------------------------------------------------------------
-
-\input{intro}
-
-\input{cforall}
-
-\input{basics}
-
-\input{concurrency}
-
-\input{parallelism}
-
-\input{internals}
-
-\input{together}
-
-\input{results}
-
-\input{future}
-
-
-\clearpage
-
-% B I B L I O G R A P H Y
-% -----------------------------
-\addcontentsline{toc}{chapter}{Bibliography}
-\bibliographystyle{plain}
-\bibliography{pl,local}
-\cleardoublepage
-\phantomsection		% allows hyperref to link to the correct page
-
-% G L O S S A R Y
-% -----------------------------
-\addcontentsline{toc}{chapter}{Glossary}
-\printglossary
-\cleardoublepage
-\phantomsection		% allows hyperref to link to the correct page
-
-
-\end{document}
Index: doc/theses/thierry/version
===================================================================
--- doc/theses/thierry/version	(revision cf68d04b367d8d879c5a9d03ac024235346ad5b9)
+++ 	(revision )
@@ -1,1 +1,0 @@
-0.11.403
Index: doc/theses/thierry_delisle/.gitignore
===================================================================
--- doc/theses/thierry_delisle/.gitignore	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/.gitignore	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,29 @@
+build/*.aux
+build/*.acn
+build/*.acr
+build/*.alg
+build/*.bbl
+build/*.blg
+build/*.brf
+build/*.dvi
+build/*.glg
+build/*.glo
+build/*.gls
+build/*.idx
+build/*.ind
+build/*.ist
+build/*.lof
+build/*.log
+build/*.lol
+build/*.lot
+build/*.out
+build/*.ps
+build/*.pstex
+build/*.pstex_t
+build/*.tex
+build/*.toc
+*.pdf
+*.png
+figures/*.tex
+
+examples
Index: doc/theses/thierry_delisle/Makefile
===================================================================
--- doc/theses/thierry_delisle/Makefile	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/Makefile	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,147 @@
+## Define the appropriate configuration variables.
+# TeXLIB is the search path exported as TEXINPUTS (for latex) and BIBINPUTS (for bibtex); latex writes its output into build/.
+TeXLIB = .:./style:./text:./annex:./build:../../LaTeXmacros:../../LaTeXmacros/listings:../../LaTeXmacros/enumitem:~/bibliographies:/usr/local/bibliographies:
+LaTeX  = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=build -interaction=nonstopmode
+BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex -terse
+
+## Define the text source files.
+# SOURCES: hand-written .tex files; all lists below are prerequisites of the .dvi target.
+SOURCES = ${addsuffix .tex, \
+thesis \
+style/style \
+style/cfa-format \
+annex/glossary \
+text/frontpgs \
+text/intro \
+text/basics \
+text/cforall \
+text/concurrency \
+text/internals \
+text/parallelism \
+text/results \
+text/together \
+text/future \
+}
+# FIGURES: build/<name>.tex files, generated from figures/<name>.fig by the build/%.tex pattern rule.
+FIGURES = ${addprefix build/, ${addsuffix .tex, \
+	monitor \
+	ext_monitor \
+	int_monitor \
+	dependency \
+}}
+# PICTURES: build/<name>.pstex files, generated from figures/<name>.fig by the build/%.pstex pattern rule.
+PICTURES = ${addprefix build/, ${addsuffix .pstex, \
+	system \
+	monitor_structs \
+}}
+# PROGRAMS: currently an empty list.
+PROGRAMS = ${addsuffix .tex, \
+}
+# GRAPHS: currently an empty list.
+GRAPHS = ${addsuffix .tex, \
+}
+
+## Define the documents that need to be made.
+
+DOCUMENT = thesis.pdf
+
+# Directives: all, clean and predefined name commands rather than files, so they are declared phony. #
+.PHONY : all clean predefined
+all : ${DOCUMENT}
+# Remove the final document and every generated intermediate file (rm -f ignores patterns that match nothing).
+clean :
+	@rm -fv ${DOCUMENT} \
+	build/*.acn     \
+	build/*.acr     \
+	build/*.alg     \
+	build/*.aux     \
+	build/*.bbl     \
+	build/*.blg     \
+	build/*.brf     \
+	build/*.cf      \
+	build/*.dvi     \
+	build/*.glg     \
+	build/*.glo     \
+	build/*.gls     \
+	build/*.ist     \
+	build/*.idx     \
+	build/*.ilg     \
+	build/*.ind     \
+	build/*.log     \
+	build/*.out     \
+	build/*.ps      \
+	build/*.pstex   \
+	build/*.pstex_t \
+	build/*.tex     \
+	build/*.toc     \
+	build/*.lof     \
+	build/*.lol     \
+	build/*.lot     \
+	figures/*.tex   \
+	*.png
+
+
+# File Dependencies #
+# Pipeline: latex -> build/<doc>.dvi, dvips -> build/<doc>.ps, ps2pdf -> ${DOCUMENT}.
+${DOCUMENT} : build/${basename ${DOCUMENT}}.ps
+	ps2pdf $<
+# NOTE(review): ps2pdf above is given no output name; presumably it writes <doc>.pdf into the current directory -- confirm.
+build/${basename ${DOCUMENT}}.ps : build/${basename ${DOCUMENT}}.dvi
+	dvips $< -o $@
+# The .dvi is rebuilt when the Makefile, any source/figure/picture, the shared macros or the local bibliography changes.
+build/${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex ../../LaTeXmacros/common.tex ../../LaTeXmacros/indexstyle annex/local.bib
+# Recipe order: seed .ind/.aux, bibtex, latex, bibtex, makeglossaries, bump_ver.sh, final latex; steps prefixed with '-' may fail without aborting the build.
+	@ if [ ! -r ${basename $@}.ind ] ; then touch ${basename $@}.ind ; fi 				# Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
+	@ echo "Citation lookup"											# Must have *.aux file containing citations for bibtex
+	@ if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename ${notdir $@}}.tex ; fi
+	@ echo "Citation Pass 1"
+	@ -${BibTeX} ${basename $@}											# Some citations reference others so run steps again to resolve these citations
+	@ echo "Citation Pass 2"
+	@ ${LaTeX} ${basename ${notdir $@}}.tex
+	@ -${BibTeX} ${basename $@}
+	@ echo "Glossary"
+	@ makeglossaries -q -s ${basename $@}.ist ${basename $@}						# Make index from *.aux entries and input index at end of document
+	@ echo ".dvi generation"
+	@ -build/bump_ver.sh
+	@ ${LaTeX} ${basename ${notdir $@}}.tex									# Run again to get index title into table of contents
+
+# Filter the main .tex file through predefined.sed and store the result in predefined.cf.
+predefined :
+	sed -f predefined.sed ${DOCUMENT:.pdf=.tex} > $@.cf
+
+## Define the default recipes.
+# xfig figure -> eepic LaTeX source (this rule produces the FIGURES).
+build/%.tex : figures/%.fig
+	fig2dev -L eepic $< > $@
+# xfig figure -> PostScript.
+build/%.ps : figures/%.fig
+	fig2dev -L ps $< > $@
+# xfig figure -> pstex picture (the PICTURES); the second command also writes the companion $@_t (.pstex_t) file, which is not a declared target.
+build/%.pstex : figures/%.fig
+	fig2dev -L pstex $< > $@
+	fig2dev -L pstex_t -p $@ $< > $@_t
+# Wrap a pstex picture in a standalone LaTeX document. printf '%s\n' with single-quoted arguments emits every line verbatim; echo's handling of -n and backslash escapes differs between shells.
+figures/%.tex: build/%.pstex
+	printf '%s\n'	'\documentclass[preview]{standalone}'	\
+			'\usepackage[T1]{fontenc}'			\
+			'\usepackage[usenames]{color}'		\
+			'\usepackage{graphicx}'			\
+			'\usepackage{listings}'			\
+			'\usepackage{xspace}'			\
+			'\input{style}'				\
+			'\begin{document}'				\
+			'{\resizebox{3\textwidth}{!}{\input{${basename ${notdir $@}}.pstex_t}}}' \
+			'\end{document}' > $@
+# Render one picture as a colour-negated PNG: latex -> dvips -> ps2pdf -> convert ($* is the target name without .png).
+%.png : build/%.pstex figures/%.tex
+	echo $*
+	${LaTeX} figures/$*.tex
+	dvips build/$*.dvi -o build/$*.ps
+	ps2pdf build/$*.ps
+	convert -negate $*.pdf $@
+
+
+
+# Local Variables: #
+# compile-command: "make" #
+# End: #
Index: doc/theses/thierry_delisle/annex/glossary.tex
===================================================================
--- doc/theses/thierry_delisle/annex/glossary.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/annex/glossary.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,101 @@
+\makeglossaries
+
+\longnewglossaryentry{callsite-locking}
+{name={callsite-locking}}
+{
+Locking done by the calling routine. With this technique, a routine calling a monitor routine acquires the monitor \emph{before} making the call to the actual routine.
+}
+
+\longnewglossaryentry{entry-point-locking}
+{name={entry-point-locking}}
+{
+Locking done by the called routine. With this technique, a monitor routine called by another routine acquires the monitor \emph{after} entering the routine body but prior to any other code.
+}
+
+\longnewglossaryentry{bulk-acq}
+{name={bulk-acquiring}}
+{
+Implicitly acquiring several monitors when entering a monitor.
+}
+
+\longnewglossaryentry{multi-acq}
+{name={multiple-acquisition}}
+{
+Any locking technique that allows a single thread to acquire the same lock multiple times.
+}
+
+\longnewglossaryentry{mon-ctx}
+{name={monitor context}}
+{
+The state of the current thread regarding which monitors are owned.
+}
+
+
+\longnewglossaryentry{uthread}
+{name={user-level thread}}
+{
+Threads created and managed inside user-space. Each thread has its own stack and its own thread of execution. User-level threads are invisible to the underlying operating system.
+
+\textit{Synonyms : User threads, Lightweight threads, Green threads, Virtual threads, Tasks.}
+}
+
+\longnewglossaryentry{kthread}
+{name={kernel-level thread}}
+{
+Threads created and managed inside kernel-space. Each thread has its own stack and its own thread of execution. Kernel-level threads are owned, managed and scheduled by the underlying operating system.
+
+\textit{Synonyms : OS threads, Hardware threads, Physical threads.}
+}
+
+\longnewglossaryentry{fiber}
+{name={fiber}}
+{
+Fibers are non-preemptive user-level threads. They share most of the characteristics of user-level threads except that they cannot be preempted by another fiber.
+
+\textit{Synonyms : Tasks.}
+}
+
+\longnewglossaryentry{job}
+{name={job}}
+{
+Unit of work, often sent to a thread pool or worker pool to be executed. Has neither its own stack nor its own thread of execution.
+
+\textit{Synonyms : Tasks.}
+}
+
+\longnewglossaryentry{pool}
+{name={thread-pool}}
+{
+Group of homogeneous threads that loop executing units of work one after another.
+
+\textit{Synonyms : }
+}
+
+\longnewglossaryentry{cfacluster}
+{name={cluster}}
+{
+A group of \glspl{kthread} executed in isolation.
+
+\textit{Synonyms : None.}
+}
+
+\longnewglossaryentry{cfathread}
+{name={thread}}
+{
+User level threads that are the default in \CFA. Generally declared using the \code{thread} keyword.
+
+\textit{Synonyms : None.}
+}
+
+\longnewglossaryentry{preemption}
+{name={preemption}}
+{
+Involuntary context switch imposed on threads at a specified rate.
+
+\textit{Synonyms : None.}
+}
+
+\newacronym{tls}{TLS}{Thread Local Storage}
+\newacronym{api}{API}{Application Programming Interface}
+\newacronym{raii}{RAII}{Resource Acquisition Is Initialization}
+\newacronym{numa}{NUMA}{Non-Uniform Memory Access}
Index: doc/theses/thierry_delisle/annex/local.bib
===================================================================
--- doc/theses/thierry_delisle/annex/local.bib	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/annex/local.bib	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,150 @@
+%    Predefined journal names:
+%  acmcs: Computing Surveys		acta: Acta Informatica
+%  cacm: Communications of the ACM
+%  ibmjrd: IBM J. Research & Development ibmsj: IBM Systems Journal
+%  ieeese: IEEE Trans. on Soft. Eng.	ieeetc: IEEE Trans. on Computers
+%  ieeetcad: IEEE Trans. on Computer-Aided Design of Integrated Circuits
+%  ipl: Information Processing Letters	jacm: Journal of the ACM
+%  jcss: J. Computer & System Sciences	scp: Science of Comp. Programming
+%  sicomp: SIAM J. on Computing		tocs: ACM Trans. on Comp. Systems
+%  tods: ACM Trans. on Database Sys.	tog: ACM Trans. on Graphics
+%  toms: ACM Trans. on Math. Software	toois: ACM Trans. on Office Info. Sys.
+%  toplas: ACM Trans. on Prog. Lang. & Sys.
+%  tcs: Theoretical Computer Science
+@string{ieeepds="IEEE Transactions on Parallel and Distributed Systems"}
+@string{ieeese="IEEE Transactions on Software Engineering"}
+@string{spe="Software---\-Practice and Experience"}
+@string{sigplan="SIGPLAN Notices"}
+@string{joop="Journal of Object-Oriented Programming"}
+@string{popl="Conference Record of the ACM Symposium on Principles of Programming Languages"}
+@string{osr="Operating Systems Review"}
+@string{pldi="Programming Language Design and Implementation"}
+
+
+@article{HPP:Study,
+	keywords 	= {Parallel, Productivity},
+	author 	= {Lorin Hochstein and Jeff Carver and Forrest Shull and Sima Asgari and Victor Basili and Jeffrey K. Hollingsworth and Marvin V. Zelkowitz },
+	title 	= {Parallel Programmer Productivity: A Case Study of Novice Parallel Programmers},
+}
+
+@article{Chicken,
+	keywords	= {Chicken},
+	author	= {Doug Zongker},
+	title		= {Chicken Chicken Chicken: Chicken Chicken},
+	year		= 2006
+}
+
+@article{TBB,
+	key	= {TBB},
+	keywords 	= {Intel, TBB},
+	title 	= {Intel Thread Building Blocks},
+	note		= "\url{https://www.threadingbuildingblocks.org/}"
+}
+
+@manual{www-cfa,
+	key	= {CFA},
+	keywords 	= {Cforall},
+	author	= {C$\forall$},
+	title 	= {C$\forall$ Programming Language},
+	note	= {\url{https://plg.uwaterloo.ca/~cforall}},
+}
+
+@mastersthesis{rob-thesis,
+	keywords 	= {Constructors, Destructors, Tuples},
+	author	= {Rob Schluntz},
+	title 	= {Resource Management and Tuples in Cforall},
+	year		= 2017,
+	school	= {University of Waterloo},
+	note	= {\url{https://uwspace.uwaterloo.ca/handle/10012/11830}},
+}
+
+@manual{Cpp-Transactions,
+	keywords	= {C++, Transactional Memory},
+	title		= {Technical Specification for C++ Extensions for Transactional Memory},
+	organization= {International Standard ISO/IEC TS 19841:2015 },
+	publisher   = {American National Standards Institute},
+	address	= {http://www.iso.org},
+	year		= 2015,
+}
+
+@article{BankTransfer,
+	key	= {Bank Transfer},
+	keywords 	= {Bank Transfer},
+	title 	= {Bank Account Transfer Problem},
+	publisher	= {Wiki Wiki Web},
+	address	= {http://wiki.c2.com},
+	year		= 2010
+}
+
+@misc{2FTwoHardThings,
+	keywords 	= {Hard Problem},
+	title 	= {TwoHardThings},
+	author	= {Martin Fowler},
+	howpublished= "\url{https://martinfowler.com/bliki/TwoHardThings.html}",
+	year		= 2009
+}
+
+@article{IntrusiveData,
+	title		= {Intrusive Data Structures},
+	author	= {Jiri Soukup},
+	journal	= {CppReport},
+	year		= 1998,
+	month		= May,
+	volume	= {10/No5.},
+	pages		= 22
+}
+
+@article{Herlihy93,
+	author	= {Herlihy, Maurice and Moss, J. Eliot B.},
+	title	= {Transactional memory: architectural support for lock-free data structures},
+	journal	= {SIGARCH Comput. Archit. News},
+	issue_date	= {May 1993},
+	volume	= {21},
+	number	= {2},
+	month	= may,
+	year	= {1993},
+	pages	= {289--300},
+	numpages	= {12},
+	publisher	= {ACM},
+	address	= {New York, NY, USA},
+}
+
+@manual{affinityLinux,
+	key	= {TBB},
+	title		= "{Linux man page - sched\_setaffinity(2)}"
+}
+
+@manual{affinityWindows,
+	title		= "{Windows (vs.85) - SetThreadAffinityMask function}"
+}
+
+@manual{switchToWindows,
+	title		= "{Windows (vs.85) - SwitchToFiber function}"
+}
+
+@manual{affinityFreebsd,
+	title		= "{FreeBSD General Commands Manual - CPUSET(1)}"
+}
+
+@manual{affinityNetbsd,
+	title		= "{NetBSD Library Functions Manual - AFFINITY(3)}"
+}
+
+@manual{affinityMacosx,
+	title		= "{Affinity API Release Notes for OS X v10.5}"
+}
+
+@misc{NodeJs,
+	title		= "{Node.js}",
+	howpublished= "\url{https://nodejs.org/en/}",
+}
+
+@misc{SpringMVC,
+	title		= "{Spring Web MVC}",
+	howpublished= "\url{https://docs.spring.io/spring/docs/current/spring-framework-reference/web.html}",
+}
+
+@misc{Django,
+	title		= "{Django}",
+	howpublished= "\url{https://www.djangoproject.com/}",
+}
Index: doc/theses/thierry_delisle/build/bump_ver.sh
===================================================================
--- doc/theses/thierry_delisle/build/bump_ver.sh	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/build/bump_ver.sh	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,6 @@
+#!/bin/bash
+if [ ! -f version ]; then
+    echo "0.0.0" > version
+fi
+
+sed -r 's/([0-9]+\.[0-9]+.)([0-9]+)/echo "\1\$((\2+1))" > version/ge' version > /dev/null
Index: doc/theses/thierry_delisle/figures/dependency.fig
===================================================================
--- doc/theses/thierry_delisle/figures/dependency.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/figures/dependency.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,119 @@
+#FIG 3.2  Produced by xfig version 3.2.5c
+Landscape
+Center
+Inches
+Letter  
+100.00
+Single
+-2
+1200 2
+6 750 2250 2250 2850
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 1050 2550 300 300 750 2550 1350 2550
+4 0 0 50 -1 0 20 0.0000 2 315 1305 900 2700 $\\alpha$3\001
+-6
+6 750 1350 2250 1950
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 1050 1650 300 300 750 1650 1350 1650
+4 0 0 50 -1 0 20 0.0000 2 315 1305 900 1800 $\\alpha$2\001
+-6
+6 750 450 2250 1050
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 1050 750 300 300 750 750 1350 750
+4 0 0 50 -1 0 20 0.0000 2 315 1305 900 900 $\\alpha$1\001
+-6
+6 750 3150 2250 3750
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 1050 3450 300 300 750 3450 1350 3450
+4 0 0 50 -1 0 20 0.0000 2 315 1305 900 3600 $\\alpha$4\001
+-6
+6 750 4050 2250 4650
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 1050 4350 300 300 750 4350 1350 4350
+4 0 0 50 -1 0 20 0.0000 2 315 1305 900 4500 $\\alpha$5\001
+-6
+6 3000 1350 4800 1950
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 1650 300 300 3000 1650 3600 1650
+4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 1800 $\\gamma$2\001
+-6
+6 3000 450 4800 1050
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 750 300 300 3000 750 3600 750
+4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 900 $\\gamma$1\001
+-6
+6 3000 2250 4800 2850
+6 3000 2250 3600 2850
+6 3000 2250 3600 2850
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 2550 300 300 3000 2550 3600 2550
+-6
+-6
+4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 2700 $\\gamma$3\001
+-6
+6 3000 3150 4800 3750
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 3450 300 300 3000 3450 3600 3450
+4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 3600 $\\gamma$4\001
+-6
+6 3000 4050 4800 4650
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 4350 300 300 3000 4350 3600 4350
+4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 4500 $\\gamma$5\001
+-6
+6 3000 4950 4800 5550
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 3300 5250 300 300 3000 5250 3600 5250
+4 0 0 50 -1 0 20 0.0000 2 315 1560 3150 5400 $\\gamma$6\001
+-6
+6 5400 1800 6750 4200
+6 5400 1800 6750 2400
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 5700 2100 300 300 5400 2100 6000 2100
+4 0 0 50 -1 0 20 0.0000 2 270 1140 5550 2250 $\\beta$1\001
+-6
+6 5400 2700 6750 3300
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 5700 3000 300 300 5400 3000 6000 3000
+4 0 0 50 -1 0 20 0.0000 2 270 1140 5550 3150 $\\beta$2\001
+-6
+6 5400 3600 6750 4200
+1 4 0 1 0 7 50 -1 -1 0.000 1 0.0000 5700 3900 300 300 5400 3900 6000 3900
+4 0 0 50 -1 0 20 0.0000 2 270 1140 5550 4050 $\\beta$3\001
+-6
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 5700 2700 5700 2400
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 5700 3600 5700 3300
+-6
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 1050 1350 1050 1050
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 3300 1350 3300 1050
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 3300 2250 3300 1950
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 1050 2250 1050 1950
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 1050 3150 1050 2850
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 3300 3150 3300 2850
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 1050 4050 1050 3750
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 3300 4050 3300 3750
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 3300 4950 3300 4650
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 1350 2550 3000 2550
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 1 2
+	1 1 1.00 60.00 120.00
+	 1350 3450 3000 3450
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 1 2
+	1 1 1.00 60.00 120.00
+	 3000 5175 1350 4500
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 1 2
+	1 1 1.00 60.00 120.00
+	 5462 4060 3582 5156
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 1 2
+	1 1 1.00 60.00 120.00
+	 3564 4198 5438 3144
Index: doc/theses/thierry_delisle/figures/ext_monitor.fig
===================================================================
--- doc/theses/thierry_delisle/figures/ext_monitor.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/figures/ext_monitor.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,96 @@
+#FIG 3.2  Produced by xfig version 3.2.5c
+Landscape
+Center
+Inches
+Letter  
+100.00
+Single
+-2
+1200 2
+5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 3150.000 3450.000 3150 3150 2850 3450 3150 3750
+5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 3150.000 4350.000 3150 4050 2850 4350 3150 4650
+6 5850 1950 6150 2250
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 2100 105 105 6000 2100 6105 2205
+4 1 -1 0 0 0 10 0.0000 2 105 90 6000 2160 d\001
+-6
+6 5100 2100 5400 2400
+1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 5250 2250 105 105 5250 2250 5355 2250
+4 1 -1 0 0 0 10 0.0000 2 105 120 5250 2295 X\001
+-6
+6 5100 1800 5400 2100
+1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 5250 1950 105 105 5250 1950 5355 1950
+4 1 -1 0 0 0 10 0.0000 2 105 120 5250 2010 Y\001
+-6
+6 5850 1650 6150 1950
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 1800 105 105 6000 1800 6105 1905
+4 1 -1 0 0 0 10 0.0000 2 105 90 6000 1860 b\001
+-6
+6 3070 5445 7275 5655
+1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3150 5550 80 80 3150 5550 3230 5630
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4500 5550 105 105 4500 5550 4605 5655
+1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 6000 5550 105 105 6000 5550 6105 5655
+4 0 -1 0 0 0 12 0.0000 2 135 1035 4725 5625 blocked task\001
+4 0 -1 0 0 0 12 0.0000 2 135 870 3300 5625 active task\001
+4 0 -1 0 0 0 12 0.0000 2 135 1050 6225 5625 routine mask\001
+-6
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3300 3600 105 105 3300 3600 3405 3705
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3600 3600 105 105 3600 3600 3705 3705
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6600 3900 105 105 6600 3900 6705 4005
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6900 3900 105 105 6900 3900 7005 4005
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 2700 105 105 6000 2700 6105 2805
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 2400 105 105 6000 2400 6105 2505
+1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 5100 4575 80 80 5100 4575 5180 4655
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 4050 2925 5475 2925 5475 3225 4050 3225 4050 2925
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
+	 3150 3750 3750 3750 3750 4050 3150 4050
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
+	 3150 3450 3750 3450 3900 3675
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
+	 3750 3150 3600 3375
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
+	 3150 4350 3750 4350 3900 4575
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
+	 3750 4050 3600 4275
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
+	 3150 4650 3750 4650 3750 4950 4950 4950
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
+	 6450 3750 6300 3975
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
+	 4950 4950 5175 5100
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
+	 5250 4950 6450 4950 6450 4050 7050 4050 7050 3750 6450 3750
+	 6450 2850 6150 2850 6150 1650
+2 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
+	 5850 4200 5850 3300 4350 3300 4350 4200 5850 4200
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2
+	1 1 1.00 60.00 120.00
+	7 1 1.00 60.00 120.00
+	 5250 3150 5250 2400
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 3150 3150 3750 3150 3750 2850 5700 2850 5700 1650
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 5700 2850 6150 3000
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 5100 1800 5400 1800 5400 2400 5100 2400 5100 1800
+4 1 -1 0 0 0 10 0.0000 2 75 75 6000 2745 a\001
+4 1 -1 0 0 0 10 0.0000 2 75 75 6000 2445 c\001
+4 1 -1 0 0 0 12 0.0000 2 135 315 5100 5325 exit\001
+4 1 -1 0 0 0 12 0.0000 2 135 135 3300 3075 A\001
+4 1 -1 0 0 0 12 0.0000 2 135 795 3300 4875 condition\001
+4 1 -1 0 0 0 12 0.0000 2 135 135 3300 5100 B\001
+4 0 -1 0 0 0 12 0.0000 2 135 420 6600 3675 stack\001
+4 0 -1 0 0 0 12 0.0000 2 180 750 6600 3225 acceptor/\001
+4 0 -1 0 0 0 12 0.0000 2 180 750 6600 3450 signalled\001
+4 1 -1 0 0 0 12 0.0000 2 135 795 3300 2850 condition\001
+4 1 -1 0 0 0 12 0.0000 2 165 420 6000 1350 entry\001
+4 1 -1 0 0 0 12 0.0000 2 135 495 6000 1575 queue\001
+4 0 -1 0 0 0 12 0.0000 2 135 525 6300 2400 arrival\001
+4 0 -1 0 0 0 12 0.0000 2 135 630 6300 2175 order of\001
+4 1 -1 0 0 0 12 0.0000 2 135 525 5100 3675 shared\001
+4 1 -1 0 0 0 12 0.0000 2 135 735 5100 3975 variables\001
+4 0 0 50 -1 0 11 0.0000 2 165 855 4275 3150 Acceptables\001
+4 0 0 50 -1 0 11 0.0000 2 120 165 5775 2700 W\001
+4 0 0 50 -1 0 11 0.0000 2 120 135 5775 2400 X\001
+4 0 0 50 -1 0 11 0.0000 2 120 105 5775 2100 Z\001
+4 0 0 50 -1 0 11 0.0000 2 120 135 5775 1800 Y\001
Index: doc/theses/thierry_delisle/figures/int_monitor.fig
===================================================================
--- doc/theses/thierry_delisle/figures/int_monitor.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/figures/int_monitor.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,109 @@
+#FIG 3.2  Produced by xfig version 3.2.5c
+Landscape
+Center
+Inches
+Letter  
+100.00
+Single
+-2
+1200 2
+5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 675.000 2700.000 675 2400 375 2700 675 3000
+6 4533 2866 4655 3129
+5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 4657.017 2997.000 4655 2873 4533 2997 4655 3121
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 4655 2866 4655 3129
+-6
+6 4725 2866 4847 3129
+5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 4849.017 2997.000 4847 2873 4725 2997 4847 3121
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 4847 2866 4847 3129
+-6
+6 4911 2866 5033 3129
+5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 5035.017 2997.000 5033 2873 4911 2997 5033 3121
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 5033 2866 5033 3129
+-6
+6 9027 2866 9149 3129
+5 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9024.983 2997.000 9027 2873 9149 2997 9027 3121
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 9027 2866 9027 3129
+-6
+6 9253 2866 9375 3129
+5 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9250.983 2997.000 9253 2873 9375 2997 9253 3121
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 9253 2866 9253 3129
+-6
+6 9478 2866 9600 3129
+5 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9475.983 2997.000 9478 2873 9600 2997 9478 3121
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 9478 2866 9478 3129
+-6
+1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 7650 3675 80 80 7650 3675 7730 3755
+1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3150 3675 80 80 3150 3675 3230 3755
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4047 1793 125 125 4047 1793 3929 1752
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4050 1500 125 125 4050 1500 3932 1459
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8550 1500 125 125 8550 1500 8432 1459
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8550 1800 125 125 8550 1800 8432 1759
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1200 2850 125 125 1200 2850 1082 2809
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 900 2850 125 125 900 2850 782 2809
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 4650 105 105 6000 4650 6105 4755
+1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3900 4650 80 80 3900 4650 3980 4730
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 3900 1950 4200 2100
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 5
+	 3000 4050 1800 4050 1800 1950 3900 1950 3900 1350
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
+	 7800 4050 9000 4050 9000 3150 9600 3150 9600 2850 9000 2850
+	 9000 1950 8700 1950 8700 1350
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 8400 1950 8700 2100
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
+	 3300 4050 4500 4050 4500 3150 5100 3150 5100 2850 4500 2850
+	 4500 1950 4200 1950 4200 1350
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 5
+	 7500 4050 6300 4050 6300 1950 8400 1950 8400 1350
+2 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
+	 8400 3300 8400 2400 6900 2400 6900 3300 8400 3300
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 9000 2850 8850 3150
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 7500 4050 7800 4200
+2 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
+	 3900 3300 3900 2400 2400 2400 2400 3300 3900 3300
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 4500 2850 4350 3150
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 3000 4050 3300 4200
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 675 3000 1425 3000
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 675 2400 1425 2400
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 1425 2700 1500 2925
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 1425 2400 1350 2625
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 675 2700 1425 2700
+4 1 -1 0 0 0 12 0.0000 2 135 315 2850 4275 exit\001
+4 1 -1 0 0 0 12 0.0000 2 135 315 7350 4275 exit\001
+4 0 -1 0 0 0 12 0.0000 2 180 750 9150 2325 acceptor/\001
+4 0 -1 0 0 0 12 0.0000 2 180 750 9150 2550 signalled\001
+4 0 -1 0 0 0 12 0.0000 2 135 420 9150 2775 stack\001
+4 1 -1 0 0 0 12 0.0000 2 135 525 7650 2775 shared\001
+4 1 -1 0 0 0 12 0.0000 2 135 735 7650 3075 variables\001
+4 1 -1 0 0 0 12 0.0000 2 135 495 8550 1275 queue\001
+4 1 -1 0 0 0 12 0.0000 2 165 420 8550 1125 entry\001
+4 0 -1 0 0 0 12 0.0000 2 135 630 8850 1575 order of\001
+4 0 -1 0 0 0 12 0.0000 2 135 525 8850 1725 arrival\001
+4 0 -1 0 0 0 12 0.0000 2 180 750 4650 2325 acceptor/\001
+4 0 -1 0 0 0 12 0.0000 2 180 750 4650 2550 signalled\001
+4 0 -1 0 0 0 12 0.0000 2 135 420 4650 2775 stack\001
+4 1 -1 0 0 0 12 0.0000 2 135 525 3150 2775 shared\001
+4 1 -1 0 0 0 12 0.0000 2 135 735 3150 3075 variables\001
+4 0 -1 0 0 0 12 0.0000 2 135 525 4350 1725 arrival\001
+4 0 -1 0 0 0 12 0.0000 2 135 630 4350 1500 order of\001
+4 1 -1 0 0 0 12 0.0000 2 135 495 4050 1275 queue\001
+4 1 -1 0 0 0 12 0.0000 2 165 420 4050 1050 entry\001
+4 0 0 50 -1 0 11 0.0000 2 120 705 600 2325 Condition\001
+4 0 -1 0 0 0 12 0.0000 2 135 1215 6150 4725 blocked thread\001
+4 0 -1 0 0 0 12 0.0000 2 135 1050 4050 4725 active thread\001
Index: doc/theses/thierry_delisle/figures/monitor.fig
===================================================================
--- doc/theses/thierry_delisle/figures/monitor.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/figures/monitor.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,101 @@
+#FIG 3.2  Produced by xfig version 3.2.5c
+Landscape
+Center
+Inches
+Letter  
+100.00
+Single
+-2
+1200 2
+5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 2700.000 1500 2400 1200 2700 1500 3000
+5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 3600.000 1500 3300 1200 3600 1500 3900
+6 4200 1200 4500 1500
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1350 105 105 4350 1350 4455 1455
+4 1 -1 0 0 0 10 0.0000 2 105 90 4350 1410 d\001
+-6
+6 4200 900 4500 1200
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1050 105 105 4350 1050 4455 1155
+4 1 -1 0 0 0 10 0.0000 2 105 90 4350 1110 b\001
+-6
+6 2400 1500 2700 1800
+1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 1650 105 105 2550 1650 2655 1650
+4 1 -1 0 0 0 10 0.0000 2 105 90 2550 1710 b\001
+-6
+6 2400 1800 2700 2100
+1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 1950 105 105 2550 1950 2655 1950
+4 1 -1 0 0 0 10 0.0000 2 75 75 2550 1995 a\001
+-6
+6 3300 1500 3600 1800
+1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 1650 105 105 3450 1650 3555 1650
+4 1 -1 0 0 0 10 0.0000 2 105 90 3450 1710 d\001
+-6
+6 1350 4650 5325 4950
+1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 1500 4800 80 80 1500 4800 1580 4880
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2850 4800 105 105 2850 4800 2955 4905
+1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 4350 4800 105 105 4350 4800 4455 4905
+4 0 -1 0 0 0 12 0.0000 2 180 765 4575 4875 duplicate\001
+4 0 -1 0 0 0 12 0.0000 2 135 1035 3075 4875 blocked task\001
+4 0 -1 0 0 0 12 0.0000 2 135 870 1650 4875 active task\001
+-6
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1650 2850 105 105 1650 2850 1755 2955
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1950 2850 105 105 1950 2850 2055 2955
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4950 3150 105 105 4950 3150 5055 3255
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5250 3150 105 105 5250 3150 5355 3255
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1950 105 105 4350 1950 4455 2055
+1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1650 105 105 4350 1650 4455 1755
+1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3450 3825 80 80 3450 3825 3530 3905
+1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 1950 105 105 3450 1950 3555 1950
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
+	 2400 2100 2625 2250
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
+	 3300 2100 3525 2250
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
+	 4200 2100 4425 2250
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 5
+	 1500 2400 2100 2400 2100 2100 2400 2100 2400 1500
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
+	 1500 3000 2100 3000 2100 3300 1500 3300
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
+	 1500 2700 2100 2700 2250 2925
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
+	 2100 2400 1950 2625
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
+	 1500 3600 2100 3600 2250 3825
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
+	 2100 3300 1950 3525
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
+	 1500 3900 2100 3900 2100 4200 3300 4200
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
+	 4800 3000 4650 3225
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
+	 3300 4200 3525 4350
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
+	 3600 1500 3600 2100 4200 2100 4200 900
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
+	 2700 1500 2700 2100 3300 2100 3300 1500
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
+	 3600 4200 4800 4200 4800 3300 5400 3300 5400 3000 4800 3000
+	 4800 2100 4500 2100 4500 900
+2 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
+	 4200 3450 4200 2550 2700 2550 2700 3450 4200 3450
+4 1 -1 0 0 0 10 0.0000 2 75 75 4350 1995 a\001
+4 1 -1 0 0 0 10 0.0000 2 75 75 4350 1695 c\001
+4 1 -1 0 0 0 12 0.0000 2 135 315 3450 4575 exit\001
+4 1 -1 0 0 0 12 0.0000 2 135 135 1650 2325 A\001
+4 1 -1 0 0 0 12 0.0000 2 135 795 1650 4125 condition\001
+4 1 -1 0 0 0 12 0.0000 2 135 135 1650 4350 B\001
+4 0 -1 0 0 0 12 0.0000 2 135 420 4950 2925 stack\001
+4 0 -1 0 0 0 12 0.0000 2 180 750 4950 2475 acceptor/\001
+4 0 -1 0 0 0 12 0.0000 2 180 750 4950 2700 signalled\001
+4 1 -1 0 0 0 12 0.0000 2 135 795 1650 2100 condition\001
+4 1 -1 0 0 0 12 0.0000 2 135 135 2550 1425 X\001
+4 1 -1 0 0 0 12 0.0000 2 135 135 3450 1425 Y\001
+4 1 -1 0 0 0 12 0.0000 2 165 420 4350 600 entry\001
+4 1 -1 0 0 0 12 0.0000 2 135 495 4350 825 queue\001
+4 0 -1 0 0 0 12 0.0000 2 135 525 4650 1650 arrival\001
+4 0 -1 0 0 0 12 0.0000 2 135 630 4650 1425 order of\001
+4 1 -1 0 0 0 12 0.0000 2 135 525 3450 2925 shared\001
+4 1 -1 0 0 0 12 0.0000 2 135 735 3450 3225 variables\001
+4 1 -1 0 0 0 12 0.0000 2 120 510 3000 975 mutex\001
+4 1 -1 0 0 0 10 0.0000 2 75 75 3450 1995 c\001
+4 1 -1 0 0 0 12 0.0000 2 135 570 3000 1200 queues\001
Index: doc/theses/thierry_delisle/figures/monitor_structs.fig
===================================================================
--- doc/theses/thierry_delisle/figures/monitor_structs.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/figures/monitor_structs.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,71 @@
+#FIG 3.2  Produced by xfig version 3.2.5c
+Landscape
+Center
+Inches
+Letter  
+100.00
+Single
+-2
+1200 2
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 1500 1200 2100 1200 2100 1500 1500 1500 1500 1200
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 1500 1500 2100 1500 2100 1800 1500 1800 1500 1500
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 3000 1200 3300 1200 3300 1500 3000 1500 3000 1200
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 3000 1500 3300 1500 3300 1800 3000 1800 3000 1500
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 3000 1800 3300 1800 3300 2100 3000 2100 3000 1800
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 3000 2100 3300 2100 3300 2400 3000 2400 3000 2100
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 1500 900 2100 900 2100 1200 1500 1200 1500 900
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2
+	1 1 1.00 90.00 120.00
+	5 1 1.00 45.00 90.00
+	 1800 1050 4050 1050
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 5100 900 5700 900 5700 1800 5100 1800 5100 900
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 6900 1500 7500 1500 7500 2400 6900 2400 6900 1500
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 6000 1200 6600 1200 6600 2100 6000 2100 6000 1200
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 7800 1800 8400 1800 8400 2700 7800 2700 7800 1800
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2
+	1 1 1.00 90.00 120.00
+	5 1 1.00 45.00 90.00
+	 1800 1350 3000 1350
+3 2 0 3 0 7 50 -1 -1 0.000 1 0 0 10
+	 4275 900 4050 975 4350 1050 4050 1125 4350 1200 4050 1275
+	 4350 1350 4050 1425 4350 1500 4125 1575
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+3 2 0 1 0 7 50 -1 -1 0.000 0 1 1 3
+	1 1 1.00 90.00 120.00
+	5 1 1.00 30.00 90.00
+	 3150 1950 4875 2400 6900 1650
+	 0.000 -1.000 0.000
+3 2 0 1 0 7 50 -1 -1 0.000 0 1 1 3
+	1 1 1.00 90.00 120.00
+	5 1 1.00 60.00 90.00
+	 3150 1350 4200 1650 5100 1050
+	 0.000 -1.000 0.000
+3 2 0 1 0 7 50 -1 -1 0.000 0 1 1 3
+	1 1 1.00 90.00 120.00
+	5 1 1.00 60.00 90.00
+	 3150 1650 4575 2025 6000 1350
+	 0.000 -1.000 0.000
+3 2 0 1 0 7 50 -1 -1 0.000 0 1 1 3
+	1 1 1.00 90.00 120.00
+	5 1 1.00 60.00 90.00
+	 3150 2250 5175 2775 7800 1950
+	 0.000 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 120 705 3000 675 Condition\001
+4 0 0 50 -1 0 11 0.0000 2 120 630 3000 885 Criterion\001
+4 0 0 50 -1 0 11 0.0000 2 120 705 1425 675 Condition\001
+4 0 0 50 -1 0 11 0.0000 2 120 390 1425 825 Node\001
+4 0 0 50 -1 0 11 0.0000 2 120 660 6225 675 Monitors\001
+4 0 0 50 -1 0 11 0.0000 2 165 555 3900 675 Waiting\001
+4 0 0 50 -1 0 11 0.0000 2 120 495 3900 825 Thread\001
Index: doc/theses/thierry_delisle/figures/system.fig
===================================================================
--- doc/theses/thierry_delisle/figures/system.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/figures/system.fig	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,166 @@
+#FIG 3.2  Produced by xfig version 3.2.5c
+Landscape
+Center
+Inches
+Letter  
+100.00
+Single
+-2
+1200 2
+6 5175 2700 6150 3737
+3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
+	 5475 2702 5625 2777 5325 2852 5625 2927 5325 3002 5625 3077
+	 5325 3152 5625 3227 5325 3302 5475 3377
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 120 885 5175 3737 Processor N\001
+4 0 0 50 -1 0 11 0.0000 2 120 975 5175 3527 PThread N+2\001
+-6
+6 3300 2700 4140 3737
+3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
+	 3600 2702 3750 2777 3450 2852 3750 2927 3450 3002 3750 3077
+	 3450 3152 3750 3227 3450 3302 3600 3377
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 120 840 3300 3737 Processor 0\001
+4 0 0 50 -1 0 11 0.0000 2 120 735 3300 3527 PThread 2\001
+-6
+6 600 2700 1725 3737
+3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
+	 900 2702 1050 2777 750 2852 1050 2927 750 3002 1050 3077
+	 750 3152 1050 3227 750 3302 900 3377
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 120 1125 600 3737 Main Processor\001
+4 0 0 50 -1 0 11 0.0000 2 120 735 600 3527 PThread 0\001
+-6
+6 2100 2700 2835 3737
+3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
+	 2400 2702 2550 2777 2250 2852 2550 2927 2250 3002 2550 3077
+	 2250 3152 2550 3227 2250 3302 2400 3377
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 120 450 2100 3737 Alarm\001
+4 0 0 50 -1 0 11 0.0000 2 120 735 2100 3527 PThread 1\001
+-6
+6 600 6301 1290 7367
+3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
+	 900 6302 1050 6377 750 6452 1050 6527 750 6602 1050 6677
+	 750 6752 1050 6827 750 6902 900 6977
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 150 690 600 7337 int main()\001
+4 0 0 50 -1 0 11 0.0000 2 120 570 600 7127 thread 0\001
+-6
+6 1635 6300 2205 7336
+3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
+	 1935 6301 2085 6376 1785 6451 2085 6526 1785 6601 2085 6676
+	 1785 6751 2085 6826 1785 6901 1935 6976
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 120 570 1635 7126 thread 1\001
+-6
+6 2475 6300 3045 7336
+3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
+	 2775 6301 2925 6376 2625 6451 2925 6526 2625 6601 2925 6676
+	 2625 6751 2925 6826 2625 6901 2775 6976
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 120 570 2475 7126 thread 2\001
+-6
+6 3300 6300 3870 7336
+3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
+	 3600 6301 3750 6376 3450 6451 3750 6526 3450 6601 3750 6676
+	 3450 6751 3750 6826 3450 6901 3600 6976
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 120 570 3300 7126 thread 3\001
+-6
+6 5325 6300 5970 7336
+3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
+	 5625 6301 5775 6376 5475 6451 5775 6526 5475 6601 5775 6676
+	 5475 6751 5775 6826 5475 6901 5625 6976
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 120 645 5325 7126 thread M\001
+-6
+6 4125 6300 4695 7336
+3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
+	 4425 6301 4575 6376 4275 6451 4575 6526 4275 6601 4575 6676
+	 4275 6751 4575 6826 4275 6901 4425 6976
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 120 570 4125 7126 thread 4\001
+-6
+6 6975 4050 9525 7875
+2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
+	 7125 5400 7575 5400 7575 5850 7125 5850 7125 5400
+2 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5
+	 7125 4200 7575 4200 7575 4650 7125 4650 7125 4200
+2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
+	 7125 4800 7575 4800 7575 5250 7125 5250 7125 4800
+2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
+	 6975 4050 9525 4050 9525 7875 6975 7875 6975 4050
+3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
+	 7350 6900 7500 6975 7200 7050 7500 7125 7200 7200 7500 7275
+	 7200 7350 7500 7425 7200 7500 7350 7575
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
+	 7350 6000 7500 6075 7200 6150 7500 6225 7200 6300 7500 6375
+	 7200 6450 7500 6525 7200 6600 7350 6675
+	 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+	 -1.000 0.000
+4 0 0 50 -1 0 11 0.0000 2 120 945 7725 4500 Pthread stack\001
+4 0 0 50 -1 0 11 0.0000 2 150 1530 7725 5100 Pthread stack (stolen)\001
+4 0 0 50 -1 0 11 0.0000 2 120 540 7725 6375 Pthread\001
+4 0 0 50 -1 0 11 0.0000 2 150 1065 7725 7275 $\\CFA$ thread\001
+4 0 0 50 -1 0 11 0.0000 2 150 990 7725 5700 $\\CFA$ stack\001
+-6
+1 2 0 1 0 7 50 -1 -1 0.000 1 3.1416 3150 5250 750 450 2400 4800 3900 5700
+2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
+	 1200 3900 2475 5025
+2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
+	 3600 3900 3450 4800
+2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
+	 5550 3900 3825 5025
+2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
+	 900 6225 2400 5400
+2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
+	 2100 6225 2625 5550
+2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
+	 2850 6225 3000 5700
+2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
+	 3600 6225 3375 5700
+2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
+	 4350 6300 3675 5625
+2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
+	 5625 6225 3900 5400
+2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
+	 525 975 1275 975 1275 2625 525 2625 525 975
+2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
+	 3225 975 3975 975 3975 2625 3225 2625 3225 975
+2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
+	 5100 975 5850 975 5850 2625 5100 2625 5100 975
+2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
+	 525 7425 1275 7425 1275 9075 525 9075 525 7425
+2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
+	 1575 7425 2325 7425 2325 9075 1575 9075 1575 7425
+2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
+	 2400 7425 3150 7425 3150 9075 2400 9075 2400 7425
+2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
+	 3225 7425 3975 7425 3975 9075 3225 9075 3225 7425
+2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
+	 4050 7425 4800 7425 4800 9075 4050 9075 4050 7425
+2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
+	 5250 7425 6000 7425 6000 9075 5250 9075 5250 7425
+2 1 1 8 0 7 50 -1 -1 4.000 0 0 -1 1 0 2
+	1 1 2.00 180.00 75.00
+	 2400 3900 2775 4800
+2 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5
+	 2025 2625 2775 2625 2775 975 2025 975 2025 2625
+4 0 0 50 -1 0 18 0.0000 2 30 225 4500 3150 ...\001
+4 0 0 50 -1 0 18 0.0000 2 30 225 3750 4500 ...\001
+4 0 0 50 -1 0 11 0.0000 2 120 705 2775 5325 Scheduler\001
+4 0 0 50 -1 0 18 0.0000 2 30 225 4950 6600 ...\001
+4 0 0 50 -1 0 18 0.0000 2 30 225 4200 5850 ...\001
Index: doc/theses/thierry_delisle/notes/cor-thread-traits.c
===================================================================
--- doc/theses/thierry_delisle/notes/cor-thread-traits.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/notes/cor-thread-traits.c	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,90 @@
+//-----------------------------------------------------------------------------
+// Coroutine trait
+// Anything that implements this trait can be resumed.
+// Anything that is resumed is a coroutine.
+trait is_coroutine(dtype T) {
+      void main(T* this);
+      coroutine_handle* get_handle(T* this);
+}
+
+//-----------------------------------------------------------------------------
+forall(dtype T | {coroutine_handle* T.c})	// generic accessor for any T meeting the constraint on member c
+coroutine_handle* get_handle(T* this) {
+	return this->c;	// hand back the handle stored in member c
+}
+
+//-----------------------------------------------------------------------------
+struct myCoroutine {
+	int bla;
+	coroutine_handle c;
+};
+
+void main(myCoroutine* this) {
+	sout | this->bla | endl;
+}
+
+void foo() {
+	//Run the coroutine
+	myCoroutine myc;
+	resume(myc);
+}
+
+//-----------------------------------------------------------------------------
+// Thread trait
+// Alternative 1
+trait is_thread(dtype T) { 
+      void main(T* this);
+      thread_handle* get_handle(T* this);
+	thread T;
+};
+
+//-----------------------------------------------------------------------------
+forall(dtype T | {thread_handle* T.t})	// generic accessor for any T meeting the constraint on member t
+thread_handle* get_handle(T* this) {
+	return this->t;	// NOTE(review): myThread below names its handle member 'c', not 't' -- confirm intended name
+}
+
+//-----------------------------------------------------------------------------
+thread myThread {
+	int bla;
+	thread_handle c;
+};
+
+void main(myThread* this) {
+	sout | this->bla | endl;
+}
+
+void foo() {
+	//Run the thread
+	myThread myc;
+}
+
+//-----------------------------------------------------------------------------
+// Thread trait
+// Alternative 2
+trait is_thread(dtype T) {
+      void main(T* this);
+      thread_handle* get_handle(T* this);
+	
+};
+
+//-----------------------------------------------------------------------------
+forall(dtype T | {thread_handle* T.t})	// generic accessor for any T meeting the constraint on member t
+thread_handle* get_handle(T* this) {
+	return this->t;	// NOTE(review): myThread below names its handle member 'c', not 't' -- confirm intended name
+}
+
+//-----------------------------------------------------------------------------
+struct myThread {
+	int bla;
+	thread_handle c;
+};
+
+void main(myThread* this) {
+	sout | this->bla | endl;
+}
+
+void foo() {
+	//Run the thread
+	thread(myThread) myc;
+}
Index: doc/theses/thierry_delisle/notes/lit-review.md
===================================================================
--- doc/theses/thierry_delisle/notes/lit-review.md	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/notes/lit-review.md	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,25 @@
+lit review :
+
+Lister77 : nested monitor calls
+	- explains the problem
+	- no solution
+	- Lister : An implementation of monitors.
+	- Lister : Hierarchical monitors.
+
+Haddon77 : Nested monitor calls
+	- monitors should be released before acquiring a new one.
+
+Horst Wettstein : The problem of nested monitor calls revisited
+	- Solves nested monitor by allowing barging
+
+David L. Parnas : The non-problem of nested monitor calls
+	- not an actual problem in real life
+
+M. Joseph and V. R. Prasad : More on nested monitor calls
+	- WTF... don't use monitors, use pure classes instead, whatever that is
+
+Joseph et al, 1978). 
+
+Toby Bloom : Evaluating Synchronization Mechanisms
+	- Methods to evaluate concurrency
+
Index: doc/theses/thierry_delisle/notes/notes.md
===================================================================
--- doc/theses/thierry_delisle/notes/notes.md	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/notes/notes.md	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,14 @@
+Internal scheduling notes.
+
+Internal scheduling requires a stack or queue to make sense.
+We also need a stack of "monitor contexts" to be able to restore stuff.
+
+Multi scheduling try 1 
+ - adding threads to many monitors and synching the monitors
+ - Too hard
+
+Multi scheduling try 2
+ - using a leader when in a group
+ - it's hard but doable to manage who is the leader and keep the current context
+ - basically __monitor_guard_t always saves and restores the leader and current context
+ 
Index: doc/theses/thierry_delisle/style/cfa-format.tex
===================================================================
--- doc/theses/thierry_delisle/style/cfa-format.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/style/cfa-format.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,279 @@
+\usepackage[usenames,dvipsnames]{xcolor}
+\usepackage{listings}
+\usepackage{inconsolata}
+
+\definecolor{basicCol}{HTML}{000000}
+\definecolor{commentCol}{HTML}{000000}
+\definecolor{stringCol}{HTML}{000000}
+\definecolor{keywordCol}{HTML}{000000}
+\definecolor{identifierCol}{HTML}{000000}
+
+% from https://gist.github.com/nikolajquorning/92bbbeef32e1dd80105c9bf2daceb89a
+\lstdefinelanguage{sml} {
+  morekeywords= {
+    EQUAL, GREATER, LESS, NONE, SOME, abstraction, abstype, and, andalso, array, as, before, bool, case, char, datatype, do, else, end, eqtype, exception, exn, false, fn, fun, functor, handle, if, in, include, infix, infixr, int, let, list, local, nil, nonfix, not, o, of, op, open, option, orelse, overload, print, raise, real, rec, ref, sharing, sig, signature, string, struct, structure, substring, then, true, type, unit, val, vector, where, while, with, withtype, word
+  },
+  morestring=[b]",
+  morecomment=[s]{(*}{*)},
+}
+
+\lstdefinelanguage{D}{
+  % Keywords
+  morekeywords=[1]{
+    abstract, alias, align, auto, body, break, cast, catch, class, const,
+    continue, debug, delegate, delete, deprecated, do, else, enum, export,
+    false, final, finally, for, foreach, foreach_reverse, function, goto, if,
+    immutable, import, in, inout, interface, invariant, is, lazy, macro, mixin,
+    module, new, nothrow, null, out, override, package, pragma, private,
+    protected, public, pure, ref, return, shared, static, struct, super,
+    switch, synchronized, template, this, throw, true, try, typedef, typeid,
+    typeof, union, unittest, volatile, while, with
+  },
+  % Special identifiers, common functions
+  morekeywords=[2]{enforce},
+  % Ugly identifiers
+  morekeywords=[3]{
+    __DATE__, __EOF__, __FILE__, __LINE__, __TIMESTAMP__, __TIME__, __VENDOR__,
+    __VERSION__, __ctfe, __gshared, __monitor, __thread, __vptr, _argptr,
+    _arguments, _ctor, _dtor
+  },
+  % Basic types
+  morekeywords=[4]{
+     byte, ubyte, short, ushort, int, uint, long, ulong, cent, ucent, void,
+     bool, bit, float, double, real, ushort, int, uint, long, ulong, float,
+     char, wchar, dchar, string, wstring, dstring, ireal, ifloat, idouble,
+     creal, cfloat, cdouble, size_t, ptrdiff_t, sizediff_t, equals_t, hash_t
+  },
+  % Strings
+  morestring=[b]{"},
+  morestring=[b]{'},
+  morestring=[b]{`},
+  % Comments
+  comment=[l]{//},
+  morecomment=[s]{/*}{*/},
+  morecomment=[s][\color{blue}]{/**}{*/},
+  morecomment=[n]{/+}{+/},
+  morecomment=[n][\color{blue}]{/++}{+/},
+  % Options
+  sensitive=true
+}
+
+\lstdefinelanguage{rust}{
+  % Keywords
+  morekeywords=[1]{
+    abstract, alignof, as, become, box,
+    break, const, continue, crate, do,
+    else, enum, extern, false, final,
+    fn, for, if, impl, in,
+    let, loop, macro, match, mod,
+    move, mut, offsetof, override, priv,
+    proc, pub, pure, ref, return,
+    Self, self, sizeof, static, struct,
+    super, trait, true,  type, typeof,
+    unsafe, unsized, use, virtual, where,
+    while, yield
+  },
+  % Strings
+  morestring=[b]{"},
+  % Comments
+  comment=[l]{//},
+  morecomment=[s]{/*}{*/},
+  % Options
+  sensitive=true
+}
+
+\lstdefinelanguage{pseudo}{
+	morekeywords={string,uint,int,bool,float},%
+	sensitive=true,%
+	morecomment=[l]{//},%
+	morecomment=[s]{/*}{*/},%
+	morestring=[b]',%
+	morestring=[b]",%
+	morestring=[s]{`}{`},%
+}%
+
+\newcommand{\KWC}{K-W C\xspace}
+
+\lstdefinestyle{pseudoStyle}{
+  escapeinside={@@},
+  basicstyle=\linespread{0.9}\sf\footnotesize,		% reduce line spacing and use sans-serif font
+  keywordstyle=\bfseries\color{blue},
+  keywordstyle=[2]\bfseries\color{Plum},
+  commentstyle=\itshape\color{OliveGreen},		    % green and italic comments
+  identifierstyle=\color{identifierCol},
+  stringstyle=\sf\color{Mahogany},			          % use sanserif font
+  mathescape=true,
+  columns=fixed,
+  aboveskip=4pt,                                  % spacing above/below code block
+  belowskip=3pt,
+  keepspaces=true,
+  tabsize=4,
+  % frame=lines,
+  literate=,
+  showlines=true,                                 % show blank lines at end of code
+  showspaces=false,
+  showstringspaces=false,
+  escapechar=\$,
+  xleftmargin=\parindentlnth,                     % indent code to paragraph indentation
+  moredelim=[is][\color{red}\bfseries]{**R**}{**R**},    % red highlighting
+  % moredelim=* detects keywords, comments, strings, and other delimiters and applies their formatting
+  % moredelim=** allows cumulative application
+}
+
+\lstdefinestyle{defaultStyle}{
+  escapeinside={@@},
+  basicstyle=\linespread{0.9}\tt\footnotesize,		% reduce line spacing and use typewriter font
+  keywordstyle=\bfseries\color{blue},
+  keywordstyle=[2]\bfseries\color{Plum},
+  commentstyle=\itshape\color{OliveGreen},		    % green and italic comments
+  identifierstyle=\color{identifierCol},
+  stringstyle=\sf\color{Mahogany},			          % use sanserif font
+  mathescape=true,
+  columns=fixed,
+  aboveskip=4pt,                                  % spacing above/below code block
+  belowskip=3pt,
+  keepspaces=true,
+  tabsize=4,
+  % frame=lines,
+  literate=,
+  showlines=true,                                 % show blank lines at end of code
+  showspaces=false,
+  showstringspaces=false,
+  escapechar=\$,
+  xleftmargin=\parindentlnth,                     % indent code to paragraph indentation
+  moredelim=[is][\color{red}\bfseries]{**R**}{**R**},    % red highlighting
+  % moredelim=* detects keywords, comments, strings, and other delimiters and applies their formatting
+  % moredelim=** allows cumulative application
+}
+
+\lstdefinestyle{cfaStyle}{
+  escapeinside={@@},
+  basicstyle=\linespread{0.9}\tt\footnotesize,		% reduce line spacing and use typewriter font
+  keywordstyle=\bfseries\color{blue},
+  keywordstyle=[2]\bfseries\color{Plum},
+  commentstyle=\sf\itshape\color{OliveGreen},		  % green and italic comments
+  identifierstyle=\color{identifierCol},
+  stringstyle=\sf\color{Mahogany},			          % use sanserif font
+  mathescape=true,
+  columns=fixed,
+  aboveskip=4pt,                                  % spacing above/below code block
+  belowskip=3pt,
+  keepspaces=true,
+  tabsize=4,
+  % frame=lines,
+  literate=,
+  showlines=true,                                 % show blank lines at end of code
+  showspaces=false,
+  showstringspaces=false,
+  escapechar=\$,
+  xleftmargin=\parindentlnth,                     % indent code to paragraph indentation
+  moredelim=[is][\color{red}\bfseries]{**R**}{**R**},    % red highlighting
+  morekeywords=[2]{accept, signal, signal_block, wait, waitfor},
+}
+
+\lstMakeShortInline[basewidth=0.5em,breaklines=true,basicstyle=\normalsize\ttfamily\color{basicCol}]@  % single-character for \lstinline
+
+\lstnewenvironment{ccode}[1][]{
+  \lstset{
+    language = C,
+    style=defaultStyle,
+    captionpos=b,
+    #1
+  }
+}{}
+
+\lstnewenvironment{cfacode}[1][]{
+  \lstset{
+    language = CFA,
+    style=cfaStyle,
+    captionpos=b,
+    #1
+  }
+}{}
+
+\lstnewenvironment{pseudo}[1][]{
+  \lstset{
+    language = pseudo,
+    style=pseudoStyle,
+    captionpos=b,
+    #1
+  }
+}{}
+
+\lstnewenvironment{cppcode}[1][]{
+  \lstset{
+    language = c++,
+    style=defaultStyle,
+    captionpos=b,
+    #1
+  }
+}{}
+
+\lstnewenvironment{ucppcode}[1][]{
+  \lstset{
+    language = c++,
+    style=defaultStyle,
+    captionpos=b,
+    #1
+  }
+}{}
+
+\lstnewenvironment{javacode}[1][]{
+  \lstset{
+    language = java,
+    style=defaultStyle,
+    captionpos=b,
+    #1
+  }
+}{}
+
+\lstnewenvironment{scalacode}[1][]{
+  \lstset{
+    language = scala,
+    style=defaultStyle,
+    captionpos=b,
+    #1
+  }
+}{}
+
+\lstnewenvironment{smlcode}[1][]{
+  \lstset{
+    language = sml,
+    style=defaultStyle,
+    captionpos=b,
+    #1
+  }
+}{}
+
+\lstnewenvironment{dcode}[1][]{
+  \lstset{
+    language = D,
+    style=defaultStyle,
+    captionpos=b,
+    #1
+  }
+}{}
+
+\lstnewenvironment{rustcode}[1][]{
+  \lstset{
+    language = rust,
+    style=defaultStyle,
+    captionpos=b,
+    #1
+  }
+}{}
+
+\lstnewenvironment{gocode}[1][]{
+  \lstset{
+    language = Golang,
+    style=defaultStyle,
+    captionpos=b,
+    #1
+  }
+}{}
+
+\newcommand{\zero}{\lstinline{zero_t}\xspace}
+\newcommand{\one}{\lstinline{one_t}\xspace}
+\newcommand{\ateq}{\lstinline{\@=}\xspace}
+\newcommand{\code}[1]{\lstinline[language=CFA,style=cfaStyle]{#1}}
+\newcommand{\pscode}[1]{\lstinline[language=pseudo,style=pseudoStyle]{#1}}
Index: doc/theses/thierry_delisle/style/style.tex
===================================================================
--- doc/theses/thierry_delisle/style/style.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/style/style.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,12 @@
+\input{common}                                          % bespoke macros used in the document
+\input{cfa-format}
+
+% \CFADefaultStyle
+
+% \lstset{
+% morekeywords=[2]{nomutex,mutex,thread,wait,wait_release,signal,signal_block,accept,monitor,suspend,resume,coroutine},
+% keywordstyle=[2]\color{blue},				% second set of keywords for concurency
+% basicstyle=\linespread{0.9}\tt\small,		% reduce line spacing and use typewriter font
+% stringstyle=\sf\color{Mahogany},			% use sanserif font
+% commentstyle=\itshape\color{OliveGreen},		% green and italic comments
+% }%
Index: doc/theses/thierry_delisle/text/basics.tex
===================================================================
--- doc/theses/thierry_delisle/text/basics.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/text/basics.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,497 @@
+% ======================================================================
+% ======================================================================
+\chapter{Concurrency Basics}\label{basics}
+% ======================================================================
+% ======================================================================
+Before any detailed discussion of the concurrency and parallelism in \CFA, it is important to describe the basics of concurrency and how they are expressed in \CFA user code.
+
+\section{Basics of concurrency}
+At its core, concurrency is based on having multiple call-stacks and scheduling among threads of execution executing on these stacks. Concurrency without parallelism only requires having multiple call stacks (or contexts) for a single thread of execution.
+
+Execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining. Execution with a single thread and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread's perspective) across the stacks is called concurrency.
+
+Therefore, a minimal concurrency system can be achieved by creating coroutines (see Section \ref{coroutine}), which instead of context-switching among each other, always ask an oracle where to context-switch next. While coroutines can execute on the caller's stack-frame, stack-full coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model (a.k.a., non-preemptive scheduling). The oracle/scheduler can either be a stack-less or stack-full entity and correspondingly require one or two context-switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption.
+
+A scheduler introduces order of execution uncertainty, while preemption introduces uncertainty about where context switches occur. Mutual exclusion and synchronization are ways of limiting non-determinism in a concurrent system. Now it is important to understand that uncertainty is desirable; uncertainty can be used by runtime systems to significantly increase performance and is often the basis of giving a user the illusion that tasks are running in parallel. Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows.
+
+\section{\protect\CFA's Thread Building Blocks}
+One of the important features that are missing in C is threading\footnote{While the C11 standard defines a ``threads.h'' header, it is minimal and defined as optional. As such, library support for threading is far from widespread. At the time of writing the thesis, neither \texttt{gcc} nor \texttt{clang} support ``threads.h'' in their respective standard libraries.}. On modern architectures, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write efficient concurrent programs to take advantage of parallelism. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay.
+
+\section{Coroutines: A Stepping Stone}\label{coroutine}
+While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. \textbf{Coroutine}s are generalized routines which have predefined points where execution is suspended and can be resumed at a later time. Therefore, they need to deal with context switches and other context-management operations. This proposal includes coroutines both as an intermediate step for the implementation of threads, and a first-class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolves around two features: independent call-stacks and \code{suspend}/\code{resume}.
+
+\begin{table}
+\begin{center}
+\begin{tabular}{c @{\hskip 0.025in}|@{\hskip 0.025in} c @{\hskip 0.025in}|@{\hskip 0.025in} c}
+\begin{ccode}[tabsize=2]
+//Using callbacks
+void fibonacci_func(
+	int n,
+	void (*callback)(int)
+) {
+	int f1 = 0;
+	int f2 = 1;
+	int next, i;
+	for(i = 0; i < n; i++)
+	{
+		if(i <= 1)
+			next = i;
+		else {
+			next = f1 + f2;
+			f1 = f2;
+			f2 = next;
+		}
+		callback(next);
+	}
+}
+
+int main() {
+	void print_fib(int n) {
+		printf("%d\n", n);
+	}
+
+	fibonacci_func(
+		10, print_fib
+	);
+
+
+
+}
+\end{ccode}&\begin{ccode}[tabsize=2]
+//Using output array
+void fibonacci_array(
+	int n,
+	int* array
+) {
+	int f1 = 0; int f2 = 1;
+	int next, i;
+	for(i = 0; i < n; i++)
+	{
+		if(i <= 1)
+			next = i;
+		else {
+			next = f1 + f2;
+			f1 = f2;
+			f2 = next;
+		}
+		array[i] = next;
+	}
+}
+
+
+int main() {
+	int a[10];
+
+	fibonacci_array(
+		10, a
+	);
+
+	for(int i=0;i<10;i++){
+		printf("%d\n", a[i]);
+	}
+
+}
+\end{ccode}&\begin{ccode}[tabsize=2]
+//Using external state
+typedef struct {
+	int f1, f2;
+} Iterator_t;
+
+int fibonacci_state(
+	Iterator_t* it
+) {
+	int f;
+	f = it->f1 + it->f2;
+	it->f2 = it->f1;
+	it->f1 = max(f,1);
+	return f;
+}
+
+
+
+
+
+
+
+int main() {
+	Iterator_t it={0,0};
+
+	for(int i=0;i<10;i++){
+		printf("%d\n",
+			fibonacci_state(
+				&it
+			)
+		);
+	}
+
+}
+\end{ccode}
+\end{tabular}
+\end{center}
+\caption{Different implementations of a Fibonacci sequence generator in C.}
+\label{lst:fibonacci-c}
+\end{table}
+
+A good example of a problem made easier with coroutines is generators, e.g., generating the Fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Table \ref{lst:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approaches suffer from strong coupling. The left and centre approaches require that the generator have knowledge of how the sequence is used, while the rightmost approach requires holding internal state between calls on behalf of the generator and makes it much harder to handle corner cases like the Fibonacci seed.
+
+Listing \ref{lst:fibonacci-cfa} is an example of a solution to the Fibonacci problem using \CFA coroutines, where the coroutine stack holds sufficient state for the next generation. This solution has the advantage of having very strong decoupling between how the sequence is generated and how it is used. Indeed, this version is as easy to use as the \code{fibonacci_state} solution, while the implementation is very similar to the \code{fibonacci_func} example.
+
+\begin{figure}
+\begin{cfacode}[caption={Implementation of Fibonacci using coroutines},label={lst:fibonacci-cfa}]
+coroutine Fibonacci {
+	int fn; //used for communication
+};
+
+void ?{}(Fibonacci& this) { //constructor
+	this.fn = 0;
+}
+
+//main automatically called on first resume
+void main(Fibonacci& this) with (this) {
+	int fn1, fn2; 		//retained between resumes
+	fn  = 0;
+	fn1 = fn;
+	suspend(this); 		//return to last resume
+
+	fn  = 1;
+	fn2 = fn1;
+	fn1 = fn;
+	suspend(this); 		//return to last resume
+
+	for ( ;; ) {
+		fn  = fn1 + fn2;
+		fn2 = fn1;
+		fn1 = fn;
+		suspend(this); 	//return to last resume
+	}
+}
+
+int next(Fibonacci& this) {
+	resume(this); //transfer to last suspend
+	return this.fn;
+}
+
+void main() { //regular program main
+	Fibonacci f1, f2;
+	for ( int i = 1; i <= 10; i += 1 ) {
+		sout | next( f1 ) | next( f2 ) | endl;
+	}
+}
+\end{cfacode}
+\end{figure}
+
+Listing \ref{lst:fmt-line} shows the \code{Format} coroutine for restructuring text into groups of character blocks of fixed size. The example takes advantage of resuming coroutines in the constructor to simplify the code and highlights the idea that interesting control flow can occur in the constructor.
+
+\begin{figure}
+\begin{cfacode}[tabsize=3,caption={Formatting text into lines of 5 blocks of 4 characters.},label={lst:fmt-line}]
+//format characters into blocks of 4 and groups of 5 blocks per line
+coroutine Format {
+	char ch;									//used for communication
+	int g, b;								//global because used in destructor
+};
+
+void  ?{}(Format& fmt) {
+	resume( fmt );  						//prime (start) coroutine
+}
+
+void ^?{}(Format& fmt) with fmt {
+	if ( fmt.g != 0 || fmt.b != 0 )
+	sout | endl;
+}
+
+void main(Format& fmt) with fmt {
+	for ( ;; ) {							//for as many characters
+		for(g = 0; g < 5; g++) {		//groups of 5 blocks
+			for(b = 0; b < 4; b++) {	//blocks of 4 characters
+				suspend();
+				sout | ch;					//print character
+			}
+			sout | "  ";					//print block separator
+		}
+		sout | endl;						//print group separator
+	}
+}
+
+void prt(Format & fmt, char ch) {
+	fmt.ch = ch;
+	resume(fmt);
+}
+
+int main() {
+	Format fmt;
+	char ch;
+	Eof: for ( ;; ) {						//read until end of file
+		sin | ch;							//read one character
+		if(eof(sin)) break Eof;			//eof ?
+		prt(fmt, ch);						//push character for formatting
+	}
+}
+\end{cfacode}
+\end{figure}
+
+\subsection{Construction}
+One important design challenge for implementing coroutines and threads (shown in section \ref{threads}) is that the runtime system needs to run code after the user-constructor runs to connect the fully constructed object into the system. In the case of coroutines, this challenge is simpler since there is no non-determinism from preemption or scheduling. However, the underlying challenge remains the same for coroutines and threads.
+
+The runtime system needs to create the coroutine's stack and, more importantly, prepare it for the first resumption. The timing of the creation is non-trivial since users expect both to have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. There are several solutions to this problem but the chosen option effectively forces the design of the coroutine.
+
+Furthermore, \CFA faces an extra challenge as polymorphic routines create invisible thunks when cast to non-polymorphic routines and these thunks have function scope. For example, the following code, while looking benign, can run into undefined behaviour because of thunks:
+
+\begin{cfacode}
+//async: Runs function asynchronously on another thread
+forall(otype T)
+extern void async(void (*func)(T*), T* obj);
+
+forall(otype T)
+void noop(T*) {}
+
+void bar() {
+	int a;
+	async(noop, &a); //start thread running noop with argument a
+}
+\end{cfacode}
+
+The generated C code\footnote{Code trimmed down for brevity} creates a local thunk to hold type information:
+
+\begin{ccode}
+extern void async(/* omitted */, void (*func)(void*), void* obj);
+
+void noop(/* omitted */, void* obj){}
+
+void bar(){
+	int a;
+	void _thunk0(int* _p0){
+		/* omitted */
+		noop(/* omitted */, _p0);
+	}
+	/* omitted */
+	async(/* omitted */, ((void (*)(void*))(&_thunk0)), (&a));
+}
+\end{ccode}
+The problem in this example is a storage management issue: the function pointer \code{_thunk0} is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes undefined behaviour; i.e., the stack-based thunk being destroyed before it can be used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that nested routines cannot be passed outside of the declaration scope. The case of coroutines and threads is simply an extension of this problem to multiple call stacks.
+
+\subsection{Alternative: Composition}
+One solution to this challenge is to use composition/containment, where coroutine fields are added to manage the coroutine.
+
+\begin{cfacode}
+struct Fibonacci {
+	int fn; //used for communication
+	coroutine c; //composition
+};
+
+void FibMain(void*) {
+	//...
+}
+
+void ?{}(Fibonacci& this) {
+	this.fn = 0;
+	//Call constructor to initialize coroutine
+	(this.c){FibMain};
+}
+\end{cfacode}
+The downside of this approach is that users need to correctly construct the coroutine handle before using it. Like any other objects, the user must carefully choose construction order to prevent usage of objects not yet constructed. However, in the case of coroutines, users must also pass to the coroutine information about the coroutine main, like in the previous example. This opens the door for user errors and requires extra runtime storage to pass at runtime information that can be known statically.
+
+\subsection{Alternative: Reserved keyword}
+The next alternative is to use language support to annotate coroutines as follows:
+
+\begin{cfacode}
+coroutine Fibonacci {
+	int fn; //used for communication
+};
+\end{cfacode}
+The \code{coroutine} keyword means the compiler can find and inject code where needed. The downside of this approach is that it makes coroutine a special case in the language. Users wanting to extend coroutines or build their own for various reasons can only do so in ways offered by the language. Furthermore, implementing coroutines without language support also displays the power of the programming language used. While this is ultimately the option used for idiomatic \CFA code, coroutines and threads can still be constructed by users without using the language support. The reserved keywords are only present to improve ease of use for the common cases.
+
+\subsection{Alternative: Lambda Objects}
+
+For coroutines as for threads, many implementations are based on routine pointers or function objects~\cite{Butenhof97, ANSI14:C++, MS:VisualC++, BoostCoroutines15}. For example, Boost implements coroutines in terms of four functor object types:
+\begin{cfacode}
+asymmetric_coroutine<>::pull_type
+asymmetric_coroutine<>::push_type
+symmetric_coroutine<>::call_type
+symmetric_coroutine<>::yield_type
+\end{cfacode}
+Often, the canonical threading paradigm in languages is based on function pointers, \texttt{pthread} being one of the most well-known examples. The main problem of this approach is that the thread usage is limited to a generic handle that must otherwise be wrapped in a custom type. Since the custom type is simple to write in \CFA and solves several issues, adding support for routine/lambda based coroutines adds very little.
+
+A variation of this would be to use a simple function pointer in the same way \texttt{pthread} does for threads:
+\begin{cfacode}
+void foo( coroutine_t cid, void* arg ) {
+	int* value = (int*)arg;
+	//Coroutine body
+}
+
+int main() {
+	int value = 0;
+	coroutine_t cid = coroutine_create( &foo, (void*)&value );
+	coroutine_resume( &cid );
+}
+\end{cfacode}
+This semantics is more common for thread interfaces but coroutines work equally well. As discussed in section \ref{threads}, this approach is superseded by static approaches in terms of expressivity.
+
+\subsection{Alternative: Trait-Based Coroutines}
+
+Finally, the underlying approach, which is the one closest to \CFA idioms, is to use trait-based lazy coroutines. This approach defines a coroutine as anything that satisfies the trait \code{is_coroutine} (as defined below) and is used as a coroutine.
+
+\begin{cfacode}
+trait is_coroutine(dtype T) {
+      void main(T& this);
+      coroutine_desc* get_coroutine(T& this);
+};
+
+forall( dtype T | is_coroutine(T) ) void suspend(T&);
+forall( dtype T | is_coroutine(T) ) void resume (T&);
+\end{cfacode}
+This ensures that an object is not a coroutine until \code{resume} is called on the object. Correspondingly, any object that is passed to \code{resume} is a coroutine since it must satisfy the \code{is_coroutine} trait to compile. The advantage of this approach is that users can easily create different types of coroutines, for example, changing the memory layout of a coroutine is trivial when implementing the \code{get_coroutine} routine. The \CFA keyword \code{coroutine} simply has the effect of implementing the getter and forward declarations required for users to implement the main routine.
+
+\begin{center}
+\begin{tabular}{c c c}
+\begin{cfacode}[tabsize=3]
+coroutine MyCoroutine {
+	int someValue;
+};
+\end{cfacode} & == & \begin{cfacode}[tabsize=3]
+struct MyCoroutine {
+	int someValue;
+	coroutine_desc __cor;
+};
+
+static inline
+coroutine_desc* get_coroutine(
+	struct MyCoroutine& this
+) {
+	return &this.__cor;
+}
+
+void main(struct MyCoroutine& this);
+\end{cfacode}
+\end{tabular}
+\end{center}
+
+The combination of these two approaches allows users new to coroutining and concurrency to have an easy and concise specification, while more advanced users have tighter control on memory layout and initialization.
+
+\section{Thread Interface}\label{threads}
+The basic building blocks of multithreading in \CFA are \glspl{cfathread}. Both user and kernel threads are supported, where user threads are the concurrency mechanism and kernel threads are the parallel mechanism. User threads offer a flexible and lightweight interface. A thread can be declared using a struct declaration \code{thread} as follows:
+
+\begin{cfacode}
+thread foo {};
+\end{cfacode}
+
+As for coroutines, the keyword is a thin wrapper around a \CFA trait:
+
+\begin{cfacode}
+trait is_thread(dtype T) {
+      void ^?{}(T & mutex this);
+      void main(T & this);
+      thread_desc* get_thread(T & this);
+};
+\end{cfacode}
+
+Obviously, for this thread implementation to be useful it must run some user code. Several other threading interfaces use a function-pointer representation as the interface of threads (for example \Csharp~\cite{Csharp} and Scala~\cite{Scala}). However, this proposal considers that statically tying a \code{main} routine to a thread supersedes this approach. Since the \code{main} routine is already a special routine in \CFA (where the program begins), it is a natural extension of the semantics to use overloading to declare mains for different threads (the normal main being the main of the initial thread). As such the \code{main} routine of a thread can be defined as
+\begin{cfacode}
+thread foo {};
+
+void main(foo & this) {
+	sout | "Hello World!" | endl;
+}
+\end{cfacode}
+
+In this example, threads of type \code{foo} start execution in the \code{void main(foo &)} routine, which prints \code{"Hello World!"}. While this thesis encourages this approach to enforce strongly typed programming, users may prefer to use the routine-based thread semantics for the sake of simplicity. With the static semantics it is trivial to write a thread type that takes a function pointer as a parameter and executes it on its stack asynchronously.
+\begin{cfacode}
+typedef void (*voidFunc)(int);
+
+thread FuncRunner {
+	voidFunc func;
+	int arg;
+};
+
+void ?{}(FuncRunner & this, voidFunc inFunc, int arg) {
+	this.func = inFunc;
+	this.arg  = arg;
+}
+
+void main(FuncRunner & this) {
+	//thread starts here and runs the function
+	this.func( this.arg );
+}
+
+void hello(/*unused*/ int) {
+	sout | "Hello World!" | endl;
+}
+
+int main() {
+	FuncRunner f = {hello, 42};
+	return 0;
+}
+\end{cfacode}
+
+A consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \acrshort{api}.
+
+Of course, for threads to be useful, it must be possible to start and stop threads and wait for them to complete execution. While using an \acrshort{api} such as \code{fork} and \code{join} is relatively common in the literature, such an interface is unnecessary. Indeed, the simplest approach is to use \acrshort{raii} principles and have threads \code{fork} after the constructor has completed and \code{join} before the destructor runs.
+\begin{cfacode}
+thread World;
+
+void main(World & this) {
+	sout | "World!" | endl;
+}
+
+void main() {
+	World w;
+	//Thread forks here
+
+	//Printing "Hello " and "World!" are run concurrently
+	sout | "Hello " | endl;
+
+	//Implicit join at end of scope
+}
+\end{cfacode}
+
+This semantics has several advantages over explicit semantics: a thread is always started and stopped exactly once, users cannot make any programming errors, and it naturally scales to multiple threads meaning basic synchronization is very simple.
+
+\begin{cfacode}
+thread MyThread {
+	//...
+};
+
+//main
+void main(MyThread& this) {
+	//...
+}
+
+void foo() {
+	MyThread thrds[10];
+	//Start 10 threads at the beginning of the scope
+
+	DoStuff();
+
+	//Wait for the 10 threads to finish
+}
+\end{cfacode}
+
+However, one of the drawbacks of this approach is that threads always form a tree where nodes must always outlive their children, i.e., they are always destroyed in the opposite order of construction because of C scoping rules. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created.
+
+\begin{cfacode}
+thread MyThread {
+	//...
+};
+
+void main(MyThread& this) {
+	//...
+}
+
+void foo() {
+	MyThread* long_lived;
+	{
+		//Start a thread at the beginning of the scope
+		MyThread short_lived;
+
+		//create another thread that will outlive the thread in this scope
+		long_lived = new MyThread;
+
+		DoStuff();
+
+		//Wait for the thread short_lived to finish
+	}
+	DoMoreStuff();
+
+	//Now wait for the long_lived to finish
+	delete long_lived;
+}
+\end{cfacode}
Index: doc/theses/thierry_delisle/text/cforall.tex
===================================================================
--- doc/theses/thierry_delisle/text/cforall.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/text/cforall.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,167 @@
+% ======================================================================
+% ======================================================================
+\chapter{\CFA Overview}
+% ======================================================================
+% ======================================================================
+
+The following is a quick introduction to the \CFA language, specifically tailored to the features needed to support concurrency.
+
+\CFA is an extension of ISO-C and therefore supports all of the same paradigms as C. It is a non-object-oriented system-language, meaning most of the major abstractions have either no runtime overhead or can be opted out easily. Like C, the basics of \CFA revolve around structures and routines, which are thin abstractions over machine code. The vast majority of the code produced by the \CFA translator respects memory layouts and calling conventions laid out by C. Interestingly, while \CFA is not an object-oriented language, lacking the concept of a receiver (e.g., {\tt this}), it does have some notion of objects\footnote{C defines the term objects as : ``region of data storage in the execution environment, the contents of which can represent
+values''~\cite[3.15]{C11}}, most importantly construction and destruction of objects. Most of the following code examples can be found on the \CFA website~\cite{www-cfa}.
+
+% ======================================================================
+\section{References}
+
+Like \CC, \CFA introduces rebind-able references providing multiple dereferencing as an alternative to pointers. With regard to concurrency, the semantic differences between pointers and references are not particularly relevant, but since this document uses mostly references, here is a quick overview of the semantics:
+\begin{cfacode}
+int x, *p1 = &x, **p2 = &p1, ***p3 = &p2,
+	&r1 = x,    &&r2 = r1,   &&&r3 = r2;
+***p3 = 3;							//change x
+r3    = 3;							//change x, ***r3
+**p3  = ...;						//change p1
+*p3   = ...;						//change p2
+int y, z, & ar[3] = {x, y, z};		//initialize array of references
+typeof( ar[1]) p;					//is int, referenced object type
+typeof(&ar[1]) q;					//is int &, reference type
+sizeof( ar[1]) == sizeof(int);		//is true, referenced object size
+sizeof(&ar[1]) == sizeof(int *);	//is true, reference size
+\end{cfacode}
+The important takeaway from this code example is that a reference offers a handle to an object, much like a pointer, but one that is automatically dereferenced for convenience.
+
+% ======================================================================
+\section{Overloading}
+
+Another important feature of \CFA is function overloading as in Java and \CC, where routines with the same name are selected based on the number and type of the arguments. As well, \CFA uses the return type as part of the selection criteria, as in Ada~\cite{Ada}. For routines with multiple parameters and returns, the selection is complex.
+\begin{cfacode}
+//selection based on type and number of parameters
+void f(void);			//(1)
+void f(char);			//(2)
+void f(int, double);	//(3)
+f();					//select (1)
+f('a');					//select (2)
+f(3, 5.2);				//select (3)
+
+//selection based on type and number of returns
+char   f(int);			//(1)
+double f(int);			//(2)
+char   c = f(3);		//select (1)
+double d = f(4);		//select (2)
+\end{cfacode}
+This feature is particularly important for concurrency since the runtime system relies on creating different types to represent concurrency objects. Therefore, overloading is necessary to prevent the need for long prefixes and other naming conventions that prevent name clashes. As seen in chapter \ref{basics}, routine \code{main} is an example that benefits from overloading.
+
+% ======================================================================
+\section{Operators}
+Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation appear, e.g.:
+\begin{cfacode}
+int ++? (int op);              		//unary prefix increment
+int ?++ (int op);              		//unary postfix increment
+int ?+? (int op1, int op2);    		//binary plus
+int ?<=?(int op1, int op2);   		//binary less than or equal
+int ?=? (int & op1, int op2);  		//binary assignment
+int ?+=?(int & op1, int op2); 		//binary plus-assignment
+
+struct S {int i, j;};
+S ?+?(S op1, S op2) {				//add two structures
+	return (S){op1.i + op2.i, op1.j + op2.j};
+}
+S s1 = {1, 2}, s2 = {2, 3}, s3;
+s3 = s1 + s2;						//compute sum: s3 == {3, 5}
+\end{cfacode}
+While concurrency does not use operator overloading directly, this feature is more important as an introduction for the syntax of constructors.
+
+% ======================================================================
+\section{Constructors/Destructors}
+Object lifetime is often a challenge in concurrency. \CFA uses the approach of giving concurrent meaning to object lifetime as a means of synchronization and/or mutual exclusion. Since \CFA relies heavily on the lifetime of objects, constructors and destructors are a core feature required for concurrency and parallelism. \CFA uses the following syntax for constructors and destructors:
+\begin{cfacode}
+struct S {
+	size_t size;
+	int * ia;
+};
+void ?{}(S & s, int asize) {	//constructor operator
+	s.size = asize;				//initialize fields
+	s.ia = calloc(size, sizeof(S));
+}
+void ^?{}(S & s) {				//destructor operator
+	free(ia);					//de-initialize fields
+}
+int main() {
+	S x = {10}, y = {100};		//implicit calls: ?{}(x, 10), ?{}(y, 100)
+	...							//use x and y
+	^x{};  ^y{};				//explicit calls to de-initialize
+	x{20};  y{200};				//explicit calls to reinitialize
+	...							//reuse x and y
+}								//implicit calls: ^?{}(y), ^?{}(x)
+\end{cfacode}
+The language guarantees that every object and all their fields are constructed. Like \CC, construction of an object is automatically done on allocation and destruction of the object is done on deallocation. Allocation and deallocation can occur on the stack or on the heap.
+\begin{cfacode}
+{
+	struct S s = {10};	//allocation, call constructor
+	...
+}						//deallocation, call destructor
+struct S * s = new();	//allocation, call constructor
+...
+delete(s);				//deallocation, call destructor
+\end{cfacode}
+Note that like \CC, \CFA introduces \code{new} and \code{delete}, which behave like \code{malloc} and \code{free} in addition to constructing and destructing objects, after calling \code{malloc} and before calling \code{free}, respectively.
+
+% ======================================================================
+\section{Parametric Polymorphism}
+\label{s:ParametricPolymorphism}
+Routines in \CFA can also be reused for multiple types. This capability is done using the \code{forall} clauses, which allow separately compiled routines to support generic usage over multiple types. For example, the following sum function works for any type that supports construction from 0 and addition:
+\begin{cfacode}
+//constraint type, 0 and +
+forall(otype T | { void ?{}(T *, zero_t); T ?+?(T, T); })
+T sum(T a[ ], size_t size) {
+	T total = 0;				//construct T from 0
+	for(size_t i = 0; i < size; i++)
+		total = total + a[i];	//select appropriate +
+	return total;
+}
+
+S sa[5];
+int i = sum(sa, 5);				//use S's 0 construction and +
+\end{cfacode}
+
+Since writing constraints on types can become cumbersome for more constrained functions, \CFA also has the concept of traits. Traits are named collections of constraints that can be used both instead of and in addition to regular constraints:
+\begin{cfacode}
+trait summable( otype T ) {
+	void ?{}(T *, zero_t);		//constructor from 0 literal
+	T ?+?(T, T);				//assortment of additions
+	T ?+=?(T *, T);
+	T ++?(T *);
+	T ?++(T *);
+};
+forall( otype T | summable(T) )	//use trait
+T sum(T a[], size_t size);
+\end{cfacode}
+
+Note that the type used for assertions can be either an \code{otype} or a \code{dtype}. Types declared as \code{otype} refer to ``complete'' objects, i.e., objects with a size, a default constructor, a copy constructor, a destructor and an assignment operator. Using \code{dtype}, on the other hand, makes none of these assumptions but is extremely restrictive; it only guarantees the object is addressable.
+
+% ======================================================================
+\section{with Clause/Statement}
+Since \CFA lacks the concept of a receiver, certain functions end up needing to repeat variable names often. To remove this inconvenience, \CFA provides the \code{with} statement, which opens an aggregate scope making its fields directly accessible (like Pascal).
+\begin{cfacode}
+struct S { int i, j; };
+int mem(S & this) with (this)		//with clause
+	i = 1;							//this->i
+	j = 2;							//this->j
+}
+int foo() {
+	struct S1 { ... } s1;
+	struct S2 { ... } s2;
+	with (s1) 						//with statement
+	{
+		//access fields of s1 without qualification
+		with (s2)					//nesting
+		{
+			//access fields of s1 and s2 without qualification
+		}
+	}
+	with (s1, s2) 					//scopes open in parallel
+	{
+		//access fields of s1 and s2 without qualification
+	}
+}
+\end{cfacode}
+
+For more information on \CFA see \cite{cforall-ug,rob-thesis,www-cfa}.
Index: doc/theses/thierry_delisle/text/concurrency.tex
===================================================================
--- doc/theses/thierry_delisle/text/concurrency.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/text/concurrency.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,998 @@
+% ======================================================================
+% ======================================================================
+\chapter{Concurrency}
+% ======================================================================
+% ======================================================================
+Several tools can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared state, some languages and libraries simply disallow mutable shared state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely related to networking concepts (channels~\cite{CSP,Go} for example). However, in languages that use routine calls as their core abstraction mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine calls). This distinction in turn means that, in order to be effective, programmers need to learn two sets of design patterns. While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
+
+Approaches based on shared memory are more closely related to non-concurrent paradigms since they often rely on basic constructs like routine calls and shared objects. At the lowest level, concurrent paradigms are implemented as atomic operations and locks. Many such mechanisms have been proposed, including semaphores~\cite{Dijkstra68b} and path expressions~\cite{Campbell74}. However, for productivity reasons it is desirable to have a higher-level construct be the core concurrency paradigm~\cite{HPP:Study}.
+
+An approach that is worth mentioning because it is gaining in popularity is transactional memory~\cite{Herlihy93}. While this approach is even pursued by system languages like \CC~\cite{Cpp-Transactions}, the performance and feature set is currently too restrictive to be the main concurrency paradigm for system languages, which is why it was rejected as the core paradigm for concurrency in \CFA.
+
+One of the most natural, elegant, and efficient mechanisms for synchronization and communication, especially for shared-memory systems, is the \emph{monitor}. Monitors were first proposed by Brinch Hansen~\cite{Hansen73} and later described and extended by C.A.R.~Hoare~\cite{Hoare74}. Many programming languages---e.g., Concurrent Pascal~\cite{ConcurrentPascal}, Mesa~\cite{Mesa}, Modula~\cite{Modula-2}, Turing~\cite{Turing:old}, Modula-3~\cite{Modula-3}, NeWS~\cite{NeWS}, Emerald~\cite{Emerald}, \uC~\cite{Buhr92a} and Java~\cite{Java}---provide monitors as explicit language constructs. In addition, operating-system kernels and device drivers have a monitor-like structure, although they often use lower-level primitives such as semaphores or locks to simulate monitors. For these reasons, this project proposes monitors as the core concurrency construct.
+
+\section{Basics}
+Non-determinism requires concurrent systems to offer support for mutual-exclusion and synchronization. Mutual-exclusion is the concept that only a fixed number of threads can access a critical section at any given time, where a critical section is a group of instructions on an associated portion of data that requires the restricted access. On the other hand, synchronization enforces relative ordering of execution and synchronization tools provide numerous mechanisms to establish timing relationships among threads.
+
+\subsection{Mutual-Exclusion}
+As mentioned above, mutual-exclusion is the guarantee that only a fixed number of threads can enter a critical section at once. However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use. Methods range from low-level locks, which are fast and flexible but require significant attention to be correct, to higher-level concurrency techniques, which sacrifice some performance in order to improve ease of use. Ease of use comes by either guaranteeing some problems cannot occur (e.g., being deadlock free) or by offering a more explicit coupling between data and corresponding critical section. For example, the \CC \code{std::atomic<T>} offers an easy way to express mutual-exclusion on a restricted set of operations (e.g., reading/writing large types atomically). Another challenge with low-level locks is composability. Locks have restricted composability because it takes careful organizing for multiple locks to be used while preventing deadlocks. Easing composability is another feature higher-level mutual-exclusion mechanisms often offer.
+
+\subsection{Synchronization}
+As with mutual-exclusion, low-level synchronization primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanisms often simplify usage by either adding better coupling between synchronization and data (e.g., message passing) or offering a simpler solution to otherwise involved challenges. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronization happens within a critical section, where threads must acquire mutual-exclusion in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property is called \textbf{barging}. For example, barging occurs when event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. The classic example is the thread that finishes using a resource and unblocks a thread waiting to use the resource, but the unblocked thread must compete to acquire the resource. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use flag variables to detect barging threads are said to be using barging avoidance, while algorithms that baton-pass locks~\cite{Andrews89} between threads instead of releasing the locks are said to be using barging prevention.
+
+% ======================================================================
+% ======================================================================
+\section{Monitors}
+% ======================================================================
+% ======================================================================
+A \textbf{monitor} is a set of routines that ensure mutual-exclusion when accessing shared state. More precisely, a monitor is a programming technique that associates mutual-exclusion with routine scopes, as opposed to mutex locks, where mutual-exclusion is defined by lock/release calls independently of any scoping of the calling routine. This strong association eases readability and maintainability, at the cost of flexibility. Note that both monitors and mutex locks require an abstract handle to identify them. This concept is generally associated with object-oriented languages like Java~\cite{Java} or \uC~\cite{uC++book} but does not strictly require OO semantics. The only requirement is the ability to declare a handle to a shared object and a set of routines that act on it:
+\begin{cfacode}
+typedef /*some monitor type*/ monitor;
+int f(monitor & m);
+
+int main() {
+	monitor m;  //Handle m
+	f(m);       //Routine using handle
+}
+\end{cfacode}
+
+% ======================================================================
+% ======================================================================
+\subsection{Call Semantics} \label{call}
+% ======================================================================
+% ======================================================================
+The above monitor example displays some of the intrinsic characteristics. First, it is necessary to use pass-by-reference over pass-by-value for monitor routines. This semantics is important, because at their core, monitors are implicit mutual-exclusion objects (locks), and these objects cannot be copied. Therefore, monitors are non-copy-able objects (\code{dtype}).
+
+Another aspect to consider is when a monitor acquires its mutual exclusion. For example, a monitor may need to be passed through multiple helper routines that do not acquire the monitor mutual-exclusion on entry. Passthrough can occur for generic helper routines (\code{swap}, \code{sort}, etc.) or specific helper routines like the following to implement an atomic counter:
+
+\begin{cfacode}
+monitor counter_t { /*...see section $\ref{data}$...*/ };
+
+void ?{}(counter_t & nomutex this); //constructor
+size_t ++?(counter_t & mutex this); //increment
+
+//need for mutex is platform dependent
+void ?{}(size_t * this, counter_t & mutex cnt); //conversion
+\end{cfacode}
+This counter is used as follows:
+\begin{center}
+\begin{tabular}{c @{\hskip 0.35in} c @{\hskip 0.35in} c}
+\begin{cfacode}
+//shared counter
+counter_t cnt1, cnt2;
+
+//multiple threads access counter
+thread 1 : cnt1++; cnt2++;
+thread 2 : cnt1++; cnt2++;
+thread 3 : cnt1++; cnt2++;
+	...
+thread N : cnt1++; cnt2++;
+\end{cfacode}
+\end{tabular}
+\end{center}
+Notice how the counter is used without any explicit synchronization and yet supports thread-safe semantics for both reading and writing, which is similar in usage to the \CC template \code{std::atomic}.
+
+Here, the constructor (\code{?\{\}}) uses the \code{nomutex} keyword to signify that it does not acquire the monitor mutual-exclusion when constructing. This semantics is because an object not yet con\-structed should never be shared and therefore does not require mutual exclusion. Furthermore, it allows the implementation greater freedom when it initializes the monitor locking. The prefix increment operator uses \code{mutex} to protect the incrementing process from race conditions. Finally, there is a conversion operator from \code{counter_t} to \code{size_t}. This conversion may or may not require the \code{mutex} keyword depending on whether or not reading a \code{size_t} is an atomic operation.
+
+For maximum usability, monitors use \gls{multi-acq} semantics, which means a single thread can acquire the same monitor multiple times without deadlock. For example, listing \ref{fig:search} uses recursion and \gls{multi-acq} to print values inside a binary tree.
+\begin{figure}
+\begin{cfacode}[caption={Recursive printing algorithm using \gls{multi-acq}.},label={fig:search}]
+monitor printer { ... };
+struct tree {
+	tree * left, right;
+	char * value;
+};
+void print(printer & mutex p, char * v);
+
+void print(printer & mutex p, tree * t) {
+	print(p, t->value);
+	print(p, t->left );
+	print(p, t->right);
+}
+\end{cfacode}
+\end{figure}
+
+Having both \code{mutex} and \code{nomutex} keywords can be redundant, depending on the meaning of a routine having neither of these keywords. For example, it is reasonable that it should default to the safest option (\code{mutex}) when given a routine without qualifiers \code{void foo(counter_t & this)}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. On the other hand, \code{nomutex} is the ``normal'' parameter behaviour; it effectively states explicitly that ``this routine is not special''. Another alternative is making exactly one of these keywords mandatory, which provides the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}.
+
+The next semantic decision is to establish when \code{mutex} may be used as a type qualifier. Consider the following declarations:
+\begin{cfacode}
+int f1(monitor & mutex m);
+int f2(const monitor & mutex m);
+int f3(monitor ** mutex m);
+int f4(monitor * mutex m []);
+int f5(graph(monitor *) & mutex m);
+\end{cfacode}
+The problem is to identify which object(s) should be acquired. Furthermore, each object needs to be acquired only once. In the case of simple routines like \code{f1} and \code{f2} it is easy to identify an exhaustive list of objects to acquire on entry. Adding indirections (\code{f3}) still allows the compiler and programmer to identify which object is acquired. However, adding in arrays (\code{f4}) makes it much harder. Array lengths are not necessarily known in C, and even then, making sure objects are only acquired once becomes non-trivial. This problem can be extended to absurd limits like \code{f5}, which uses a graph of monitors. To make the issue tractable, this project imposes the requirement that a routine may only acquire one monitor per parameter and it must be the type of the parameter with at most one level of indirection (ignoring potential qualifiers). Also note that while routine \code{f3} can be supported, meaning that monitor \code{**m} is acquired, passing an array to this routine would be type-safe and yet result in undefined behaviour because only the first element of the array is acquired. However, this ambiguity is part of the C type-system with respect to arrays. For this reason, \code{mutex} is disallowed in the context where arrays may be passed:
+\begin{cfacode}
+int f1(monitor & mutex m);    //Okay : recommended case
+int f2(monitor * mutex m);    //Not Okay : Could be an array
+int f3(monitor mutex m []);  //Not Okay : Array of unknown length
+int f4(monitor ** mutex m);   //Not Okay : Could be an array
+int f5(monitor * mutex m []); //Not Okay : Array of unknown length
+\end{cfacode}
+Note that not all array functions are actually distinct in the type system. However, even if the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic.
+
+Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to specify the object that acquires mutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls.
+\begin{cfacode}
+int f(MonitorA & mutex a, MonitorB & mutex b);
+
+MonitorA a;
+MonitorB b;
+f(a,b);
+\end{cfacode}
+While OO monitors could be extended with a mutex qualifier for multiple-monitor calls, no example of this feature could be found. The capability to acquire multiple locks before entering a critical section is called \emph{\gls{bulk-acq}}. In practice, writing multi-locking routines that do not lead to deadlocks is tricky. Having language support for such a feature is therefore a significant asset for \CFA. In the case presented above, \CFA guarantees that the order of acquisition is consistent across calls to different routines using the same monitors as arguments. This consistent ordering means acquiring multiple monitors is safe from deadlock when using \gls{bulk-acq}. However, users can still force the acquiring order. For example, notice which routines use \code{mutex}/\code{nomutex} and how this affects acquiring order:
+\begin{cfacode}
+void foo(A& mutex a, B& mutex b) { //acquire a & b
+	...
+}
+
+void bar(A& mutex a, B& /*nomutex*/ b) { //acquire a
+	... foo(a, b); ... //acquire b
+}
+
+void baz(A& /*nomutex*/ a, B& mutex b) { //acquire b
+	... foo(a, b); ... //acquire a
+}
+\end{cfacode}
+The \gls{multi-acq} monitor lock allows a monitor lock to be acquired by either \code{bar} or \code{baz} and acquired again in \code{foo}. In the calls to \code{bar} and \code{baz} the monitors are acquired in opposite order.
+
+However, such use leads to lock acquiring order problems. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle difference means that calling these routines concurrently may lead to deadlock and is therefore undefined behaviour. As shown in~\cite{Lister77}, solving this problem requires:
+\begin{enumerate}
+	\item Dynamically tracking the monitor-call order.
+	\item Implementing rollback semantics.
+\end{enumerate}
+While the first requirement is already a significant constraint on the system, implementing a general rollback semantics in a C-like language is still prohibitively complex~\cite{Dice10}. In \CFA, users simply need to be careful when acquiring multiple monitors at the same time or only use \gls{bulk-acq} of all the monitors. While \CFA provides only a partial solution, most systems provide no solution and the \CFA partial solution handles many useful cases.
+
+For example, \gls{multi-acq} and \gls{bulk-acq} can be used together in interesting ways:
+\begin{cfacode}
+monitor bank { ... };
+
+void deposit( bank & mutex b, int deposit );
+
+void transfer( bank & mutex mybank, bank & mutex yourbank, int me2you) {
+	deposit( mybank, -me2you );
+	deposit( yourbank, me2you );
+}
+\end{cfacode}
+This example shows a trivial solution to the bank-account transfer problem~\cite{BankTransfer}. Without \gls{multi-acq} and \gls{bulk-acq}, the solution to this problem is much more involved and requires careful engineering.
+
+\subsection{\code{mutex} statement} \label{mutex-stmt}
+
+The call semantics discussed above have one software engineering issue: only a routine can acquire the mutual-exclusion of a set of monitors. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem~\cite{2FTwoHardThings}. Table \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitors is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
+
+\begin{table}
+\begin{center}
+\begin{tabular}{|c|c|}
+function call & \code{mutex} statement \\
+\hline
+\begin{cfacode}[tabsize=3]
+monitor M {};
+void foo( M & mutex m1, M & mutex m2 ) {
+	//critical section
+}
+
+void bar( M & m1, M & m2 ) {
+	foo( m1, m2 );
+}
+\end{cfacode}&\begin{cfacode}[tabsize=3]
+monitor M {};
+void bar( M & m1, M & m2 ) {
+	mutex(m1, m2) {
+		//critical section
+	}
+}
+
+
+\end{cfacode}
+\end{tabular}
+\end{center}
+\caption{Regular call semantics vs. \code{mutex} statement}
+\label{lst:mutex-stmt}
+\end{table}
+
+% ======================================================================
+% ======================================================================
+\subsection{Data semantics} \label{data}
+% ======================================================================
+% ======================================================================
+Once the call semantics are established, the next step is to establish data semantics. Indeed, until now a monitor is used simply as a generic handle but in most cases monitors contain shared data. This data should be intrinsic to the monitor declaration to prevent any accidental use of data without its appropriate protection. For example, here is a complete version of the counter shown in section \ref{call}:
+\begin{cfacode}
+monitor counter_t {
+	int value;
+};
+
+void ?{}(counter_t & this) {
+	this.cnt = 0;
+}
+
+int ?++(counter_t & mutex this) {
+	return ++this.value;
+}
+
+//need for mutex is platform dependent here
+void ?{}(int * this, counter_t & mutex cnt) {
+	*this = (int)cnt;
+}
+\end{cfacode}
+
+Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the \code{monitor} keyword. The monitor trait is:
+\begin{cfacode}
+trait is_monitor(dtype T) {
+	monitor_desc * get_monitor( T & );
+	void ^?{}( T & mutex );
+};
+\end{cfacode}
+Note that the destructor of a monitor must be a \code{mutex} routine to prevent deallocation while a thread is accessing the monitor. As with any object, calls to a monitor, using \code{mutex} or otherwise, are undefined behaviour after the destructor has run.
+
+% ======================================================================
+% ======================================================================
+\section{Internal Scheduling} \label{intsched}
+% ======================================================================
+% ======================================================================
+In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronization. With monitors, this capability is generally achieved with internal or external scheduling as in~\cite{Hoare74}. With \textbf{scheduling} loosely defined as deciding which thread acquires the critical section next, \textbf{internal scheduling} means making the decision from inside the critical section (i.e., with access to the shared state), while \textbf{external scheduling} means making the decision when entering the critical section (i.e., without access to the shared state). Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
+
+First, here is a simple example of internal scheduling:
+
+\begin{cfacode}
+monitor A {
+	condition e;
+}
+
+void foo(A& mutex a1, A& mutex a2) {
+	...
+	//Wait for cooperation from bar()
+	wait(a1.e);
+	...
+}
+
+void bar(A& mutex a1, A& mutex a2) {
+	//Provide cooperation for foo()
+	...
+	//Unblock foo
+	signal(a1.e);
+}
+\end{cfacode}
+There are two details to note here. First, \code{signal} is a delayed operation; it only unblocks the waiting thread when it reaches the end of the critical section. This semantics is needed to respect mutual-exclusion, i.e., the signaller and signalled thread cannot be in the monitor simultaneously. The alternative is to return immediately after the call to \code{signal}, which is significantly more restrictive. Second, in \CFA, while it is common to store a \code{condition} as a field of the monitor, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, ensuring a basic ordering.
+
+An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to be the next thread to acquire the monitor (unless some other thread waited on the same condition). This guarantee offers the benefit of not having to loop around waits to recheck that a condition is met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency.
+
+% ======================================================================
+% ======================================================================
+\subsection{Internal Scheduling - Multi-Monitor}
+% ======================================================================
+% ======================================================================
+It is easy to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling are done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use the implicit condition variable as parameters and explicitly name the monitors (A and B) associated with the condition. Note that in \CFA, condition variables are tied to a \emph{group} of monitors on first use (called branding), which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors. The example below shows the simple case of having two threads (one for each column) and a single monitor A.
+
+\begin{multicols}{2}
+thread 1
+\begin{pseudo}
+acquire A
+	wait A
+release A
+\end{pseudo}
+
+\columnbreak
+
+thread 2
+\begin{pseudo}
+acquire A
+	signal A
+release A
+\end{pseudo}
+\end{multicols}
+One thread acquires before waiting (atomically blocking and releasing A) and the other acquires before signalling. It is important to note here that both \code{wait} and \code{signal} must be called with the proper monitor(s) already acquired. This semantic is a logical requirement for barging prevention.
+
+A direct extension of the previous example is a \gls{bulk-acq} version:
+\begin{multicols}{2}
+\begin{pseudo}
+acquire A & B
+	wait A & B
+release A & B
+\end{pseudo}
+\columnbreak
+\begin{pseudo}
+acquire A & B
+	signal A & B
+release A & B
+\end{pseudo}
+\end{multicols}
+\noindent This version uses \gls{bulk-acq} (denoted using the {\sf\&} symbol), but the presence of multiple monitors does not add a particularly new meaning. Synchronization happens between the two threads in exactly the same way and order. The only difference is that mutual exclusion covers a group of monitors. On the implementation side, handling multiple monitors does add a degree of complexity as the next few examples demonstrate.
+
+While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well-known deadlock problem is the Nested Monitor Problem~\cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem:
+\begin{multicols}{2}
+\begin{pseudo}
+acquire A
+	acquire B
+		wait B
+	release B
+release A
+\end{pseudo}
+
+\columnbreak
+
+\begin{pseudo}
+acquire A
+	acquire B
+		signal B
+	release B
+release A
+\end{pseudo}
+\end{multicols}
+\noindent The \code{wait} only releases monitor \code{B} so the signalling thread cannot acquire monitor \code{A} to get to the \code{signal}. Attempting release of all acquired monitors at the \code{wait} introduces a different set of problems, such as releasing monitor \code{C}, which has nothing to do with the \code{signal}.
+
+However, for monitors as for locks, it is possible to write a program using nesting without encountering any problems if nesting is done correctly. For example, the next pseudo-code snippet acquires monitors {\sf A} then {\sf B} before waiting, while only acquiring {\sf B} when signalling, effectively avoiding the Nested Monitor Problem~\cite{Lister77}.
+
+\begin{multicols}{2}
+\begin{pseudo}
+acquire A
+	acquire B
+		wait B
+	release B
+release A
+\end{pseudo}
+
+\columnbreak
+
+\begin{pseudo}
+
+acquire B
+	signal B
+release B
+
+\end{pseudo}
+\end{multicols}
+
+\noindent However, this simple refactoring may not be possible, forcing more complex restructuring.
+
+% ======================================================================
+% ======================================================================
+\subsection{Internal Scheduling - In Depth}
+% ======================================================================
+% ======================================================================
+
+A larger example is presented to show complex issues for \gls{bulk-acq} and its implementation options are analyzed. Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code to implement the pseudo-code in listing \ref{lst:int-bulk-pseudo}. For the purpose of translating the given pseudo-code into \CFA-code, any method of introducing a monitor is acceptable, e.g., \code{mutex} parameters, global variables, pointer parameters, or using locals with the \code{mutex} statement.
+
+\begin{figure}[!t]
+\begin{multicols}{2}
+Waiting thread
+\begin{pseudo}[numbers=left]
+acquire A
+	//Code Section 1
+	acquire A & B
+		//Code Section 2
+		wait A & B
+		//Code Section 3
+	release A & B
+	//Code Section 4
+release A
+\end{pseudo}
+\columnbreak
+Signalling thread
+\begin{pseudo}[numbers=left, firstnumber=10,escapechar=|]
+acquire A
+	//Code Section 5
+	acquire A & B
+		//Code Section 6
+		|\label{line:signal1}|signal A & B
+		//Code Section 7
+	|\label{line:releaseFirst}|release A & B
+	//Code Section 8
+|\label{line:lastRelease}|release A
+\end{pseudo}
+\end{multicols}
+\begin{cfacode}[caption={Internal scheduling with \gls{bulk-acq}},label={lst:int-bulk-pseudo}]
+\end{cfacode}
+\begin{center}
+\begin{cfacode}[xleftmargin=.4\textwidth]
+monitor A a;
+monitor B b;
+condition c;
+\end{cfacode}
+\end{center}
+\begin{multicols}{2}
+Waiting thread
+\begin{cfacode}
+mutex(a) {
+	//Code Section 1
+	mutex(a, b) {
+		//Code Section 2
+		wait(c);
+		//Code Section 3
+	}
+	//Code Section 4
+}
+\end{cfacode}
+\columnbreak
+Signalling thread
+\begin{cfacode}
+mutex(a) {
+	//Code Section 5
+	mutex(a, b) {
+		//Code Section 6
+		signal(c);
+		//Code Section 7
+	}
+	//Code Section 8
+}
+\end{cfacode}
+\end{multicols}
+\begin{cfacode}[caption={Equivalent \CFA code for listing \ref{lst:int-bulk-pseudo}},label={lst:int-bulk-cfa}]
+\end{cfacode}
+\begin{multicols}{2}
+Waiter
+\begin{pseudo}[numbers=left]
+acquire A
+	acquire A & B
+		wait A & B
+	release A & B
+release A
+\end{pseudo}
+
+\columnbreak
+
+Signaller
+\begin{pseudo}[numbers=left, firstnumber=6,escapechar=|]
+acquire A
+	acquire A & B
+		signal A & B
+	release A & B
+	|\label{line:secret}|//Secretly keep B here
+release A
+//Wakeup waiter and transfer A & B
+\end{pseudo}
+\end{multicols}
+\begin{cfacode}[caption={Listing \ref{lst:int-bulk-pseudo}, with delayed signalling comments},label={lst:int-secret}]
+\end{cfacode}
+\end{figure}
+
+The complexity begins at code sections 4 and 8 in listing \ref{lst:int-bulk-pseudo}, which are where the existing semantics of internal scheduling needs to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired, which is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should ``release \code{A & B}'' (listing \ref{lst:int-bulk-pseudo} line \ref{line:releaseFirst}), it must actually transfer ownership of monitor \code{B} to the waiting thread. This ownership transfer is required in order to prevent barging into \code{B} by another thread, since both the signalling and signalled threads still need monitor \code{A}. There are three options:
+
+\subsubsection{Delaying Signals}
+The obvious solution to the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that that moment is when the last lock is no longer needed, because this semantics fits most closely to the behaviour of single-monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from multiple objects to a single group of objects, effectively making the existing single-monitor semantic viable by simply changing monitors to monitor groups. This solution releases the monitors once every monitor in a group can be released. However, since some monitors are never released (e.g., the monitor of a thread), this interpretation means a group might never be released. A more interesting interpretation is to transfer the group until all its monitors are released, which means the group is not passed further and a thread can retain its locks.
+
+However, listing \ref{lst:int-secret} shows this solution can become much more complicated depending on what is executed while secretly holding B at line \ref{line:secret}, while avoiding the need to transfer ownership of a subset of the condition monitors. Listing \ref{lst:dependency} shows a slightly different example where a third thread is waiting on monitor \code{A}, using a different condition variable. Because the third thread is signalled when secretly holding \code{B}, the goal becomes unreachable. Depending on the order of signals (listing \ref{lst:dependency} lines \ref{line:signal-ab} and \ref{line:signal-a}), two cases can happen:
+
+\paragraph{Case 1: thread $\alpha$ goes first.} In this case, the problem is that monitor \code{A} needs to be passed to thread $\beta$ when thread $\alpha$ is done with it.
+\paragraph{Case 2: thread $\beta$ goes first.} In this case, the problem is that monitor \code{B} needs to be retained and passed to thread $\alpha$ along with monitor \code{A}, which can be done directly or possibly using thread $\beta$ as an intermediate.
+\\
+
+Note that ordering is not determined by a race condition but by whether signalled threads are enqueued in FIFO or FILO order. However, regardless of the answer, users can move line \ref{line:signal-a} before line \ref{line:signal-ab} and get the reverse effect for listing \ref{lst:dependency}.
+
+In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means knowing when to release a group becomes complex and inefficient (see next section) and therefore effectively precludes this approach.
+
+\subsubsection{Dependency graphs}
+
+
+\begin{figure}
+\begin{multicols}{3}
+Thread $\alpha$
+\begin{pseudo}[numbers=left, firstnumber=1]
+acquire A
+	acquire A & B
+		wait A & B
+	release A & B
+release A
+\end{pseudo}
+\columnbreak
+Thread $\gamma$
+\begin{pseudo}[numbers=left, firstnumber=6, escapechar=|]
+acquire A
+	acquire A & B
+		|\label{line:signal-ab}|signal A & B
+	|\label{line:release-ab}|release A & B
+	|\label{line:signal-a}|signal A
+|\label{line:release-a}|release A
+\end{pseudo}
+\columnbreak
+Thread $\beta$
+\begin{pseudo}[numbers=left, firstnumber=12, escapechar=|]
+acquire A
+	wait A
+|\label{line:release-aa}|release A
+\end{pseudo}
+\end{multicols}
+\begin{cfacode}[caption={Pseudo-code for the three thread example.},label={lst:dependency}]
+\end{cfacode}
+\begin{center}
+\input{dependency}
+\end{center}
+\caption{Dependency graph of the statements in listing \ref{lst:dependency}}
+\label{fig:dependency}
+\end{figure}
+
+In listing \ref{lst:int-bulk-pseudo}, there is a solution that satisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases \code{A & B} and then the waiter transfers ownership of \code{A} back to the signaller when it releases it, then the problem is solved (\code{B} is no longer in use at this point). Dynamically finding the correct order is therefore the second possible solution. The problem is effectively resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and has a super-linear complexity. This complexity can be seen in listing \ref{lst:explosion}, which is just a direct extension to three monitors, requires at least three ownership transfers, and has multiple solutions. Furthermore, the presence of multiple solutions for ownership transfer can cause deadlock problems if a specific solution is not consistently picked, in the same way that multiple lock-acquiring orders can cause deadlocks.
+\begin{figure}
+\begin{multicols}{2}
+\begin{pseudo}
+acquire A
+	acquire B
+		acquire C
+			wait A & B & C
+		release C
+	release B
+release A
+\end{pseudo}
+
+\columnbreak
+
+\begin{pseudo}
+acquire A
+	acquire B
+		acquire C
+			signal A & B & C
+		release C
+	release B
+release A
+\end{pseudo}
+\end{multicols}
+\begin{cfacode}[caption={Extension to three monitors of listing \ref{lst:int-bulk-pseudo}},label={lst:explosion}]
+\end{cfacode}
+\end{figure}
+
+Given the three threads example in listing \ref{lst:dependency}, figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows the dependency of that statement (e.g., $\alpha1$ must happen before $\alpha2$). The extra challenge is that this dependency graph is effectively post-mortem, but the runtime system needs to be able to build and solve these graphs as the dependencies unfold. Resolving dependency graphs being a complex and expensive endeavour, this solution is not the preferred one.
+
+\subsubsection{Partial Signalling} \label{partial-sig}
+Finally, the solution that is chosen for \CFA is to use partial signalling. Again using listing \ref{lst:int-bulk-pseudo}, the partial signalling solution transfers ownership of monitor \code{B} at line \ref{line:signal1} to the waiter but does not wake the waiting thread since the signaller is still using monitor \code{A}. Only when the signaller reaches line \ref{line:lastRelease} does it actually wake up the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency-graph solving algorithm, which is why it was chosen. Furthermore, after being fully implemented, this solution does not appear to have any significant downsides.
+
+Using partial signalling, listing \ref{lst:dependency} can be solved easily:
+\begin{itemize}
+	\item When thread $\gamma$ reaches line \ref{line:release-ab} it transfers monitor \code{B} to thread $\alpha$ and continues to hold monitor \code{A}.
+	\item When thread $\gamma$ reaches line \ref{line:release-a}  it transfers monitor \code{A} to thread $\beta$  and wakes it up.
+	\item When thread $\beta$  reaches line \ref{line:release-aa} it transfers monitor \code{A} to thread $\alpha$ and wakes it up.
+\end{itemize}
+
+% ======================================================================
+% ======================================================================
+\subsection{Signalling: Now or Later}
+% ======================================================================
+% ======================================================================
+\begin{table}
+\begin{tabular}{|c|c|}
+\code{signal} & \code{signal_block} \\
+\hline
+\begin{cfacode}[tabsize=3]
+monitor DatingService
+{
+	//compatibility codes
+	enum{ CCodes = 20 };
+
+	int girlPhoneNo;
+	int boyPhoneNo;
+};
+
+condition girls[CCodes];
+condition boys [CCodes];
+condition exchange;
+
+int girl(int phoneNo, int ccode)
+{
+	//no compatible boy ?
+	if(empty(boys[ccode]))
+	{
+		//wait for boy
+		wait(girls[ccode]);
+
+		//make phone number available
+		girlPhoneNo = phoneNo;
+
+		//wake boy from chair
+		signal(exchange);
+	}
+	else
+	{
+		//make phone number available
+		girlPhoneNo = phoneNo;
+
+		//wake boy
+		signal(boys[ccode]);
+
+		//sit in chair
+		wait(exchange);
+	}
+	return boyPhoneNo;
+}
+
+int boy(int phoneNo, int ccode)
+{
+	//same as above
+	//with boy/girl interchanged
+}
+\end{cfacode}&\begin{cfacode}[tabsize=3]
+monitor DatingService
+{
+	//compatibility codes
+	enum{ CCodes = 20 };
+
+	int girlPhoneNo;
+	int boyPhoneNo;
+};
+
+condition girls[CCodes];
+condition boys [CCodes];
+//exchange is not needed
+
+int girl(int phoneNo, int ccode)
+{
+	//no compatible boy ?
+	if(empty(boys[ccode]))
+	{
+		//wait for boy
+		wait(girls[ccode]);
+
+		//make phone number available
+		girlPhoneNo = phoneNo;
+
+		//wake boy from chair
+		signal(exchange);
+	}
+	else
+	{
+		//make phone number available
+		girlPhoneNo = phoneNo;
+
+		//wake boy
+		signal_block(boys[ccode]);
+
+		//second handshake unnecessary
+
+	}
+	return boyPhoneNo;
+}
+
+int boy(int phoneNo, int ccode)
+{
+	//same as above
+	//with boy/girl interchanged
+}
+\end{cfacode}
+\end{tabular}
+\caption{Dating service example using \code{signal} and \code{signal_block}. }
+\label{tbl:datingservice}
+\end{table}
+An important note is that, until now, signalling a monitor was a delayed operation. The ownership of the monitor is transferred only when the monitor would have otherwise been released, not at the point of the \code{signal} statement. However, in some cases, it may be more convenient for users to immediately transfer ownership to the thread that is waiting for cooperation, which is achieved using the \code{signal_block} routine.
+
+The example in table \ref{tbl:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits; this behaviour requires additional synchronization when a two-way handshake is needed. To avoid this explicit synchronization, the \code{condition} type offers the \code{signal_block} routine, which handles the two-way handshake as shown in the example. This feature removes the need for a second condition variable and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the front end and the back end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call.
+
+% ======================================================================
+% ======================================================================
+\section{External scheduling} \label{extsched}
+% ======================================================================
+% ======================================================================
+An alternative to internal scheduling is external scheduling (see Table~\ref{tbl:sched}).
+\begin{table}
+\begin{tabular}{|c|c|c|}
+Internal Scheduling & External Scheduling & Go\\
+\hline
+\begin{ucppcode}[tabsize=3]
+_Monitor Semaphore {
+	condition c;
+	bool inUse;
+public:
+	void P() {
+		if(inUse)
+			wait(c);
+		inUse = true;
+	}
+	void V() {
+		inUse = false;
+		signal(c);
+	}
+}
+\end{ucppcode}&\begin{ucppcode}[tabsize=3]
+_Monitor Semaphore {
+
+	bool inUse;
+public:
+	void P() {
+		if(inUse)
+			_Accept(V);
+		inUse = true;
+	}
+	void V() {
+		inUse = false;
+
+	}
+}
+\end{ucppcode}&\begin{gocode}[tabsize=3]
+type MySem struct {
+	inUse bool
+	c     chan bool
+}
+
+// acquire
+func (s MySem) P() {
+	if s.inUse {
+		select {
+		case <-s.c:
+		}
+	}
+	s.inUse = true
+}
+
+// release
+func (s MySem) V() {
+	s.inUse = false
+
+	//This actually deadlocks
+	//when single thread
+	s.c <- false
+}
+\end{gocode}
+\end{tabular}
+\caption{Different forms of scheduling.}
+\label{tbl:sched}
+\end{table}
+This method is more constrained and explicit, which helps users reduce the non-deterministic nature of concurrency. Indeed, as the following examples demonstrate, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. External scheduling can generally be done either in terms of control flow (e.g., Ada with \code{accept}, \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). Of course, both of these paradigms have their own strengths and weaknesses, but for this project, control-flow semantics was chosen to stay consistent with the rest of the language's semantics. Two challenges specific to \CFA arise when trying to add external scheduling: loose object definitions and multiple-monitor routines. The previous example shows a simple use of \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s.
+
+For the \code{P} member above using internal scheduling, the call to \code{wait} only guarantees that \code{V} is the last routine to access the monitor, allowing a third routine, say \code{isInUse()}, to acquire mutual exclusion several times while routine \code{P} is waiting. On the other hand, external scheduling guarantees that while routine \code{P} is waiting, no other routine than \code{V} can acquire the monitor.
+
+% ======================================================================
+% ======================================================================
+\subsection{Loose Object Definitions}
+% ======================================================================
+% ======================================================================
+In \uC, a monitor class declaration includes an exhaustive list of monitor operations. Since \CFA is not object oriented, monitors become both more difficult to implement and less clear for a user:
+
+\begin{cfacode}
+monitor A {};
+
+void f(A & mutex a);
+void g(A & mutex a) {
+	waitfor(f); //Obvious which f() to wait for
+}
+
+void f(A & mutex a, int); //New different F added in scope
+void h(A & mutex a) {
+	waitfor(f); //Less obvious which f() to wait for
+}
+\end{cfacode}
+
+Furthermore, external scheduling is an example where implementation constraints become visible from the interface. Here is the pseudo-code for the entering phase of a monitor:
+\begin{center}
+\begin{tabular}{l}
+\begin{pseudo}
+	if monitor is free
+		enter
+	elif already own the monitor
+		continue
+	elif monitor accepts me
+		enter
+	else
+		block
+\end{pseudo}
+\end{tabular}
+\end{center}
+For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. Indeed, monitors are often expressed as an entry queue and some acceptor queue as in Figure~\ref{fig:ClassicalMonitor}.
+
+\begin{figure}
+\centering
+\subfloat[Classical Monitor] {
+\label{fig:ClassicalMonitor}
+{\resizebox{0.45\textwidth}{!}{\input{monitor}}}
+}% subfloat
+\qquad
+\subfloat[\Gls{bulk-acq} Monitor] {
+\label{fig:BulkMonitor}
+{\resizebox{0.45\textwidth}{!}{\input{ext_monitor}}}
+}% subfloat
+\caption{External Scheduling Monitor}
+\end{figure}
+
+There are other alternatives to these pictures, but in the case of the left picture, implementing a fast accept check is relatively easy. Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (e.g., 128) of mutex members. This approach requires a unique dense ordering of routines with an upper-bound and that ordering must be consistent across translation units. For OO languages these constraints are common, since objects only offer adding member routines consistently across translation units via inheritance. However, in \CFA users can extend objects with mutex routines that are only visible in certain translation units. This means that establishing a program-wide dense-ordering among mutex routines can only be done in the program linking phase, and still could have issues when using dynamically shared objects.
+
+The alternative is to alter the implementation as in Figure~\ref{fig:BulkMonitor}.
+Here, the mutex routine called is associated with a thread on the entry queue while a list of acceptable routines is kept separate. Generating a mask dynamically means that the storage for the mask information can vary between calls to \code{waitfor}, allowing for more flexibility and extensions. Storing an array of accepted function pointers replaces the single instruction bitmask comparison with dereferencing a pointer followed by a linear search. Furthermore, supporting nested external scheduling (e.g., listing \ref{lst:nest-ext}) may now require additional searches for the \code{waitfor} statement to check if a routine is already queued.
+
+\begin{figure}
+\begin{cfacode}[caption={Example of nested external scheduling},label={lst:nest-ext}]
+monitor M {};
+void foo( M & mutex a ) {}
+void bar( M & mutex b ) {
+	//Nested in the waitfor(bar, c) call
+	waitfor(foo, b);
+}
+void baz( M & mutex c ) {
+	waitfor(bar, c);
+}
+
+\end{cfacode}
+\end{figure}
+
+Note that in the right picture, tasks need to always keep track of the monitors associated with mutex routines, and the routine mask needs to have both a function pointer and a set of monitors, as is discussed in the next section. These details are omitted from the picture for the sake of simplicity.
+
+At this point, a decision must be made between flexibility and performance. Many design decisions in \CFA achieve both flexibility and performance, for example polymorphic routines add significant flexibility but inlining them means the optimizer can easily remove any runtime cost. Here, however, the cost of flexibility cannot be trivially removed. In the end, the most flexible approach has been chosen since it allows users to write programs that would otherwise be  hard to write. This decision is based on the assumption that writing fast but inflexible locks is closer to a solved problem than writing locks that are as flexible as external scheduling in \CFA.
+
+% ======================================================================
+% ======================================================================
+\subsection{Multi-Monitor Scheduling}
+% ======================================================================
+% ======================================================================
+
+External scheduling, like internal scheduling, becomes significantly more complex when introducing multi-monitor syntax. Even in the simplest possible case, some new semantics needs to be established:
+\begin{cfacode}
+monitor M {};
+
+void f(M & mutex a);
+
+void g(M & mutex b, M & mutex c) {
+	waitfor(f); //two monitors M => unknown which to pass to f(M & mutex)
+}
+\end{cfacode}
+The obvious solution is to specify the correct monitor as follows:
+
+\begin{cfacode}
+monitor M {};
+
+void f(M & mutex a);
+
+void g(M & mutex a, M & mutex b) {
+	//wait for call to f with argument b
+	waitfor(f, b);
+}
+\end{cfacode}
+This syntax is unambiguous. Both locks are acquired and kept by \code{g}. When routine \code{f} is called, the lock for monitor \code{b} is temporarily transferred from \code{g} to \code{f} (while \code{g} still holds lock \code{a}). This behaviour can be extended to the multi-monitor \code{waitfor} statement as follows.
+
+\begin{cfacode}
+monitor M {};
+
+void f(M & mutex a, M & mutex b);
+
+void g(M & mutex a, M & mutex b) {
+	//wait for call to f with arguments a and b
+	waitfor(f, a, b);
+}
+\end{cfacode}
+
+Note that the set of monitors passed to the \code{waitfor} statement must be entirely contained in the set of monitors already acquired in the routine. \code{waitfor} used in any other context is undefined behaviour.
+
+An important behaviour to note is when a set of monitors only match partially:
+
+\begin{cfacode}
+mutex struct A {};
+
+mutex struct B {};
+
+void g(A & mutex a, B & mutex b) {
+	waitfor(f, a, b);
+}
+
+A a1, a2;
+B b;
+
+void foo() {
+	g(a1, b); //block on accept
+}
+
+void bar() {
+	f(a2, b); //fulfill cooperation
+}
+\end{cfacode}
+While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets do not wake up the waiting thread. It is also important to note that in the case of external scheduling the order of parameters is irrelevant; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are indistinguishable waiting conditions.
+
+% ======================================================================
+% ======================================================================
+\subsection{\code{waitfor} Semantics}
+% ======================================================================
+% ======================================================================
+
+Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted because the compiler validates at compile time the validity of the function type and the parameters used with the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the waitfor statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading, but overloading is possible.
+\begin{figure}
+\begin{cfacode}[caption={Various correct and incorrect uses of the waitfor statement},label={lst:waitfor}]
+monitor A{};
+monitor B{};
+
+void f1( A & mutex );
+void f2( A & mutex, B & mutex );
+void f3( A & mutex, int );
+void f4( A & mutex, int );
+void f4( A & mutex, double );
+
+void foo( A & mutex a1, A & mutex a2, B & mutex b1, B & b2 ) {
+	A * ap = & a1;
+	void (*fp)( A & mutex ) = f1;
+
+	waitfor(f1, a1);     //Correct : 1 monitor case
+	waitfor(f2, a1, b1); //Correct : 2 monitor case
+	waitfor(f3, a1);     //Correct : non-mutex arguments are ignored
+	waitfor(f1, *ap);    //Correct : expression as argument
+
+	waitfor(f1, a1, b1); //Incorrect : Too many mutex arguments
+	waitfor(f2, a1);     //Incorrect : Too few mutex arguments
+	waitfor(f2, a1, a2); //Incorrect : Mutex arguments don't match
+	waitfor(f1, 1);      //Incorrect : 1 not a mutex argument
+	waitfor(f9, a1);     //Incorrect : f9 function does not exist
+	waitfor(*fp, a1 );   //Incorrect : fp not an identifier
+	waitfor(f4, a1);     //Incorrect : f4 ambiguous
+
+	waitfor(f2, a1, b2); //Undefined behaviour : b2 not mutex
+}
+\end{cfacode}
+\end{figure}
+
+Finally, for added flexibility, \CFA supports constructing a complex \code{waitfor} statement using the \code{or}, \code{timeout} and \code{else} keywords. Indeed, multiple \code{waitfor} clauses can be chained together using \code{or}; this chain forms a single statement that uses baton passing to any function that fits one of the function+monitor sets passed in. To enable users to tell which accepted function executed, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement, which is executed after the clause is triggered. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, which checks whether a matching function call has already arrived and otherwise continues. Any and all of these clauses can be preceded by a \code{when} condition to dynamically toggle the accept clauses on or off based on some current state. Listing \ref{lst:waitfor2} demonstrates several complex masks and some incorrect ones.
+
+\begin{figure}
+\begin{cfacode}[caption={Various correct and incorrect uses of the or, else, and timeout clause around a waitfor statement},label={lst:waitfor2}]
+monitor A{};
+
+void f1( A & mutex );
+void f2( A & mutex );
+
+void foo( A & mutex a, bool b, int t ) {
+	//Correct : blocking case
+	waitfor(f1, a);
+
+	//Correct : block with statement
+	waitfor(f1, a) {
+		sout | "f1" | endl;
+	}
+
+	//Correct : block waiting for f1 or f2
+	waitfor(f1, a) {
+		sout | "f1" | endl;
+	} or waitfor(f2, a) {
+		sout | "f2" | endl;
+	}
+
+	//Correct : non-blocking case
+	waitfor(f1, a); or else;
+
+	//Correct : non-blocking case
+	waitfor(f1, a) {
+		sout | "blocked" | endl;
+	} or else {
+		sout | "didn't block" | endl;
+	}
+
+	//Correct : block at most 10 seconds
+	waitfor(f1, a) {
+		sout | "blocked" | endl;
+	} or timeout( 10`s) {
+		sout | "didn't block" | endl;
+	}
+
+	//Correct : block only if b == true
+	//if b == false, don't even make the call
+	when(b) waitfor(f1, a);
+
+	//Correct : block only if b == true
+	//if b == false, make non-blocking call
+	waitfor(f1, a); or when(!b) else;
+
+	//Correct : block only if t > 1
+	waitfor(f1, a); or when(t > 1) timeout(t); or else;
+
+	//Incorrect : timeout clause is dead code
+	waitfor(f1, a); or timeout(t); or else;
+
+	//Incorrect : order must be
+	//waitfor [or waitfor... [or timeout] [or else]]
+	timeout(t); or waitfor(f1, a); or else;
+}
+\end{cfacode}
+\end{figure}
+
+% ======================================================================
+% ======================================================================
+\subsection{Waiting For The Destructor}
+% ======================================================================
+% ======================================================================
+An interesting use for the \code{waitfor} statement is destructor semantics. Indeed, the \code{waitfor} statement can accept any \code{mutex} routine, which includes the destructor (see section \ref{data}). However, with the semantics discussed until now, waiting for the destructor does not make any sense, since using an object after its destructor is called is undefined behaviour. The simplest approach is to disallow \code{waitfor} on a destructor. However, a more expressive approach is to flip ordering of execution when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current \code{mutex} routine, similarly to how a condition is signalled.
+\begin{figure}
+\begin{cfacode}[caption={Example of an executor which executes actions in series until the destructor is called.},label={lst:dtor-order}]
+monitor Executer {};
+struct  Action;
+
+void ^?{}   (Executer & mutex this);
+void execute(Executer & mutex this, const Action & );
+void run    (Executer & mutex this) {
+	while(true) {
+		   waitfor(execute, this);
+		or waitfor(^?{}   , this) {
+			break;
+		}
+	}
+}
+\end{cfacode}
+\end{figure}
+For example, listing \ref{lst:dtor-order} shows an example of an executor with an infinite loop, which waits for the destructor to break out of this loop. Switching the semantic meaning introduces an idiomatic way to terminate a task and/or wait for its termination via destruction.
Index: doc/theses/thierry_delisle/text/frontpgs.tex
===================================================================
--- doc/theses/thierry_delisle/text/frontpgs.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/text/frontpgs.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,165 @@
+% T I T L E   P A G E
+% -------------------
+% Last updated May 24, 2011, by Stephen Carr, IST-Client Services
+% The title page is counted as page `i' but we need to suppress the
+% page number.  We also don't want any headers or footers.
+\pagestyle{empty}
+\pagenumbering{roman}
+
+% The contents of the title page are specified in the "titlepage"
+% environment.
+\begin{titlepage}
+        \begin{center}
+        \vspace*{1.0cm}
+
+        \Huge
+        {\bf Concurrency in \CFA}
+
+        \vspace*{1.0cm}
+
+        \normalsize
+        by \\
+
+        \vspace*{1.0cm}
+
+        \Large
+        Thierry Delisle \\
+
+        \vspace*{3.0cm}
+
+        \normalsize
+        A thesis \\
+        presented to the University of Waterloo \\
+        in fulfillment of the \\
+        thesis requirement for the degree of \\
+        Master of Mathematics \\
+        in \\
+        Computer Science \\
+
+        \vspace*{2.0cm}
+
+        Waterloo, Ontario, Canada, 2018 \\
+
+        \vspace*{1.0cm}
+
+        \copyright\ Thierry Delisle 2018 \\
+        \end{center}
+\end{titlepage}
+
+% The rest of the front pages should contain no headers and be numbered using Roman numerals starting with `ii'
+\pagestyle{plain}
+\setcounter{page}{2}
+
+\cleardoublepage % Ends the current page and causes all figures and tables that have so far appeared in the input to be printed.
+% In a two-sided printing style, it also makes the next page a right-hand (odd-numbered) page, producing a blank page if necessary.
+
+
+
+% D E C L A R A T I O N   P A G E
+% -------------------------------
+  % The following is the sample Declaration Page as provided by the GSO
+  % December 13th, 2006.  It is designed for an electronic thesis.
+  \noindent
+%I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
+
+  \bigskip
+
+  \noindent
+
+  I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
+
+
+  I understand that my thesis may be made electronically available to the public.
+
+\cleardoublepage
+%\newpage
+
+% A B S T R A C T
+% ---------------
+
+\begin{center}\textbf{Abstract}\end{center}
+
+\CFA is a modern, non-object-oriented extension of the C programming language. This thesis serves as a definition and an implementation for the concurrency and parallelism \CFA offers. These features are created from scratch due to the lack of concurrency in ISO C. Lightweight threads are introduced into the language. In addition, monitors are introduced as a high-level tool for control-flow based synchronization and mutual-exclusion. The main contributions of this thesis are two-fold: it extends the existing semantics of monitors introduced by~\cite{Hoare74} to handle monitors in groups and also details the engineering effort needed to introduce these features as core language features. Indeed, these features are added with respect to expectations of C programmers, and integrate with the \CFA type-system and other language features.
+
+
+\cleardoublepage
+%\newpage
+
+% A C K N O W L E D G E M E N T S
+% -------------------------------
+
+\begin{center}\textbf{Acknowledgements}\end{center}
+
+I would like to thank my supervisor, Professor Peter Buhr, for his guidance through my degree as well as the editing of this document.
+
+I would like to thank Professors Martin Karsten and Gregor Richards, for reading my thesis and providing helpful feedback.
+
+Thanks to Aaron Moss, Rob Schluntz and Andrew Beach for their work on the \CFA project as well as all the discussions which have helped me concretize the ideas in this thesis.
+
+Finally, I acknowledge that this has been possible thanks to the financial help offered by the David R. Cheriton School of Computer Science and the corporate partnership with Huawei Ltd.
+
+\cleardoublepage
+%\newpage
+
+% % D E D I C A T I O N
+% % -------------------
+
+% \begin{center}\textbf{Dedication}\end{center}
+
+% % This is dedicated to the one I love.
+% TODO
+% \cleardoublepage
+% %\newpage
+
+% T A B L E   O F   C O N T E N T S
+% ---------------------------------
+\renewcommand\contentsname{Table of Contents}
+\tableofcontents
+\cleardoublepage
+\phantomsection
+%\newpage
+
+% L I S T   O F   T A B L E S
+% ---------------------------
+\addcontentsline{toc}{chapter}{List of Tables}
+\listoftables
+\cleardoublepage
+\phantomsection		% allows hyperref to link to the correct page
+%\newpage
+
+% L I S T   O F   F I G U R E S
+% -----------------------------
+\addcontentsline{toc}{chapter}{List of Figures}
+\listoffigures
+\cleardoublepage
+\phantomsection		% allows hyperref to link to the correct page
+%\newpage
+
+% L I S T   O F   L I S T I N G S
+% -----------------------------
+\addcontentsline{toc}{chapter}{List of Listings}
+\lstlistoflistings
+\cleardoublepage
+\phantomsection		% allows hyperref to link to the correct page
+%\newpage
+
+% L I S T   O F   S Y M B O L S
+% -----------------------------
+% To include a Nomenclature section
+% \addcontentsline{toc}{chapter}{\textbf{Nomenclature}}
+% \renewcommand{\nomname}{Nomenclature}
+% \printglossary
+% \cleardoublepage
+% \phantomsection % allows hyperref to link to the correct page
+% \newpage
+
+% L I S T   O F   A C R O N Y M S
+% -----------------------------
+\addcontentsline{toc}{chapter}{List of Acronyms}
+\printglossary[type=\acronymtype,title={List of Acronyms}]
+\cleardoublepage
+\phantomsection		% allows hyperref to link to the correct page
+
+% Change page numbering back to Arabic numerals
+\pagenumbering{arabic}
+
Index: doc/theses/thierry_delisle/text/future.tex
===================================================================
--- doc/theses/thierry_delisle/text/future.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/text/future.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,112 @@
+
+\chapter{Conclusion}
+This thesis has achieved a minimal concurrency \acrshort{api} that is simple, efficient and usable as the basis for higher-level features. The approach presented is based on a lightweight thread-system for parallelism, which sits on top of clusters of processors. This M:N model is judged to be both more efficient and allow more flexibility for users. Furthermore, this document introduces monitors as the main concurrency tool for users. This thesis also offers a novel approach allowing multiple monitors to be accessed simultaneously without running into the Nested Monitor Problem~\cite{Lister77}. It also offers a full implementation of the concurrency runtime written entirely in \CFA, effectively the largest \CFA code base to date.
+
+
+% ======================================================================
+% ======================================================================
+\section{Future Work}
+% ======================================================================
+% ======================================================================
+
+\subsection{Performance} \label{futur:perf}
+This thesis presents a first implementation of the \CFA concurrency runtime. Therefore, there is still significant work to improve performance. Many of the data structures and algorithms may change in the future to more efficient versions. For example, the number of monitors in a single \gls{bulk-acq} is only bounded by the stack size, which is probably unnecessarily generous. It may be possible that limiting the number helps increase performance. However, it is not obvious that the benefit would be significant.
+
+\subsection{Flexible Scheduling} \label{futur:sched}
+An important part of concurrency is scheduling. Different scheduling algorithms can affect performance (both in terms of average and variation). However, no single scheduler is optimal for all workloads and therefore there is value in being able to change the scheduler for given programs. One solution is to offer various tweaking options to users, allowing the scheduler to be adjusted to the requirements of the workload. However, in order to be truly flexible, it would be interesting to allow users to add arbitrary data and arbitrary scheduling algorithms. For example, a web server could attach Type-of-Service information to threads and have a ``ToS aware'' scheduling algorithm tailored to this specific web server. This path of flexible schedulers will be explored for \CFA.
+
+\subsection{Non-Blocking I/O} \label{futur:nbio}
+While most of the parallelism tools are aimed at data parallelism and control-flow parallelism, many modern workloads are not bound on computation but on I/O operations, a common case being web servers and XaaS (anything as a service). These types of workloads often require significant engineering around amortizing costs of blocking I/O operations. At its core, non-blocking I/O is an operating system level feature that allows queuing I/O operations (e.g., network operations) and registering for notifications instead of waiting for requests to complete. In this context, the role of the language is to make non-blocking I/O easily available and with low overhead. The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises, which can be seen in frameworks like Node.js~\cite{NodeJs} for JavaScript, Spring MVC~\cite{SpringMVC} for Java and Django~\cite{Django} for Python. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear.
+
+\subsection{Other Concurrency Tools} \label{futur:tools}
+While monitors offer a flexible and powerful concurrent core for \CFA, other concurrency tools are also necessary for a complete multi-paradigm concurrency package. Examples of such tools can include simple locks and condition variables, futures and promises~\cite{promises}, executors and actors. These additional features are useful when monitors offer a level of abstraction that is inadequate for certain tasks.
+
+\subsection{Implicit Threading} \label{futur:implcit}
+Simpler applications can benefit greatly from having implicit parallelism. That is, parallelism that does not rely on the user to write concurrency. This type of parallelism can be achieved both at the language level and at the library level. The canonical example of implicit parallelism is parallel for loops, which are the simplest example of a divide-and-conquer algorithm~\cite{uC++book}. Table \ref{lst:parfor} shows three different code examples that accomplish point-wise sums of large arrays. Note that none of these examples explicitly declare any concurrency or parallelism objects.
+
+\begin{table}
+\begin{center}
+\begin{tabular}[t]{|c|c|c|}
+Sequential & Library Parallel & Language Parallel \\
+\begin{cfacode}[tabsize=3]
+void big_sum(
+	int* a, int* b,
+	int* o,
+	size_t len)
+{
+	for(
+		int i = 0;
+		i < len;
+		++i )
+	{
+		o[i]=a[i]+b[i];
+	}
+}
+
+
+
+
+
+int* a[10000];
+int* b[10000];
+int* c[10000];
+//... fill in a & b
+big_sum(a,b,c,10000);
+\end{cfacode} &\begin{cfacode}[tabsize=3]
+void big_sum(
+	int* a, int* b,
+	int* o,
+	size_t len)
+{
+	range ar(a, a+len);
+	range br(b, b+len);
+	range or(o, o+len);
+	parfor( ai, bi, oi,
+	[](	int* ai,
+		int* bi,
+		int* oi)
+	{
+		oi=ai+bi;
+	});
+}
+
+
+int* a[10000];
+int* b[10000];
+int* c[10000];
+//... fill in a & b
+big_sum(a,b,c,10000);
+\end{cfacode}&\begin{cfacode}[tabsize=3]
+void big_sum(
+	int* a, int* b,
+	int* o,
+	size_t len)
+{
+	parfor (ai,bi,oi)
+	    in (a, b, o )
+	{
+		oi = ai + bi;
+	}
+}
+
+
+
+
+
+
+
+int* a[10000];
+int* b[10000];
+int* c[10000];
+//... fill in a & b
+big_sum(a,b,c,10000);
+\end{cfacode}
+\end{tabular}
+\end{center}
+\caption{For loop to sum numbers: Sequential, using library parallelism and language parallelism.}
+\label{lst:parfor}
+\end{table}
+
+Implicit parallelism is a restrictive solution and therefore has its limitations. However, it is a quick and simple approach to parallelism, which may very well be sufficient for smaller applications and reduces the amount of boilerplate needed to start benefiting from parallelism in modern CPUs.
+
+
Index: doc/theses/thierry_delisle/text/internals.tex
===================================================================
--- doc/theses/thierry_delisle/text/internals.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/text/internals.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,326 @@
+
+\chapter{Behind the Scenes}
+There are several challenges specific to \CFA when implementing concurrency. These challenges are a direct result of \gls{bulk-acq} and loose object definitions. These two constraints are the root cause of most design decisions in the implementation. Furthermore, to avoid contention from dynamically allocating memory in a concurrent environment, the internal-scheduling design is (almost) entirely free of mallocs. This approach avoids the chicken and egg problem~\cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal means that memory management is a constant concern in the design of the system.
+
+The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues and all queues are designed with intrusive nodes, where each node has pre-allocated link fields for chaining, to avoid the need for memory allocation. Since several concurrency operations can use an unbounded amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient. The only way to use a variable amount of memory without requiring memory allocation is to pre-allocate large buffers of memory eagerly and store the information in these buffers. Conveniently, the call stack fits that description and is easy to use, which is why it is used heavily in the implementation of internal scheduling, particularly variable-length arrays. Since stack allocation is based on scopes, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable-length array. The threads and the condition both have a fixed amount of memory, while \code{mutex} routines and blocking calls allow for an unbounded amount, within the stack size.
+
+Note that since the major contributions of this thesis are extending monitor semantics to \gls{bulk-acq} and loose object definitions, any challenges that do not result from these characteristics of \CFA are considered solved problems and are therefore not discussed.
+
+% ======================================================================
+% ======================================================================
+\section{Mutex Routines}
+% ======================================================================
+% ======================================================================
+
+The first step towards the monitor implementation is simple \code{mutex} routines. In the single monitor case, mutual-exclusion is done using the entry/exit procedure in listing \ref{lst:entry1}. The entry/exit procedures do not have to be extended to support multiple monitors. Indeed, it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlock~\cite{Havender68}. In \CFA, ordering of monitor acquisition relies on memory ordering. This approach is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is undefined behaviour. When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values. This array persists for the entire duration of the mutual-exclusion and its ordering is reused extensively.
+\begin{figure}
+\begin{multicols}{2}
+Entry
+\begin{pseudo}
+if monitor is free
+	enter
+elif already own the monitor
+	continue
+else
+	block
+increment recursions
+\end{pseudo}
+\columnbreak
+Exit
+\begin{pseudo}
+decrement recursion
+if recursion == 0
+	if entry queue not empty
+		wake-up thread
+\end{pseudo}
+\end{multicols}
+\begin{pseudo}[caption={Initial entry and exit routine for monitors},label={lst:entry1}]
+\end{pseudo}
+\end{figure}
+
+\subsection{Details: Interaction with polymorphism}
+Depending on the choice of semantics for when monitor locks are acquired, interaction between monitors and \CFA's concept of polymorphism can be more complex to support. However, it is shown that entry-point locking solves most of the issues.
+
+First of all, interaction between \code{otype} polymorphism (see Section~\ref{s:ParametricPolymorphism}) and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. It is important to present the difference between the two acquiring options: \glspl{callsite-locking} and entry-point locking, i.e., acquiring the monitors before making a mutex routine-call or as the first operation of the mutex routine-call. For example:
+\begin{table}[H]
+\begin{center}
+\begin{tabular}{|c|c|c|}
+Mutex & \gls{callsite-locking} & \gls{entry-point-locking} \\
+call & pseudo-code & pseudo-code \\
+\hline
+\begin{cfacode}[tabsize=3]
+void foo(monitor& mutex a){
+
+	//Do Work
+	//...
+
+}
+
+void main() {
+	monitor a;
+
+	foo(a);
+
+}
+\end{cfacode} & \begin{pseudo}[tabsize=3]
+foo(& a) {
+
+	//Do Work
+	//...
+
+}
+
+main() {
+	monitor a;
+	acquire(a);
+	foo(a);
+	release(a);
+}
+\end{pseudo} & \begin{pseudo}[tabsize=3]
+foo(& a) {
+	acquire(a);
+	//Do Work
+	//...
+	release(a);
+}
+
+main() {
+	monitor a;
+
+	foo(a);
+
+}
+\end{pseudo}
+\end{tabular}
+\end{center}
+\caption{Call-site vs entry-point locking for mutex calls}
+\label{tbl:locking-site}
+\end{table}
+
+Note the \code{mutex} keyword relies on the type system, which means that in cases where a generic monitor-routine is desired, writing the mutex routine is possible with the proper trait, e.g.:
+\begin{cfacode}
+//Incorrect: T may not be monitor
+forall(dtype T)
+void foo(T * mutex t);
+
+//Correct: this function only works on monitors (any monitor)
+forall(dtype T | is_monitor(T))
+void bar(T * mutex t);
+\end{cfacode}
+
+Both entry point and \gls{callsite-locking} are feasible implementations. The current \CFA implementation uses entry-point locking because it requires less work when using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. It is harder to use \gls{raii} for call-site locking, as it does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e., the function body. For example, the monitor call can appear in the middle of an expression. Furthermore, entry-point locking requires less code generation since any useful routine is called multiple times but there is only one entry point for many call sites.
+
+% ======================================================================
+% ======================================================================
+\section{Threading} \label{impl:thread}
+% ======================================================================
+% ======================================================================
+
+Figure \ref{fig:system1} shows a high-level picture of the \CFA runtime system with regard to concurrency. Each component of the picture is explained in detail in the following sections.
+
+\begin{figure}
+\begin{center}
+{\resizebox{\textwidth}{!}{\input{system.pstex_t}}}
+\end{center}
+\caption{Overview of the entire system}
+\label{fig:system1}
+\end{figure}
+
+\subsection{Processors}
+Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically \texttt{pthread}s in the current implementation of \CFA. Indeed, any parallelism must go through operating-system libraries. However, \glspl{uthread} are still the main source of concurrency, processors are simply the underlying source of parallelism. Indeed, processor \glspl{kthread} simply fetch a \gls{uthread} from the scheduler and run it; they are effectively executers for user-threads. The main benefit of this approach is that it offers a well-defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics.
+
+\subsection{Stack Management}
+One of the challenges of this system is to reduce the footprint as much as possible. Specifically, all \texttt{pthread}s created also have a stack created with them, which should be used as much as possible. Normally, coroutines also create their own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the \gls{kthread} stack, effectively stealing the processor stack. The exception to this rule is the Main Processor, i.e., the initial \gls{kthread} that is given to any program. In order to respect C user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor, which can grow very large.
+
+\subsection{Context Switching}
+As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading, because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that the instruction pointer can be left untouched since the context-switch is always inside the same function. Threads, however, do not context-switch between each other directly. They context-switch to the scheduler. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen. Obviously, this doubles the context-switch cost because threads must context-switch to an intermediate stack. The alternative 1-step context-switch uses the stack of the ``from'' thread to schedule and then context-switches directly to the ``to'' thread. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). Additionally, for users in need of optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch (akin to the Microsoft \code{SwitchToFiber}~\cite{switchToWindows} routine). This option is not currently present in \CFA, but the changes required to add it are strictly additive.
+
+\subsection{Preemption} \label{preemption}
+Finally, an important aspect for any complete threading system is preemption. As mentioned in chapter \ref{basics}, preemption introduces an extra degree of uncertainty, which enables users to have multiple threads interleave transparently, rather than having to cooperate among threads for proper scheduling and CPU distribution. Indeed, preemption is desirable because it adds a degree of isolation among threads. In a fully cooperative system, any thread that runs a long loop can starve other threads, while in a preemptive system, starvation can still occur but it does not rely on every thread having to yield or block on a regular basis, which significantly reduces the programmer's burden. Obviously, preemption is not optimal for every workload. However, any preemptive system can become a cooperative system by making the time slices extremely large. Therefore, \CFA uses a preemptive threading system.
+
+Preemption in \CFA\footnote{Note that the implementation of preemption is strongly tied with the underlying threading system. For this reason, only the Linux implementation is covered; \CFA does not run on Windows at the time of writing.} is based on kernel timers, which are used to run a discrete-event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption, it inserts the time in a sorted order and sets a kernel timer for the closest one, effectively stepping through preemption events on each signal sent by the timer. These timers use the Linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread. This results in an implementation problem, because when delivering signals to a process, the kernel can deliver the signal to any kernel thread for which the signal is not blocked, i.e.:
+\begin{quote}
+A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked. If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which to deliver the signal.
+SIGNAL(7) - Linux Programmer's Manual
+\end{quote}
+For the sake of simplicity, and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every kernel thread except one.
+
+Now, because of how involuntary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread. Hence, involuntary context-switching is done by sending signal {\tt SIGUSR1} to the corresponding proces\-sor and having the thread yield from inside the signal handler. This approach effectively context-switches away from the signal handler back to the kernel and the signal handler frame is eventually unwound when the thread is scheduled again. As a result, a signal handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal handlers save and restore signal masks because user-thread migration can cause a signal mask to migrate from one kernel thread to another. This behaviour is only a problem if all kernel threads, among which a user thread can migrate, differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions.}. However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel-alarm thread can cause deadlocks. For this reason, the alarm thread is in a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). This unblocking is also done using {\tt SIGALRM}, but sent through \code{pthread_sigqueue}. Indeed, \code{sigwaitinfo} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel.
+
+\subsection{Scheduler}
+Finally, an aspect that was not mentioned yet is the scheduling algorithm. Currently, the \CFA scheduler uses a single ready queue for all processors, which is the simplest approach to scheduling. Further discussion on scheduling is present in section \ref{futur:sched}.
+
+% ======================================================================
+% ======================================================================
+\section{Internal Scheduling} \label{impl:intsched}
+% ======================================================================
+% ======================================================================
+The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:ClassicalMonitor} for convenience):
+
+\begin{figure}[H]
+\begin{center}
+{\resizebox{0.4\textwidth}{!}{\input{monitor}}}
+\end{center}
+\caption{Traditional illustration of a monitor}
+\end{figure}
+
+This picture has several components, the two most important being the entry queue and the AS-stack. The entry queue is an (almost) FIFO list where threads waiting to enter are parked, while the acceptor/signaller (AS) stack is a FILO list used for threads that have been signalled or otherwise marked as running next.
+
+For \CFA, this picture does not have support for blocking multiple monitors on a single condition. To support \gls{bulk-acq} two changes to this picture are required. First, it is no longer helpful to attach the condition to \emph{a single} monitor. Secondly, the thread waiting on the condition has to be separated across multiple monitors, seen in figure \ref{fig:monitor_cfa}.
+
+\begin{figure}[H]
+\begin{center}
+{\resizebox{0.8\textwidth}{!}{\input{int_monitor}}}
+\end{center}
+\caption{Illustration of \CFA Monitor}
+\label{fig:monitor_cfa}
+\end{figure}
+
+This picture and the proper entry and leave algorithms (see listing \ref{lst:entry2}) are the fundamental implementation of internal scheduling. Note that when a thread is moved from the condition to the AS-stack, it is conceptually split into N pieces, where N is the number of monitors specified in the parameter list. The thread is woken up when all the pieces have been popped from the AS-stacks and made active. In this picture, the threads are split into halves but this is only because there are two monitors. For a specific signalling operation every monitor needs a piece of the thread on its AS-stack.
+
+\begin{figure}[b]
+\begin{multicols}{2}
+Entry
+\begin{pseudo}
+if monitor is free
+	enter
+elif already own the monitor
+	continue
+else
+	block
+increment recursion
+
+\end{pseudo}
+\columnbreak
+Exit
+\begin{pseudo}
+decrement recursion
+if recursion == 0
+	if signal_stack not empty
+		set_owner to thread
+		if all monitors ready
+			wake-up thread
+
+	if entry queue not empty
+		wake-up thread
+\end{pseudo}
+\end{multicols}
+\begin{pseudo}[caption={Entry and exit routine for monitors with internal scheduling},label={lst:entry2}]
+\end{pseudo}
+\end{figure}
+
+The solution discussed in \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe as well as preventing any potential barging. The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the call stack of the \code{wait} and \code{signal_block} routines.
+
+\begin{figure}[H]
+\begin{center}
+{\resizebox{0.8\textwidth}{!}{\input{monitor_structs.pstex_t}}}
+\end{center}
+\caption{Data structures involved in internal/external scheduling}
+\label{fig:structs}
+\end{figure}
+
+Figure \ref{fig:structs} shows a high-level representation of these data structures. The main idea behind them is that a thread cannot contain an arbitrary number of intrusive ``next'' pointers for linking onto monitors. The \code{condition node} is the data structure that is queued onto a condition variable and, when signalled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. Once all the criteria have been popped from their respective AS-stacks, the thread is woken up, which is what is shown in listing \ref{lst:entry2}.
+
+% ======================================================================
+% ======================================================================
+\section{External Scheduling}
+% ======================================================================
+% ======================================================================
+Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. For internal scheduling, these queues are part of condition variables, which are still unique for a given scheduling operation (i.e., no signal statement uses multiple conditions). However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired. These monitors are the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. This requires an algorithm to choose which monitor holds the relevant queue. It is also important that said algorithm be independent of the order in which users list parameters. The proposed algorithm is to fall back on monitor lock ordering (sorting by address) and specify that the monitor that is acquired first is the one with the relevant waiting queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint.
+
+This algorithm choice has two consequences:
+\begin{itemize}
+	\item The queue of the monitor with the lowest address is no longer a true FIFO queue because threads can be moved to the front of the queue. These queues need to contain a set of monitors for each of the waiting threads. Therefore, another thread whose set contains the same lowest address monitor but different lower priority monitors may arrive first but enter the critical section after a thread with the correct pairing.
+	\item The queue of the lowest priority monitor is both required and potentially unused. Indeed, since it is not known at compile time which monitor is the monitor which has the lowest address, every monitor needs to have the correct queues even though it is possible that some queues go unused for the entire duration of the program, for example if a monitor is only used in a specific pair.
+\end{itemize}
+Therefore, the following modifications need to be made to support external scheduling:
+\begin{itemize}
+	\item The threads waiting on the entry queue need to keep track of which routine they are trying to enter, and using which set of monitors. The \code{mutex} routine already has all the required information on its stack, so the thread only needs to keep a pointer to that information.
+	\item The monitors need to keep a mask of acceptable routines. This mask contains for each acceptable routine, a routine pointer and an array of monitors to go with it. It also needs storage to keep track of which routine was accepted. Since this information is not specific to any monitor, the monitors actually contain a pointer to an integer on the stack of the waiting thread. Note that if a thread has acquired two monitors but executes a \code{waitfor} with only one monitor as a parameter, setting the mask of acceptable routines to both monitors will not cause any problems since the extra monitor will not change ownership regardless. This becomes relevant when \code{when} clauses affect the number of monitors passed to a \code{waitfor} statement.
+	\item The entry/exit routines need to be updated as shown in listing \ref{lst:entry3}.
+\end{itemize}
+
+\subsection{External Scheduling - Destructors}
+Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine. This routine is needed because of the storage requirements of the call order inversion. Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. For regular \code{waitfor} statements, the call stack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later. The \code{waitfor} semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor}.
+
+\begin{figure}
+\begin{multicols}{2}
+Entry
+\begin{pseudo}
+if monitor is free
+	enter
+elif already own the monitor
+	continue
+elif matches waitfor mask
+	push criteria to AS-stack
+	continue
+else
+	block
+increment recursion
+\end{pseudo}
+\columnbreak
+Exit
+\begin{pseudo}
+decrement recursion
+if recursion == 0
+	if signal_stack not empty
+		set_owner to thread
+		if all monitors ready
+			wake-up thread
+		endif
+	endif
+
+	if entry queue not empty
+		wake-up thread
+	endif
+\end{pseudo}
+\end{multicols}
+\begin{pseudo}[caption={Entry and exit routine for monitors with internal scheduling and external scheduling},label={lst:entry3}]
+\end{pseudo}
+\end{figure}
+
+\begin{figure}
+\begin{multicols}{2}
+Destructor Entry
+\begin{pseudo}
+if monitor is free
+	enter
+elif already own the monitor
+	increment recursion
+	return
+create wait context
+if matches waitfor mask
+	reset mask
+	push self to AS-stack
+	baton pass
+else
+	wait
+increment recursion
+\end{pseudo}
+\columnbreak
+Waitfor
+\begin{pseudo}
+if matching thread is already there
+	if found destructor
+		push destructor to AS-stack
+		unlock all monitors
+	else
+		push self to AS-stack
+		baton pass
+	endif
+	return
+endif
+if non-blocking
+	Unlock all monitors
+	Return
+endif
+
+push self to AS-stack
+set waitfor mask
+block
+return
+\end{pseudo}
+\end{multicols}
+\begin{pseudo}[caption={Pseudo code for the \code{waitfor} routine and the \code{mutex} entry routine for destructors},label={lst:entry-dtor}]
+\end{pseudo}
+\end{figure}
Index: doc/theses/thierry_delisle/text/intro.tex
===================================================================
--- doc/theses/thierry_delisle/text/intro.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/text/intro.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,8 @@
+% ======================================================================
+\chapter{Introduction}
+% ======================================================================
+This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, called \CFA. Furthermore, the proposed \acrshort{api} doubles as an early definition of the \CFA language and library. This thesis also provides an implementation of the concurrency library for \CFA as well as all the required language features added to the source-to-source translator.
+
+There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the programmer. While these two concepts are often combined, they are in fact distinct, requiring different tools~\cite{Buhr05a}. Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization.
+
+In the context of this thesis, a \textbf{thread} is a fundamental unit of execution that runs a sequence of code, generally on a program stack. Having multiple simultaneous threads gives rise to concurrency and generally requires some kind of locking mechanism to ensure proper execution. Correspondingly, \textbf{concurrency} is defined as the concepts and challenges that occur when multiple independent (sharing memory, timing dependencies, etc.) concurrent threads are introduced. Accordingly, \textbf{locking} (and by extension locks) are defined as a mechanism that prevents the progress of certain threads in order to avoid problems due to concurrency. Finally, in this thesis \textbf{parallelism} is distinct from concurrency and is defined as running multiple threads simultaneously. More precisely, parallelism implies \emph{actual} simultaneous execution as opposed to concurrency which only requires \emph{apparent} simultaneous execution. As such, parallelism is only observable in the differences in performance or, more generally, differences in timing.
Index: doc/theses/thierry_delisle/text/parallelism.tex
===================================================================
--- doc/theses/thierry_delisle/text/parallelism.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/text/parallelism.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,39 @@
+% ######     #    ######     #    #       #       ####### #       ###  #####  #     #
+% #     #   # #   #     #   # #   #       #       #       #        #  #     # ##   ##
+% #     #  #   #  #     #  #   #  #       #       #       #        #  #       # # # #
+% ######  #     # ######  #     # #       #       #####   #        #   #####  #  #  #
+% #       ####### #   #   ####### #       #       #       #        #        # #     #
+% #       #     # #    #  #     # #       #       #       #        #  #     # #     #
+% #       #     # #     # #     # ####### ####### ####### ####### ###  #####  #     #
+\chapter{Parallelism}
+Historically, computer performance was about processor speeds and instruction counts. However, with heat dissipation being a direct consequence of speed increase, parallelism has become the new source for increased performance~\cite{Sutter05, Sutter05b}. In this decade, it is no longer reasonable to create a high-performance application without caring about parallelism. Indeed, parallelism is an important aspect of performance and more specifically throughput and hardware utilization. The lowest-level approach of parallelism is to use \glspl{kthread} in combination with semantics like \code{fork}, \code{join}, etc. However, since these have significant costs and limitations, \glspl{kthread} are now mostly used as an implementation tool rather than a user oriented one. There are several alternatives to solve these issues that all have strengths and weaknesses. While there are many variations of the presented paradigms, most of these variations do not actually change the guarantees or the semantics, they simply move costs in order to achieve better performance for certain workloads.
+
+\section{Paradigms}
+\subsection{User-Level Threads}
+A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provides but can be used on a much larger scale. This approach is the most powerful solution as it allows all the features of multithreading, while removing several of the more expensive costs of kernel threads. The downside is that almost none of the low-level threading problems are hidden; users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong guarantees, but the parallelism toolkit offers very little to reduce complexity in itself.
+
+Examples of languages that support \glspl{uthread} are Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
+
+\subsection{Fibers : User-Level Threads Without Preemption} \label{fibers}
+A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantic differences with \glspl{uthread}. The significant difference between \glspl{uthread} and \glspl{fiber} is the lack of \gls{preemption} in the latter. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths, but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.
+
+An example of a language that uses fibers is Go~\cite{Go}.
+
+\subsection{Jobs and Thread Pools}
+An approach on the opposite end of the spectrum is to base parallelism on \glspl{pool}. Indeed, \glspl{pool} offer limited flexibility but at the benefit of a simpler user interface. In \gls{pool} based systems, users express parallelism as units of work, called jobs, and a dependency graph (either explicit or implicit) that ties them together. This approach means users need not worry about concurrency but significantly limits the interaction that can occur among jobs. Indeed, any \gls{job} that blocks also blocks the underlying worker, which effectively means the CPU utilization, and therefore throughput, suffers noticeably. It can be argued that a solution to this problem is to use more workers than available cores. However, unless the number of jobs and the number of workers are comparable, having a significant number of blocked jobs always results in idle cores.
+
+The gold standard of this implementation is Intel's TBB library~\cite{TBB}.
+
+\subsection{Paradigm Performance}
+While the choice between the three paradigms listed above may have significant performance implications, it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantees equivalent performance across paradigms and that the \gls{pool}-based system has the best efficiency thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilization, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large enough, the paradigm choice is largely amortized by the actual work done.
+
+\section{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel}
+A \gls{cfacluster} is a group of \glspl{kthread} executed in isolation. \Glspl{uthread} are scheduled on the \glspl{kthread} of a given \gls{cfacluster}, allowing organization between \glspl{uthread} and \glspl{kthread}. It is important that \glspl{kthread} belonging to the same \gls{cfacluster} have homogeneous settings, otherwise migrating a \gls{uthread} from one \gls{kthread} to the other can cause issues. A \gls{cfacluster} also offers a pluggable scheduler that can optimize the workload generated by the \glspl{uthread}.
+
+\Glspl{cfacluster} have not been fully implemented in the context of this thesis. Currently \CFA only supports one \gls{cfacluster}, the initial one.
+
+\subsection{Future Work: Machine Setup}\label{machine}
+While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, a system using \acrshort{numa} configurations may benefit from users being able to tie clusters and/or kernel threads to certain CPU cores. OS support for CPU affinity is now common~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
+
+\subsection{Paradigms}\label{cfaparadigms}
+Given these building blocks, it is possible to reproduce all three of the popular paradigms. Indeed, \glspl{uthread} is the default paradigm in \CFA. However, disabling \gls{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policies can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application. Finally, it is possible to build executors for thread pools from \glspl{uthread} or \glspl{fiber}, which includes specialized jobs like actors~\cite{Actors}.
Index: doc/theses/thierry_delisle/text/results.tex
===================================================================
--- doc/theses/thierry_delisle/text/results.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/text/results.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,332 @@
+% ======================================================================
+% ======================================================================
+\chapter{Performance Results} \label{results}
+% ======================================================================
+% ======================================================================
+\section{Machine Setup}
+Table \ref{tab:machine} shows the characteristics of the machine used to run the benchmarks. All tests were made on this machine.
+\begin{table}[H]
+\begin{center}
+\begin{tabular}{| l | r | l | r |}
+\hline
+Architecture		& x86\_64 			& NUMA node(s) 	& 8 \\
+\hline
+CPU op-mode(s)		& 32-bit, 64-bit 		& Model name 	& AMD Opteron\texttrademark  Processor 6380 \\
+\hline
+Byte Order			& Little Endian 		& CPU Freq 		& 2.5\si{\giga\hertz} \\
+\hline
+CPU(s)			& 64 				& L1d cache 	& \SI{16}{\kibi\byte} \\
+\hline
+Thread(s) per core	& 2 				& L1i cache 	& \SI{64}{\kibi\byte} \\
+\hline
+Core(s) per socket	& 8 				& L2 cache 		& \SI{2048}{\kibi\byte} \\
+\hline
+Socket(s)			& 4 				& L3 cache 		& \SI{6144}{\kibi\byte} \\
+\hline
+\hline
+Operating system		& Ubuntu 16.04.3 LTS	& Kernel		& Linux 4.4-97-generic \\
+\hline
+Compiler			& GCC 6.3 		& Translator	& CFA 1 \\
+\hline
+Java version		& OpenJDK-9 		& Go version	& 1.9.2 \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Machine setup used for the tests}
+\label{tab:machine}
+\end{table}
+
+\section{Micro Benchmarks}
+All benchmarks are run using the same harness to produce the results, seen as the \code{BENCH()} macro in the following examples. This macro uses the following logic to benchmark the code:
+\begin{pseudo}
+#define BENCH(run, result) \
+	before = gettime(); \
+	run; \
+	after  = gettime(); \
+	result = (after - before) / N;
+\end{pseudo}
+The method used to get time is \code{clock_gettime(CLOCK_THREAD_CPUTIME_ID);}. Each benchmark is using many iterations of a simple call to measure the cost of the call. The specific number of iterations depends on the specific benchmark.
+
+\subsection{Context-Switching}
+The first interesting benchmark is to measure how long context-switches take. The simplest approach to do this is to yield on a thread, which executes a 2-step context switch. Yielding causes the thread to context-switch to the scheduler and back, more precisely: from the \gls{uthread} to the \gls{kthread} then from the \gls{kthread} back to the same \gls{uthread} (or a different one in the general case). In order to make the comparison fair, coroutines also execute a 2-step context-switch by resuming another coroutine which does nothing but suspending in a tight loop, which is a resume/suspend cycle instead of a yield. Listing \ref{lst:ctx-switch} shows the code for coroutines and threads with the results in table \ref{tab:ctx-switch}. All omitted tests are functionally identical to one of these tests. The difference between coroutines and threads can be attributed to the cost of scheduling.
+\begin{figure}
+\begin{multicols}{2}
+\CFA Coroutines
+\begin{cfacode}
+coroutine GreatSuspender {};
+void main(GreatSuspender& this) {
+	while(true) { suspend(); }
+}
+int main() {
+	GreatSuspender s;
+	resume(s);
+	BENCH(
+		for(size_t i=0; i<n; i++) {
+			resume(s);
+		},
+		result
+	)
+	printf("%llu\n", result);
+}
+\end{cfacode}
+\columnbreak
+\CFA Threads
+\begin{cfacode}
+
+
+
+
+int main() {
+
+
+	BENCH(
+		for(size_t i=0; i<n; i++) {
+			yield();
+		},
+		result
+	)
+	printf("%llu\n", result);
+}
+\end{cfacode}
+\end{multicols}
+\begin{cfacode}[caption={\CFA benchmark code used to measure context-switches for coroutines and threads.},label={lst:ctx-switch}]
+\end{cfacode}
+\end{figure}
+
+\begin{table}
+\begin{center}
+\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
+\cline{2-4}
+\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
+\hline
+Kernel Thread	& 241.5	& 243.86	& 5.08 \\
+\CFA Coroutine	& 38		& 38		& 0    \\
+\CFA Thread		& 103		& 102.96	& 2.96 \\
+\uC Coroutine	& 46		& 45.86	& 0.35 \\
+\uC Thread		& 98		& 99.11	& 1.42 \\
+Goroutine		& 150		& 149.96	& 3.16 \\
+Java Thread		& 289		& 290.68	& 8.72 \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Context Switch comparison. All numbers are in nanoseconds(\si{\nano\second})}
+\label{tab:ctx-switch}
+\end{table}
+
+\subsection{Mutual-Exclusion}
+The next interesting benchmark is to measure the overhead to enter/leave a critical-section. For monitors, the simplest approach is to measure how long it takes to enter and leave a monitor routine. Listing \ref{lst:mutex} shows the code for \CFA. To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a \code{pthread_mutex} lock are also measured. The results are shown in table \ref{tab:mutex}.
+
+\begin{figure}
+\begin{cfacode}[caption={\CFA benchmark code used to measure mutex routines.},label={lst:mutex}]
+monitor M {};
+void __attribute__((noinline)) call( M & mutex m /*, m2, m3, m4*/ ) {}
+
+int main() {
+	M m/*, m2, m3, m4*/;
+	BENCH(
+		for(size_t i=0; i<n; i++) {
+			call(m/*, m2, m3, m4*/);
+		},
+		result
+	)
+	printf("%llu\n", result);
+}
+\end{cfacode}
+\end{figure}
+
+\begin{table}
+\begin{center}
+\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
+\cline{2-4}
+\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
+\hline
+C routine						& 2		& 2		& 0    \\
+FetchAdd + FetchSub				& 26		& 26		& 0    \\
+Pthreads Mutex Lock				& 31		& 31.86	& 0.99 \\
+\uC \code{monitor} member routine		& 30		& 30		& 0    \\
+\CFA \code{mutex} routine, 1 argument	& 41		& 41.57	& 0.9  \\
+\CFA \code{mutex} routine, 2 arguments	& 76		& 76.96	& 1.57 \\
+\CFA \code{mutex} routine, 4 arguments	& 145		& 146.68	& 3.85 \\
+Java synchronized routine			& 27		& 28.57	& 2.6  \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Mutex routine comparison. All numbers are in nanoseconds(\si{\nano\second})}
+\label{tab:mutex}
+\end{table}
+
+\subsection{Internal Scheduling}
+The internal-scheduling benchmark measures the cost of waiting on and signalling a condition variable. Listing \ref{lst:int-sched} shows the code for \CFA, with results in table \ref{tab:int-sched}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests.
+
+\begin{figure}
+\begin{cfacode}[caption={Benchmark code for internal scheduling},label={lst:int-sched}]
+volatile int go = 0;
+condition c;
+monitor M {};
+M m1;
+
+void __attribute__((noinline)) do_call( M & mutex a1 ) { signal(c); }
+
+thread T {};
+void ^?{}( T & mutex this ) {}
+void main( T & this ) {
+	while(go == 0) { yield(); }
+	while(go == 1) { do_call(m1); }
+}
+int  __attribute__((noinline)) do_wait( M & mutex a1 ) {
+	go = 1;
+	BENCH(
+		for(size_t i=0; i<n; i++) {
+			wait(c);
+		},
+		result
+	)
+	printf("%llu\n", result);
+	go = 0;
+	return 0;
+}
+int main() {
+	T t;
+	return do_wait(m1);
+}
+\end{cfacode}
+\end{figure}
+
+\begin{table}
+\begin{center}
+\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
+\cline{2-4}
+\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
+\hline
+Pthreads Condition Variable			& 5902.5	& 6093.29 	& 714.78 \\
+\uC \code{signal}					& 322		& 323 	& 3.36   \\
+\CFA \code{signal}, 1 \code{monitor}	& 352.5	& 353.11	& 3.66   \\
+\CFA \code{signal}, 2 \code{monitor}	& 430		& 430.29	& 8.97   \\
+\CFA \code{signal}, 4 \code{monitor}	& 594.5	& 606.57	& 18.33  \\
+Java \code{notify}				& 13831.5	& 15698.21	& 4782.3 \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Internal scheduling comparison. All numbers are in nanoseconds(\si{\nano\second})}
+\label{tab:int-sched}
+\end{table}
+
+\subsection{External Scheduling}
+The external-scheduling benchmark measures the cost of the \code{waitfor} statement (\code{_Accept} in \uC). Listing \ref{lst:ext-sched} shows the code for \CFA, with results in table \ref{tab:ext-sched}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests.
+
+\begin{figure}
+\begin{cfacode}[caption={Benchmark code for external scheduling},label={lst:ext-sched}]
+volatile int go = 0;
+monitor M {};
+M m1;
+thread T {};
+
+void __attribute__((noinline)) do_call( M & mutex a1 ) {}
+
+void ^?{}( T & mutex this ) {}
+void main( T & this ) {
+	while(go == 0) { yield(); }
+	while(go == 1) { do_call(m1); }
+}
+int  __attribute__((noinline)) do_wait( M & mutex a1 ) {
+	go = 1;
+	BENCH(
+		for(size_t i=0; i<n; i++) {
+			waitfor(do_call, a1);
+		},
+		result
+	)
+	printf("%llu\n", result);
+	go = 0;
+	return 0;
+}
+int main() {
+	T t;
+	return do_wait(m1);
+}
+\end{cfacode}
+\end{figure}
+
+\begin{table}
+\begin{center}
+\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
+\cline{2-4}
+\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
+\hline
+\uC \code{_Accept}					& 350		& 350.61	& 3.11  \\
+\CFA \code{waitfor}, 1 \code{monitor}	& 358.5	& 358.36	& 3.82  \\
+\CFA \code{waitfor}, 2 \code{monitor}	& 422		& 426.79	& 7.95  \\
+\CFA \code{waitfor}, 4 \code{monitor}	& 579.5	& 585.46	& 11.25 \\
+\hline
+\end{tabular}
+\end{center}
+\caption{External scheduling comparison. All numbers are in nanoseconds (\si{\nano\second}).}
+\label{tab:ext-sched}
+\end{table}
+
+\subsection{Object Creation}
+Finally, the last benchmark measures the cost of creation for concurrent objects. Listing \ref{lst:creation} shows the code for \texttt{pthread}s and \CFA threads, with results shown in table \ref{tab:creation}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests. The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine, the creation cost is very low.
+
+\begin{figure}
+\begin{center}
+\texttt{pthread}
+\begin{ccode}
+int main() {
+	BENCH(
+		for(size_t i=0; i<n; i++) {
+			pthread_t thread;
+			if(pthread_create(&thread,NULL,foo,NULL)<0) {
+				perror( "failure" );
+				return 1;
+			}
+
+			if(pthread_join(thread, NULL)<0) {
+				perror( "failure" );
+				return 1;
+			}
+		},
+		result
+	)
+	printf("%llu\n", result);
+}
+\end{ccode}
+
+
+
+\CFA Threads
+\begin{cfacode}
+int main() {
+	BENCH(
+		for(size_t i=0; i<n; i++) {
+			MyThread m;
+		},
+		result
+	)
+	printf("%llu\n", result);
+}
+\end{cfacode}
+\end{center}
+\begin{cfacode}[caption={Benchmark code for \texttt{pthread}s and \CFA to measure object creation},label={lst:creation}]
+\end{cfacode}
+\end{figure}
+
+\begin{table}
+\begin{center}
+\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
+\cline{2-4}
+\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
+\hline
+Pthreads			& 26996	& 26984.71	& 156.6  \\
+\CFA Coroutine Lazy	& 6		& 5.71	& 0.45   \\
+\CFA Coroutine Eager	& 708		& 706.68	& 4.82   \\
+\CFA Thread			& 1173.5	& 1176.18	& 15.18  \\
+\uC Coroutine		& 109		& 107.46	& 1.74   \\
+\uC Thread			& 526		& 530.89	& 9.73   \\
+Goroutine			& 2520.5	& 2530.93	& 61.56  \\
+Java Thread			& 91114.5	& 92272.79	& 961.58 \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Creation comparison. All numbers are in nanoseconds(\si{\nano\second}).}
+\label{tab:creation}
+\end{table}
Index: doc/theses/thierry_delisle/text/together.tex
===================================================================
--- doc/theses/thierry_delisle/text/together.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/text/together.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,140 @@
+% ======================================================================
+% ======================================================================
+\chapter{Putting It All Together}
+% ======================================================================
+% ======================================================================
+
+
+\section{Threads As Monitors}
+As was subtly alluded to in section \ref{threads}, \code{thread}s in \CFA are in fact monitors, which means that all monitor features are available when using threads. For example, here is a very simple two-thread pipeline that could be used for a simulator of a game engine:
+\begin{figure}[H]
+\begin{cfacode}[caption={Toy simulator using \code{thread}s and \code{monitor}s.},label={lst:engine-v1}]
+// Visualization declaration
+thread Renderer {} renderer;
+Frame * simulate( Simulator & this );
+
+// Simulation declaration
+thread Simulator{} simulator;
+void render( Renderer & this );
+
+// Blocking call used as communication
+void draw( Renderer & mutex this, Frame * frame );
+
+// Simulation loop
+void main( Simulator & this ) {
+	while( true ) {
+		Frame * frame = simulate( this );
+		draw( renderer, frame );
+	}
+}
+
+// Rendering loop
+void main( Renderer & this ) {
+	while( true ) {
+		waitfor( draw, this );
+		render( this );
+	}
+}
+\end{cfacode}
+\end{figure}
+One of the obvious complaints about the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner:
+\begin{figure}[H]
+\begin{cfacode}[caption={Same toy simulator with proper termination condition.},label={lst:engine-v2}]
+// Visualization declaration
+thread Renderer {} renderer;
+Frame * simulate( Simulator & this );
+
+// Simulation declaration
+thread Simulator{} simulator;
+void render( Renderer & this );
+
+// Blocking call used as communication
+void draw( Renderer & mutex this, Frame * frame );
+
+// Simulation loop
+void main( Simulator & this ) {
+	while( true ) {
+		Frame * frame = simulate( this );
+		draw( renderer, frame );
+
+		// Exit main loop after the last frame
+		if( frame->is_last ) break;
+	}
+}
+
+// Rendering loop
+void main( Renderer & this ) {
+	while( true ) {
+		   waitfor( draw, this );
+		or waitfor( ^?{}, this ) {
+			// Add an exit condition
+			break;
+		}
+
+		render( this );
+	}
+}
+
+// Call destructor for simulator once simulator finishes
+// Call destructor for renderer to signify shutdown
+\end{cfacode}
+\end{figure}
+
+\section{Fibers \& Threads}
+As mentioned in section \ref{preemption}, \CFA uses preemptive threads by default but can use fibers on demand. Currently, using fibers is done by adding the following line of code to the program:
+\begin{cfacode}
+unsigned int default_preemption() {
+	return 0;
+}
+\end{cfacode}
+This function is called by the kernel to fetch the default preemption rate, where 0 signifies an infinite time-slice, i.e., no preemption. However, once clusters are fully implemented, it will be possible to create fibers and \glspl{uthread} in the same system, as in listing \ref{lst:fiber-uthread}.
+\begin{figure}
+\begin{cfacode}[caption={Using fibers and \glspl{uthread} side-by-side in \CFA},label={lst:fiber-uthread}]
+//Cluster forward declaration
+struct cluster;
+
+//Processor forward declaration
+struct processor;
+
+//Construct clusters with a preemption rate
+void ?{}(cluster& this, unsigned int rate);
+//Construct processor and add it to cluster
+void ?{}(processor& this, cluster& cluster);
+//Construct thread and schedule it on cluster
+void ?{}(thread& this, cluster& cluster);
+
+//Declare two clusters
+cluster thread_cluster = { 10`ms };			//Preempt every 10 ms
+cluster fibers_cluster = { 0 };				//Never preempt
+
+//Construct 4 processors
+processor processors[4] = {
+	//2 for the thread cluster
+	thread_cluster;
+	thread_cluster;
+	//2 for the fibers cluster
+	fibers_cluster;
+	fibers_cluster;
+};
+
+//Declares thread
+thread UThread {};
+void ?{}(UThread& this) {
+	//Construct underlying thread to automatically
+	//be scheduled on the thread cluster
+	(this){ thread_cluster }
+}
+
+void main(UThread & this);
+
+//Declares fibers
+thread Fiber {};
+void ?{}(Fiber& this) {
+	//Construct underlying thread to automatically
+	//be scheduled on the fiber cluster
+	(this.__thread){ fibers_cluster }
+}
+
+void main(Fiber & this);
+\end{cfacode}
+\end{figure}
Index: doc/theses/thierry_delisle/thePlan.md
===================================================================
--- doc/theses/thierry_delisle/thePlan.md	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/thePlan.md	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,26 @@
+_Phase 1_ : Prototype
+done - Threads.
+done - Main thread is a cfa thread.
+done - SimpleBlockingLock.
+done - Synchronisation points in thread destructors.
+done - Processors & SpinLock.
+
+_Phase 2_ : Minimum Viable Product
+done - Monitor type and enter/leave mutex member routines
+done - Multi-monitor calls.
+done - Monitors as a language feature (not calling enter/leave by hand)
+
+_Phase 3_ : Monitor features
+Internal scheduling
+External scheduling
+
+_Phase 4_ : Kernel features
+Preemption
+Detach thread
+Cluster migration
+
+_Phase 5_ : Performance
+Proper scheduler
+...
+
+
Index: doc/theses/thierry_delisle/thesis.tex
===================================================================
--- doc/theses/thierry_delisle/thesis.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/thesis.tex	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,146 @@
+% requires tex packages: texlive-base texlive-latex-base tex-common texlive-humanities texlive-latex-extra texlive-fonts-recommended
+
+% inline code ©...© (copyright symbol) emacs: C-q M-)
+% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
+% blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
+% green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
+% LaTeX escape §...§ (section symbol) emacs: C-q M-'
+% keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
+% math escape $...$ (dollar symbol)
+
+\documentclass[letterpaper,12pt,titlepage,oneside,final]{book}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% Latex packages used in the document.
+\usepackage[T1]{fontenc}					% allow Latin1 (extended ASCII) characters
+\usepackage{textcomp}
+\usepackage[latin1]{inputenc}
+\usepackage{fullpage,times,comment}
+\usepackage{epic,eepic}
+\usepackage{upquote}						% switch curled `'" to straight
+\usepackage{dirtytalk}
+\usepackage{calc}
+\usepackage{xspace}
+\usepackage[labelformat=simple]{subfig}
+\renewcommand{\thesubfigure}{(\alph{subfigure})}
+\usepackage{graphicx}
+\usepackage{tabularx}
+\usepackage{multicol}
+\usepackage[acronym]{glossaries}
+\usepackage{varioref}
+\usepackage{listings}						% format program code
+\usepackage[flushmargin]{footmisc}				% support label/reference in footnote
+\usepackage{latexsym}						% \Box glyph
+\usepackage{mathptmx}						% better math font with "times"
+\usepackage[usenames]{color}
+\usepackage[pagewise]{lineno}
+\renewcommand{\linenumberfont}{\scriptsize\sffamily}
+\usepackage{fancyhdr}
+\usepackage{float}
+\usepackage{siunitx}
+\sisetup{ binary-units=true }
+\input{style}							% bespoke macros used in the document
+\usepackage{url}
+\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
+\usepackage{breakurl}
+\urlstyle{rm}
+
+\usepackage{tikz}
+\def\checkmark{\tikz\fill[scale=0.4](0,.35) -- (.25,0) -- (1,.7) -- (.25,.15) -- cycle;}
+
+\setlength{\topmargin}{-0.45in}				% move running title into header
+\setlength{\headsep}{0.25in}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% Names used in the document.
+
+\newcommand{\Version}{1.0.0}
+\newcommand{\CS}{C\raisebox{-0.9ex}{\large$^\sharp$}\xspace}
+
+\newcommand{\Textbf}[2][red]{{\color{#1}{\textbf{#2}}}}
+\newcommand{\Emph}[2][red]{{\color{#1}\textbf{\emph{#2}}}}
+\newcommand{\R}[1]{\Textbf{#1}}
+\newcommand{\B}[1]{{\Textbf[blue]{#1}}}
+\newcommand{\G}[1]{{\Textbf[OliveGreen]{#1}}}
+\newcommand{\uC}{$\mu$\CC}
+\newcommand{\cit}{\textsuperscript{[Citation Needed]}\xspace}
+\newcommand{\TODO}{{\Textbf{TODO}}}
+
+\input{glossary}
+
+\newsavebox{\LstBox}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\setcounter{secnumdepth}{2}                           % number down to subsections
+\setcounter{tocdepth}{2}                              % down to subsections in table of contents
+% \linenumbers                                       	% comment out to turn off line numbering
+\makeindex
+\pagestyle{fancy}
+\fancyhf{}
+\cfoot{\thepage}
+\rfoot{v\input{version}}
+
+
+
+%======================================================================
+%   L O G I C A L    D O C U M E N T -- the content of your thesis
+%======================================================================
+\begin{document}
+
+% For a large document, it is a good idea to divide your thesis
+% into several files, each one containing one chapter.
+% To illustrate this idea, the "front pages" (i.e., title page,
+% declaration, borrowers' page, abstract, acknowledgements,
+% dedication, table of contents, list of tables, list of figures,
+% nomenclature) are contained within the file "frontpgs.tex", which is
+% included into the document by the following statement.
+%----------------------------------------------------------------------
+% FRONT MATERIAL
+%----------------------------------------------------------------------
+\input{frontpgs}
+
+%----------------------------------------------------------------------
+% MAIN BODY
+%----------------------------------------------------------------------
+
+\input{intro}
+
+\input{cforall}
+
+\input{basics}
+
+\input{concurrency}
+
+\input{parallelism}
+
+\input{internals}
+
+\input{together}
+
+\input{results}
+
+\input{future}
+
+
+\clearpage
+
+% B I B L I O G R A P H Y
+% -----------------------------
+\addcontentsline{toc}{chapter}{Bibliography}
+\bibliographystyle{plain}
+\bibliography{pl,local}
+\cleardoublepage
+\phantomsection		% allows hyperref to link to the correct page
+
+% G L O S S A R Y
+% -----------------------------
+\addcontentsline{toc}{chapter}{Glossary}
+\printglossary
+\cleardoublepage
+\phantomsection		% allows hyperref to link to the correct page
+
+
+\end{document}
Index: doc/theses/thierry_delisle/version
===================================================================
--- doc/theses/thierry_delisle/version	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
+++ doc/theses/thierry_delisle/version	(revision 5e2c348d7200f13e88bb910c1e095982dd72bc7b)
@@ -0,0 +1,1 @@
+0.11.403