Changes in / [8b590a4:1c35c78]


Ignore:
Location:
doc
Files:
48 deleted
12 edited

Legend:

Unmodified
Added
Removed
  • doc/bibliography/pl.bib

    r8b590a4 r1c35c78  
    11461146    author      = {Tarditi, David and Elliott, Archibald Samuel and Ruef, Andrew and Hicks, Michael},
    11471147    title       = {Checked C: Making C Safe by Extension},
    1148     booktitle   = {2018 IEEE Cybersecurity Development (SecDev)}
    1149     year = {2018},
    1150     month = {September},
    1151     pages = {53-60},
    1152     publisher = {IEEE},
    1153     url = {https://www.microsoft.com/en-us/research/publication/checkedc-making-c-safe-by-extension/},
    1154 }
    1155 
    1156 @misc{Clang,
    1157     keywords = {clang},
    1158     contributer = {a3moss@uwaterloo.ca},
    1159     title = {Clang: a {C} language family frontend for {LLVM}},
    1160     howpublished = {\href{https://clang.llvm.org/}{https://\-clang.llvm.org/}},
    1161     note = {Accessed 2019-02-22}
     1148    booktitle   = {2018 IEEE Cybersecurity Development (SecDev)},
     1149    publisher   = {IEEE},
     1150    year        = {2018},
     1151    month       = seep,
     1152    pages       = {53-60},
     1153    url         = {https://www.microsoft.com/en-us/research/publication/checkedc-making-c-safe-by-extension/},
    11621154}
    11631155
     
    24122404}
    24132405
    2414 @misc{Dotty-github,
    2415     keywords = {dotty,scala},
    2416     contributer = {a3moss@uwaterloo.ca},
    2417     author = {Martin Odersky},
    2418     title = {Dotty},
    2419     howpublished = {\href{https://github.com/lampepfl/dotty}{https://\-github.com/\-lampepfl/\-dotty}},
    2420     note = {Acessed: 2019-02-22}
    2421 }
    2422 
    24232406@unpublished{Duff83,
    24242407    keywords    = {C, switch statement, control flow},
     
    25262509    pages       = {325-361},
    25272510}
    2528 
    2529 @article{Tarjan75,
    2530  keywords = {union-find},
    2531  contributer = {a3moss@uwaterloo.ca},
    2532  author = {Tarjan, Robert Endre},
    2533  title = {Efficiency of a Good But Not Linear Set Union Algorithm},
    2534  journal = {J. ACM},
    2535  issue_date = {April 1975},
    2536  volume = {22},
    2537  number = {2},
    2538  month = apr,
    2539  year = {1975},
    2540  issn = {0004-5411},
    2541  pages = {215--225},
    2542  numpages = {11},
    2543  url = {http://doi.acm.org/10.1145/321879.321884},
    2544  doi = {10.1145/321879.321884},
    2545  acmid = {321884},
    2546  publisher = {ACM},
    2547  address = {New York, NY, USA},
    2548 }
    25492511
    25502512@book{Eiffel,
     
    43004262}
    43014263
    4302 @misc{Matsakis17,
    4303     keywords    = {Rust, Chalk, PROLOG},
    4304     contributer = {a3moss@uwaterloo.ca},
    4305     author      = {Nicholas Matsakis},
    4306     title       = {Lowering {Rust} traits to logic},
    4307     month       = jan,
    4308     year        = 2017,
    4309     howpublished= {\href{http://smallcultfollowing.com/babysteps/blog/2017/01/26/lowering-rust-traits-to-logic/}
    4310                   {http://smallcultfollowing.com/\-babysteps/\-blog/\-2017/\-01/\-26/\-lowering-rust-traits-to-logic/}},
    4311     optnote     = {Accessed: 2019-01},
    4312 }
    4313 
    43144264@article{Cormack89,
    43154265    keywords    = {parsing, LR, error recovery},
     
    43444294    publisher   = {Motorola},
    43454295    year        = 1992,
    4346 }
    4347 
    4348 @misc{Haberman16,
    4349     keywords    = {C++ template expansion},
    4350     contributer = {a3moss@uwaterloo.ca},
    4351     author      = {Josh Haberman},
    4352     title       = {Making arbitrarily-large binaries from fixed-size {C}{\kern-.1em\hbox{\large\texttt{+\kern-.25em+}}} code},
    4353     year        = 2016,
    4354     howpublished= {\href{http://blog.reverberate.org/2016/01/making-arbitrarily-large-binaries-from.html}
    4355                   {
    4356           {http://blog.reverberate.org/\-2016/\-01/\-making-arbitrarily-large-binaries-from.html}
    4357           }},
    4358     optnote     = {Accessed: 2016-09},
    43594296}
    43604297
  • doc/theses/aaron_moss_PhD/phd/Makefile

    r8b590a4 r1c35c78  
    22BIBDIR = ../../../bibliography
    33EVALDIR = evaluation
    4 FIGDIR = figures
    54TEXLIB = .:${BUILD}:${BIBDIR}:
    65
     
    98BIBTEX = BIBINPUTS=${TEXLIB} && export BIBINPUTS && bibtex
    109
    11 VPATH = ${EVALDIR} ${FIGDIR}
     10VPATH = ${EVALDIR}
    1211
    1312BASE = thesis
     
    2322background \
    2423generic-types \
     24type-environment \
    2525resolution-heuristics \
    26 type-environment \
    27 experiments \
    2826conclusion \
    29 generic-bench \
    30 }
    31 
    32 FIGURES = ${addsuffix .eps, \
    33 safe-conv-graph \
    34 resolution-dag \
    35 union-find-with-classes \
    36 persistent-union-find \
    3727}
    3828
    3929GRAPHS = ${addsuffix .tex, \
    4030generic-timing \
    41 tests-completed \
    42 per-prob-histo \
    43 per-prob-depth \
    44 cfa-time \
    4531}
    4632
     
    6147        dvips ${BUILD}/$< -o ${BUILD}/$@
    6248
    63 ${BASE}.dvi : Makefile ${SOURCES} ${GRAPHS} ${FIGURES} ${BIBFILE} ${BUILD}
     49${BASE}.dvi : Makefile ${SOURCES} ${GRAPHS} ${BIBFILE} ${BUILD}
    6450        ${LATEX} ${BASE}
    6551        ${BIBTEX} ${BUILD}/${BASE}
     
    6753        ${LATEX} ${BASE}
    6854
    69 generic-timing.tex : generic-timing.gp generic-timing.dat ${BUILD}
     55${GRAPHS} : generic-timing.gp generic-timing.dat ${BUILD}
    7056        gnuplot -e BUILD="'${BUILD}/'" ${EVALDIR}/generic-timing.gp
    71        
    72 tests-completed.tex : algo-summary.gp algo-summary.dat bu-summary.dat ${BUILD}
    73         gnuplot -e BUILD="'${BUILD}/'" ${EVALDIR}/algo-summary.gp
    74 
    75 per-prob-histo.tex : per-prob.gp per-prob.tsv ${BUILD}
    76         gnuplot -e BUILD="'${BUILD}/'" ${EVALDIR}/per-prob.gp
    77 
    78 per-prob-depth.tex : per-prob-scatter.gp ${BUILD}
    79         gnuplot -e BUILD="'${BUILD}/'" ${EVALDIR}/per-prob-scatter.gp
    80 
    81 cfa-time.tex : cfa-plots.gp cfa-time.tsv cfa-mem.tsv ${BUILD}
    82         gnuplot -e BUILD="'${BUILD}/'" ${EVALDIR}/cfa-plots.gp
    8357
    8458${BUILD}:
  • doc/theses/aaron_moss_PhD/phd/background.tex

    r8b590a4 r1c35c78  
    11\chapter{\CFA{}}
    2 \label{cfa-chap}
    32
    43\CFA{} adds a number of features to C, some of them providing significant increases to the expressive power of the language, but all designed to maintain the existing procedural programming paradigm of C and to be as orthogonal as possible to each other.
    54To provide background for the contributions in subsequent chapters, this chapter provides a summary of the features of \CFA{} at the time this work was conducted.
    65
    7 The core design of \CFA{} is laid out in Glen Ditchfield's 1992 PhD thesis, \emph{Contextual Polymorphism} \cite{Ditchfield92}; in that thesis, Ditchfield presents the theoretical underpinnings of the \CFA{} polymorphism model.
    8 Building on Ditchfield's design for contextual polymorphism as well as KW-C \cite{Buhr94a}, an earlier set of (largely syntactic) extensions to C, Richard Bilson \cite{Bilson03} built the first version of the \CFA{} compiler, \CFACC{}, in the early 2000's.
    9 This early \CFACC{} provided basic functionality, but incorporated a number of algorithmic choices that have failed to scale as \CFA{} has developed, lacking the runtime performance for practical use; this thesis is substantially concerned with rectifying those deficits.
    10 
    11 The \CFA{} project was revived in 2015 with the intention of building a production-ready language and compiler; at the time of this writing, both \CFA{} and \CFACC{} remain under active development.
    12 As this development has been proceeding concurrently with the work described in this thesis, the state of \CFA{} has been somewhat of a moving target; however, Moss~\etal~\cite{Moss18} provides a reasonable summary of the current design.
    13 Notable features added during this period include generic types (Chapter~\ref{generic-chap}), constructors and destructors \cite{Schluntz17}, improved support for tuples \cite{Schluntz17}, reference types \cite{Moss18}, first-class concurrent and parallel programming support \cite{Delisle18}, as well as numerous pieces of syntactic sugar and the start of an idiomatic standard library \cite{Moss18}.
     6The core design of \CFA{} is laid out in Glen Ditchfield's 1992 PhD thesis, \emph{Contextual Polymorphism}\cite{Ditchfield92}; in that thesis, Ditchfield presents the theoretical underpinnings of the \CFA{} polymorphism model.
     7Building on Ditchfield's design for contextual polymorphism as well as KW-C\cite{Buhr94a}, an earlier set of (largely syntactic) extensions to C, Richard Bilson\cite{Bilson03} built the first version of the \CFA{} compiler, \CFACC{}, in the early 2000's.
     8This early \CFACC{} provided basic functionality, but incorporated a number of poor algorithmic choices due to a rushed implementation time frame, and as such lacked the runtime performance required for practical use; this thesis is substantially concerned with rectifying those deficits.
     9
     10The \CFA{} project was revived in 2015 with the intention of building a production-ready language and compiler; at the time of this writing, both \CFA{} and \CFACC{} have been under active development continuously since.
     11As this development has been proceeding concurrently with the work described in this thesis, the state of \CFA{} has been somewhat of a moving target; however, Moss~\etal\cite{Moss18} provides a reasonable summary of the current design.
     12Notable features added during this period include generic types (Chapter~\ref{generic-chap}), constructors and destructors\cite{Schluntz17}, improved support for tuples\cite{Schluntz17}, reference types\cite{Moss18}, first-class concurrent and parallel programming support\cite{Delisle18}, as well as numerous pieces of syntactic sugar and the start of an idiomatic standard library\cite{Moss18}.
     13
     14\section{\CFA{} Features}
    1415
    1516The selection of features presented in this chapter are chosen to elucidate the design constraints of the work presented in this thesis.
    16 In some cases the interactions of multiple features make this design a significantly more complex problem than any individual feature; in other cases a feature that does not by itself add any complexity to expression resolution triggers previously rare edge cases more frequently.
    17 
    18 \section{Procedural Paradigm}
     17In some cases the interactions of multiple features make this design a significantly more complex problem than any individual feature would; in other cases a feature that does not by itself add any complexity to expression resolution triggers previously rare edge cases more frequently.
     18
     19\subsection{Procedural Paradigm}
    1920
    2021It is important to note that \CFA{} is not an object-oriented language.
    21 This is a deliberate choice intended to maintain the applicability of the programming model and language idioms already possessed by C programmers.
    22 This choice is in marked contrast to \CC{}, which is a much larger and more complex language, and requires extensive developer re-training to write idiomatic, efficient code in \CC{}'s object-oriented paradigm.
    23 
     22This is a deliberate choice intended to maintain the applicability of the mental model and language idioms already possessed by C programmers.
     23This choice is in marked contrast to \CC{}, which, though it has backward-compatibility with C on the source code level, is a much larger and more complex language, and requires extensive developer re-training to write idiomatic, efficient code in \CC{}'s object-oriented paradigm.
     24
     25\CFA{} does have a system of implicit type conversions derived from C's ``usual arithmetic conversions''; while these conversions may be thought of as something like an inheritance hierarchy, the underlying semantics are significantly different and such an analogy is loose at best.
    2426Particularly, \CFA{} has no concept of \emph{subclass}, and thus no need to integrate an inheritance-based form of polymorphism with its parametric and overloading-based polymorphism.
    25 While \CFA{} does have a system of implicit type conversions derived from C's ``usual arithmetic conversions'' \cite[\S{}6.3.1.8]{C11} and these conversions may be thought of as something like an inheritance hierarchy, the underlying semantics are significantly different and such an analogy is loose at best.
    26 The graph structure of the \CFA{} type conversions (discussed in Section~\ref{conv-cost-sec}) is also markedly different than an inheritance hierarchy; it has neither a top nor a bottom type, and does not satisfy the lattice properties typical of inheritance hierarchies.
     27The graph structure of the \CFA{} type conversions is also markedly different than an inheritance hierarchy; it has neither a top nor a bottom type, and does not satisfy the lattice properties typical of inheritance hierarchies.
    2728
    2829Another aspect of \CFA{}'s procedural paradigm is that it retains C's translation-unit-based encapsulation model, rather than class-based encapsulation such as in \CC{}.
    29 This design choice implies that separate compilation must be maintained to allow headers to act as an encapsulation boundary, rather than the header-only libraries used by \CC{} templates.
    30 
    31 \section{Name Overloading} \label{overloading-sec}
    32 
    33 In C, no more than one variable or function in the same scope may share the same name\footnote{Technically, C has multiple separated namespaces, one holding \lstinline{struct}, \lstinline{union}, and \lstinline{enum} tags, one holding labels, one holding \lstinline{typedef} names, variable, function, and enumerator identifiers, and one for each \lstinline{struct} and \lstinline{union} type holding the field names \cite[\S{}6.2.3]{C11}.}, and variable or function declarations in inner scopes with the same name as a declaration in an outer scope hide the outer declaration.
    34 This restriction makes finding the proper declaration to match to a variable expression or function application a simple matter of lexically-scoped name lookup, which can be easily and efficiently implemented.
     30This choice implies that that separate compilation must be maintained to allow headers to act as an encapsulation boundary, rather than the header-only libraries used by \CC{} templates.
     31
     32\subsection{Name Overloading} \label{overloading-sec}
     33
     34In C, no more than one variable or function in the same scope may share the same name\footnote{Technically, C has multiple separated namespaces, one holding \lstinline{struct}, \lstinline{union}, and \lstinline{enum} tags, one holding labels, one holding \lstinline{typedef} names, variable, function, and enumerator identifiers, and one for each \lstinline{struct} and \lstinline{union} type holding the field names\cit{}.}, and variable or function declarations in inner scopes with the same name as a declaration in an outer scope hide the outer declaration.
     35This restriction makes finding the proper declaration to match to a variable expression or function application a simple matter of symbol-table lookup, which can be easily and efficiently implemented.
    3536\CFA{}, on the other hand, allows overloading of variable and function names so long as the overloaded declarations do not have the same type, avoiding the multiplication of variable and function names for different types common in the C standard library, as in the following example:
    3637
     
    4950\end{cfa}
    5051
    51 The final expression in the preceding example includes a feature of \CFA{} name overloading not shared by \CC{}, the ability to disambiguate expressions based on their return type. This provides greater flexibility and power than the parameter-based overload selection of \CC{}, though at the cost of greater complexity in the resolution algorithm.
    52 
    53 While the wisdom of giving both the maximum value of a type and the function to take the maximum of two values the same name in the example above is debatable, \eg{} some developers may prefer !MAX! for the former, the guiding philosophy of \CFA{} is ``describe, don't prescribe'' --- we prefer to trust programmers with powerful tools, and there is no technical reason to restrict overloading between variables and functions.
    54 However, the expressivity of \CFA{}'s name overloading does have the consequence that simple table lookup is insufficient to match identifiers to declarations, and a type-matching algorithm must be part of expression resolution.
    55 
    56 \subsection{Operator Overloading}
     52While the wisdom of giving both the maximum value of a type and the function to take the maximum of two values the same name is debatable, \eg{} some developers may prefer !MAX! for the former, the guiding philosophy of \CFA{} is ``describe, don't prescribe'' --- we prefer to trust programmers with powerful tools, and there is no technical reason to restrict overloading between variables and functions.
     53However, the expressivity of \CFA{}'s name overloading has the consequence that simple table lookup is insufficient to match identifiers to declarations, and a type-matching algorithm must be part of expression resolution.
     54
     55\subsubsection{Operator Overloading}
    5756
    5857C does allow name overloading in one context: operator overloading.
    5958From the perspective of the type system, there is nothing special about operators as opposed to other functions, nor is it desirable to restrict the clear and readable syntax of operators to only the built-in types.
    60 For these reasons, \CFA{}, like \CC{} and many other programming languages, allows overloading of operators by writing specially-named functions where !?! stands in for the operands.
    61 This syntax is more natural than the operator overloading syntax of \CC{}, which requires ``dummy'' parameters to disambiguate overloads of similarly-named pre- and postfix operators\footnote{This example uses \CFA{}'s reference types, described in Section~\ref{type-features-sec}}:
     59For these reasons, \CFA{} also allows overloading of operators by writing specially-named functions where !?! stands in for the operands\footnote{This example uses \CFA{}'s reference types, described in Section~\ref{type-features-sec}}:
    6260
    6361\begin{cfa}
     
    7371\end{cfa}
    7472
    75 Together, \CFA{}'s backward-compatibility with C and the inclusion of this operator overloading feature imply that \CFA{} must select among function overloads using a method compatible with C's ``usual arithmetic conversions'' \cite[\S{}6.3.1.8]{C11}, so as to present user programmers with only a single set of overloading rules.
    76 
    77 \subsection{Special Literal Types}
     73Together, \CFA{}'s backward-compatibility with C and the inclusion of this operator overloading feature imply that \CFA{} must select among function overloads using a method compatible with C's ``usual arithmetic conversions''\cit{}, so as to present user programmers with only a single set of overloading rules.
     74
     75\subsubsection{Special Literal Types}
    7876
    7977Literal !0! is also used polymorphically in C; it may be either integer zero or the null value of any pointer type.
    80 \CFA{} provides a special type for the !0! literal, !zero_t!, so that users can define a zero value for their own types without being forced to create a conversion from an integer or pointer type; \CFA{} also includes implicit conversions from !zero_t! to the !int! and pointer type constructors\footnote{See Section~\ref{type-features-sec}} from !zero_t! for backward compatibility.
    81 
    82 According to the C standard \cite[\S{}6.8.4.1]{C11}, !0! is the only false value; any value that compares equal to zero is false, while any value that does not is true.
    83 By this rule, Boolean contexts such as !if ( x )! can always be equivalently rewritten as \lstinline{if ( (x) != 0 )}.
    84 \CFACC{} applies this rewriting in all Boolean contexts, so any type !T! can be made ``truthy'' (that is, given a Boolean interpretation) in \CFA{} by defining an operator overload \lstinline{int ?!=?(T, zero_t)}.
    85 \CC{} takes a different approach to user-defined truthy types, allowing definition of an implicit conversion operator to !bool!; prior to the introduction of the !explicit! keyword for conversion operators in \CCeleven{} this approach also allowed undesired implicit conversions to all other arithmetic types, a shortcoming not shared by the \CFA{} design.
     78\CFA{} provides a special type for the !0! literal, !zero_t!, so that users can define a zero value for their own types without being forced to create a conversion from an integer or pointer type (though \CFA{} also includes implicit conversions from !zero_t! to the integer and pointer types for backward compatibility).
     79
     80According to the C standard\cit{}, !0! is the only false value; any value that compares equal to zero is false, while any value that does not is true.
     81By this rule, boolean contexts such as !if ( x )! can always be equivalently rewritten as \lstinline{if ( (x) != 0 )}.
     82\CFACC{} applies this rewriting in all boolean contexts, so any type !T! can be made ``truthy'' (that is, given a boolean interpretation) in \CFA{} by defining an operator overload \lstinline{int ?!=?(T, zero_t)}; unlike \CC{} prior to the addition of explicit casts in \CCeleven{}, this design does not add comparability or convertablity to arbitrary integer types.
    8683
    8784\CFA{} also includes a special type for !1!, !one_t!; like !zero_t!, !one_t! has built-in implicit conversions to the various integral types so that !1! maintains its expected semantics in legacy code.
     
    8986
    9087\CFA{} previously allowed !0! and !1! to be the names of polymorphic variables, with separate overloads for !int 0!, !int 1!, and !forall(dtype T) T* 0!.
    91 While designing \CFA{} generic types (see Chapter~\ref{generic-chap}), it was discovered that the parametric polymorphic zero variable is not generalizable to other types; though all null pointers have the same in-memory representation, the same cannot be said of the zero values of arbitrary types.
     88As revealed in my own work on generic types (Chapter~\ref{generic-chap}), the parameteric polymorphic zero variable was not generalizable to other types; though all null pointers have the same in-memory representation, the same cannot be said of the zero values of arbitrary types.
    9289As such, variables that could represent !0! and !1! were phased out in favour of functions that could generate those values for a given type as appropriate.
    9390
    94 \section{Polymorphic Functions} \label{poly-func-sec}
    95 
    96 The most significant type-system feature \CFA{} adds is parametric-polymorphic functions.
     91\subsection{Polymorphic Functions} \label{poly-func-sec}
     92
     93The most significant feature \CFA{} adds is parametric-polymorphic functions.
    9794Such functions are written using a !forall! clause (which gives the language its name):
    9895
     
    105102The type variable !T! is transformed into a set of additional implicit parameters to !identity!, which encode sufficient information about !T! to create and return a variable of that type.
    106103\CFA{} passes the size and alignment of the type represented by an !otype! parameter, as well as a default constructor, copy constructor, assignment operator, and destructor.
    107 Types which do not have one or more of these operators visible cannot be bound to !otype! parameters, but may be bound to un-constrained !dtype! (``data type'') type variables.
    108 In this design, the runtime cost of polymorphism is spread over each polymorphic call, due to passing more arguments to polymorphic functions; the experiments in Chapter~\ref{generic-chap} indicate that this overhead is comparable to that of \CC{} virtual function calls.
    109 % \TODO{rerun experiments, possibly look at vtable variant}
     104Types which do not have one or more of these operators visible cannot be bound to !otype! parameters.
     105In this design, the runtime cost of polymorphism is spread over each polymorphic call, due to passing more arguments to polymorphic functions; experiments have shown this overhead to be similar to \CC{} virtual function calls. \TODO{rerun experiments, possibly look at vtable variant}
    110106
    111107One benefit of this design is that it allows polymorphic functions to be separately compiled.
     
    113109The fact that there is only one implementation of each polymorphic function also reduces compile times relative to the template-expansion approach taken by \CC{}, as well as reducing binary sizes and runtime pressure on instruction cache by re-using a single version of each function.
    114110
    115 \subsection{Type Assertions}
    116 
    117 Since bare polymorphic types do not provide a great range of available operations, \CFA{} provides a \emph{type assertion} mechanism to provide further information about a type\footnote{Subscript not included in source code.\label{sub-foot}}:
     111\subsubsection{Type Assertions}
     112
     113Since bare polymorphic types do not provide a great range of available operations, \CFA{} provides a \emph{type assertion} mechanism to provide further information about a type:
    118114
    119115\begin{cfa}
    120116forall(otype T `| { T twice(T); }`)
    121117T four_times(T x) { return twice( twice(x) ); }
    122 double twice$\(_1\)$(double d) { return d * 2.0; }
     118double twice(double d) { return d * 2.0; }  $\C[2.75in]{// (1)}$
    123119
    124120double ans = four_times( 10.5 );  $\C[2.75in]{// T bound to double, ans == 42.0}$
     
    129125
    130126Monomorphic specializations of polymorphic functions can themselves be used to satisfy type assertions.
    131 For instance, !twice! could have been defined like this\footref{sub-foot}:
     127For instance, !twice! could have been defined like this:
    132128
    133129\begin{cfa}
    134130forall(otype S | { S ?+?(S, S); })
    135 S twice$\(_2\)$(S x) { return x + x; }
    136 \end{cfa}
    137 
    138 Specializing this polymorphic function with !S = double! produces a monomorphic function which can  be used to satisfy the type assertion on !four_times!.
    139 \CFACC{} accomplishes this by creating a wrapper function calling !twice!$_2$ with !S! bound to !double!, then providing this wrapper function to !four_times!\footnote{\lstinline{twice}$_2$ could also have had a type parameter named \lstinline{T}; \CFA{} specifies renaming of the type parameters, which would avoid the name conflict with the type variable \lstinline{T} of \lstinline{four_times}}.
    140 However, !twice!$_2$ also works for any type !S! that has an addition operator defined for it.
     131S twice(S x) { return x + x; }  $\C[2.75in]{// (2)}
     132\end{cfa}
     133
     134This version of !twice! works for any type !S! that has an addition operator defined for it, and it could be used to satisfy the type assertion on !four_times!.
     135\CFACC{} accomplishes this by creating a wrapper function calling !twice//(2)! with !S! bound to !double!, then providing this wrapper function to !four_times!\footnote{\lstinline{twice // (2)} could also have had a type parameter named \lstinline{T}; \CFA{} specifies renaming of the type parameters, which would avoid the name conflict with the type variable \lstinline{T} of \lstinline{four_times}}.
    141136
    142137Finding appropriate functions to satisfy type assertions is essentially a recursive case of expression resolution, as it takes a name (that of the type assertion) and attempts to match it to a suitable declaration in the current scope.
    143138If a polymorphic function can be used to satisfy one of its own type assertions, this recursion may not terminate, as it is possible that that function is examined as a candidate for its own assertion unboundedly repeatedly.
    144 To avoid such infinite loops, \CFACC{} imposes a fixed limit on the possible depth of recursion, similar to that employed by most \CC{} compilers for template expansion; this restriction means that there are some otherwise semantically well-typed expressions that cannot be resolved by \CFACC{}.
    145 
    146 % \subsection{Deleted Declarations}
    147 
    148 % Particular type combinations can be exempted from matching a given polymorphic function through use of a \emph{deleted function declaration}:
    149 
    150 % \begin{cfa}
    151 % int somefn(char) = void;
    152 % \end{cfa}
    153 
    154 % This feature is based on a \CCeleven{} feature typically used to make a type non-copyable by deleting its copy constructor and assignment operator\footnote{In previous versions of \CC{}, a type could be made non-copyable by declaring a private copy constructor and assignment operator, but not defining either. This idiom is well-known, but depends on some rather subtle and \CC{}-specific rules about private members and implicitly-generated functions; the deleted function form is both clearer and less verbose.} or forbidding some interpretations of a polymorphic function by specifically deleting the forbidden overloads\footnote{Specific polymorphic function overloads can also be forbidden in previous \CC{} versions through use of template metaprogramming techniques, though this advanced usage is beyond the skills of many programmers. A similar effect can be produced on an ad-hoc basis at the appropriate call sites through use of casts to determine the function type. In both cases, the deleted-function form is clearer and more concise.}.
    155 % Deleted function declarations are implemented in \CFACC{} by adding them to the symbol table as usual, but with a flag set that indicates that the function is deleted.
    156 % If this deleted declaration is selected as the unique minimal-cost interpretation of an expression than an error is produced.
    157 
    158 \subsection{Traits}
    159 
    160 \CFA{} provides \emph{traits} as a means to name a group of type assertions, as in the example below\footnote{This example uses \CFA{}'s reference types and constructors, described in Section~\ref{type-features-sec}.}:
     139To avoid such infinite loops, \CFACC{} imposes a fixed limit on the possible depth of recursion, similar to that employed by most \CC{} compilers for template expansion; this restriction means that there are some semantically well-typed expressions that cannot be resolved by \CFACC{}.
     140\TODO{Update this with final state} One contribution made in the course of this thesis was modifying \CFACC{} to use the more flexible expression resolution algorithm for assertion matching, rather than the simpler but limited previous approach of unification on the types of the functions.
     141
     142\subsubsection{Deleted Declarations}
     143
     144Particular type combinations can be exempted from matching a given polymorphic function through use of a \emph{deleted function declaration}:
     145
     146\begin{cfa}
     147int somefn(char) = void;
     148\end{cfa}
     149
     150This feature is based on a \CCeleven{} feature typically used to make a type non-copyable by deleting its copy constructor and assignment operator\footnote{In previous versions of \CC{}, a type could be made non-copyable by declaring a private copy constructor and assignment operator, but not defining either. This idiom is well-known, but depends on some rather subtle and \CC{}-specific rules about private members and implicitly-generated functions; the deleted function form is both clearer and less verbose.} or forbidding some interpretations of a polymorphic function by specifically deleting the forbidden overloads\footnote{Specific polymorphic function overloads can also be forbidden in previous \CC{} versions through use of template metaprogramming techniques, though this advanced usage is beyond the skills of many programmers. A similar effect can be produced on an ad-hoc basis at the appropriate call sites through use of casts to determine the function type. In both cases, the deleted-function form is clearer and more concise.}.
     151Deleted function declarations are implemented in \CFACC{} by adding them to the symbol table as usual, but with a flag set that indicates that the function is deleted.
     152If this deleted declaration is selected as the unique minimal-cost interpretation of an expression than an error is produced. \TODO{Check this is implemented at print.}
     153
     154\subsubsection{Traits}
     155
     156\CFA{} provides \emph{traits} as a means to name a group of type assertions, as in the example below\footnote{This example uses \CFA{}'s reference types, constructors, and zero type, described in Section~\ref{type-features-sec}.}:
    161157
    162158\begin{cfa}
     
    175171
    176172Semantically, traits are simply a named list of type assertions, but they may be used for many of the same purposes that interfaces in Java or abstract base classes in \CC{} are used for.
    177 Unlike Java interfaces or \CC{} base classes, \CFA{} types do not explicitly state any inheritance relationship to traits they satisfy; this can be considered a form of structural inheritance, similar to interface implementation in Go, as opposed to the nominal inheritance model of Java and \CC{}.
     173Unlike Java interfaces or \CC{} base classes, \CFA{} types do not explicitly state any inheritance relationship to traits they satisfy; this can be considered a form a structural inheritance, similar to interface implementation in Go, as opposed to the nominal inheritance model of Java and \CC{}.
    178174Nominal inheritance can be simulated in \CFA{} using marker variables in traits:
    179175
     
    190186
    191187Traits, however, are significantly more powerful than nominal-inheritance interfaces; firstly, due to the scoping rules of the declarations that satisfy a trait's type assertions, a type may not satisfy a trait everywhere that that type is declared, as with !char! and the !nominal! trait above.
    192 Secondly, because \CFA{} is not object-oriented and types do not have a closed set of methods, existing C library types can be extended to implement a trait simply by writing the requisite functions\footnote{\CC{} only allows partial extension of C types, because constructors, destructors, conversions, and the assignment, indexing, and function-call operators may only be defined in a class; \CFA{} does not have any of these restrictions.}.
     188Secondly, because \CFA{} is not object-oriented and types do not have a closed set of methods, existing C library types can be extended to implement a trait simply by writing the requisite functions.
    193189Finally, traits may be used to declare a relationship among multiple types, a property that may be difficult or impossible to represent in nominal-inheritance type systems\footnote{This example uses \CFA{}'s reference types, described in Section~\ref{type-features-sec}.}:
    194190
     
    210206\end{cfa}
    211207
    212 In this example above, !(list_iterator, int)! satisfies !pointer_like! by the user-defined dereference function, and !(list_iterator, list)! also satisfies !pointer_like! by the built-in dereference operator for pointers.
     208In the example above, !(list_iterator, int)! satisfies !pointer_like! by the user-defined dereference function, and !(list_iterator, list)! also satisfies !pointer_like! by the built-in dereference operator for pointers.
    213209Given a declaration !list_iterator it!, !*it! can be either an !int! or a !list!, with the meaning disambiguated by context (\eg{} !int x = *it;! interprets !*it! as !int!, while !(*it).value = 42;! interprets !*it! as !list!).
    214210While a nominal-inheritance system with associated types could model one of those two relationships by making !El! an associated type of !Ptr! in the !pointer_like! implementation, few such systems could model both relationships simultaneously.
    215211
    216212The flexibility of \CFA{}'s implicit trait-satisfaction mechanism provides programmers with a great deal of power, but also blocks some optimization approaches for expression resolution.
    217 The ability of types to begin or cease to satisfy traits when declarations go into or out of scope makes caching of trait satisfaction judgments difficult, and the ability of traits to take multiple type parameters can lead to a combinatorial explosion of work in any attempt to pre-compute trait satisfaction relationships.
    218 
    219 \section{Implicit Conversions} \label{implicit-conv-sec}
    220 
    221 In addition to the multiple interpretations of an expression produced by name overloading and polymorphic functions, \CFA{} must support all of the implicit conversions present in C for backward compatibility, producing further candidate interpretations for expressions.
    222 As mentioned above, C does not have an inheritance hierarchy of types, but the C standard's rules for the ``usual arithmetic conversions'' \cite[\S{}6.3.1.8]{C11} define which of the built-in types are implicitly convertible to which other types, as well as which implicit conversions to apply for mixed arguments to binary operators.
     213The ability of types to begin or cease to satisfy traits when declarations go into or out of scope makes caching of trait satisfaction judgements difficult, and the ability of traits to take multiple type parameters can lead to a combinatorial explosion of work in any attempt to pre-compute trait satisfaction relationships.
     214
     215\subsection{Implicit Conversions} \label{implicit-conv-sec}
     216
     217In addition to the multiple interpretations of an expression produced by name overloading and polymorphic functions, for backward compatibility \CFA{} must support all of the implicit conversions present in C, producing further candidate interpretations for expressions.
     218As mentioned above, C does not have an inheritance hierarchy of types, but the C standard's rules for the ``usual arithmetic conversions'\cit{} define which of the built-in types are implicitly convertible to which other types, and the relative cost of any pair of such conversions from a single source type.
    223219\CFA{} adds rules to the usual arithmetic conversions defining the cost of binding a polymorphic type variable in a function call; such bindings are cheaper than any \emph{unsafe} (narrowing) conversion, \eg{} !int! to !char!, but more expensive than any \emph{safe} (widening) conversion, \eg{} !int! to !double!.
    224 One contribution of this thesis, discussed in Section~\ref{conv-cost-sec}, is a number of refinements to this cost model to more efficiently resolve polymorphic function calls.
    225 
    226 The expression resolution problem, which is the focus of Chapter~\ref{resolution-chap}, is to find the unique minimal-cost interpretation of each expression in the program, where all identifiers must be matched to a declaration, and implicit conversions or polymorphic bindings of the result of an expression may increase the cost of the expression.
    227 While semantically valid \CFA{} code always has such a unique minimal-cost interpretation, \CFACC{} must also be able to detect and report as errors expressions that have either no interpretation or multiple ambiguous minimal-cost interpretations.
     220One contribution of this thesis, discussed in Section \TODO{add to resolution chapter}, is a number of refinements to this cost model to more efficiently resolve polymorphic function calls.
     221
     222The expression resolution problem which is the focus of Chapter~\ref{resolution-chap} is to find the unique minimal-cost interpretation of each expression in the program, where all identifiers must be matched to a declaration, and implicit conversions or polymorphic bindings of the result of an expression may increase the cost of the expression.
     223While semantically valid \CFA{} code always has such a unique minimal-cost interpretation, \CFACC{} must also be able to detect and report as errors expressions which have either no interpretation or multiple ambiguous minimal-cost interpretations.
    228224Note that which subexpression interpretation is minimal-cost may require contextual information to disambiguate.
    229225For instance, in the example in Section~\ref{overloading-sec}, !max(max, -max)! cannot be unambiguously resolved, but !int m = max(max, -max)! has a single minimal-cost resolution.
    230 While the interpretation !int m = (int)max((double)max, -(double)max)! is also a valid interpretation, it is not minimal-cost due to the unsafe cast from the !double! result of !max! to !int!\footnote{The two \lstinline{double} casts function as type ascriptions selecting \lstinline{double max} rather than casts from \lstinline{int max} to \lstinline{double}, and as such are zero-cost. The \lstinline{int} to \lstinline{double} conversion could be forced if desired with two casts: \lstinline{(double)(int)max}}.
     226While the interpretation !int m = (int)max((double)max, -(double)max)! is also a valid interpretation, it is not minimal-cost due to the unsafe cast from the !double! result of !max! to !int!\footnote{The two \lstinline{double} casts function as type ascriptions selecting \lstinline{double max} rather than casts from \lstinline{int max} to \lstinline{double}, and as such are zero-cost.}.
    231227These contextual effects make the expression resolution problem for \CFA{} both theoretically and practically difficult, but the observation driving the work in Chapter~\ref{resolution-chap} is that of the many top-level expressions in a given program, most are straightforward and idiomatic so that programmers writing and maintaining the code can easily understand them; it follows that effective heuristics for common cases can bring down compiler runtime enough that a small proportion of harder-to-resolve expressions does not inordinately increase overall compiler runtime or memory usage.
    232228
    233 \section{Type Features} \label{type-features-sec}
    234 
    235 The name overloading and polymorphism features of \CFA{} have the greatest effect on language design and compiler runtime, but there are a number of other features in the type system that have a smaller effect but are useful for code examples.
     229\subsection{Type Features} \label{type-features-sec}
     230
     231The name overloading and polymorphism features of \CFA{} have the greatest effect on language design and compiler runtime, but there are a number of other features in the type system which have a smaller effect but are useful for code examples.
    236232These features are described here.
    237233
    238 \subsection{Reference Types}
    239 
    240 One of the key ergonomic improvements in \CFA{} is reference types, designed and implemented by Robert Schluntz \cite{Schluntz17}.
     234\subsubsection{Reference Types}
     235
     236One of the key ergonomic improvements in \CFA{} is reference types, designed and implemented by Robert Schluntz\cite{Schluntz17}.
    241237Given some type !T!, a !T&! (``reference to !T!'') is essentially an automatically dereferenced pointer.
    242 These types allow seamless pass-by-reference for function parameters, without the extraneous dereferencing syntax present in C; they also allow easy aliasing of nested values with a similarly convenient syntax.
    243 A particular improvement is removing syntactic special cases for operators that take or return mutable values; for example, the use !a += b! of a compound assignment operator now matches its signature, !int& ?+=?(int&, int)!, as opposed to the syntactic special cases in earlier versions of \CFA{} to automatically take the address of the first argument to !+=! and to mark its return value as mutable.
    244 
    245 The C standard makes heavy use of the concept of \emph{lvalue}, an expression with a memory address; its complement, \emph{rvalue} (a non-addressable expression) is not explicitly named in the standard.
    246 In \CFA{}, the distinction between lvalue and rvalue can be re-framed in terms of reference and non-reference types, with the benefit of being able to express the difference in user code.
     238These types allow seamless pass-by-reference for function parameters, without the extraneous dereferencing syntax present in C; they also allow easy easy aliasing of nested values with a similarly convenient syntax.
     239A particular improvement is removing syntactic special cases for operators which take or return mutable values; for example, the use !a += b! of a compound assignment operator now matches its signature, !int& ?+=?(int&, int)!, as opposed to the previous syntactic special cases to automatically take the address of the first argument to !+=! and to mark its return value as mutable.
     240
     241The C standard makes heavy use of the concept of \emph{lvalue}, an expression with a memory address; its complement, \emph{rvalue} (a non-addressable expression) is not explicitly named.
     242In \CFA{}, the distinction between lvalue and rvalue can be reframed in terms of reference and non-reference types, with the benefit of being able to express the difference in user code.
    247243\CFA{} references preserve the existing qualifier-dropping implicit lvalue-to-rvalue conversion from C (\eg{} a !const volatile int&! can be implicitly copied to a bare !int!)
    248 To make reference types more easily usable in legacy pass-by-value code, \CFA{} also adds an implicit rvalue-to-lvalue conversion, implemented by storing the value in a compiler-generated temporary variable and passing a reference to that temporary.
    249 To mitigate the ``!const! hell'' problem present in \CC{}, there is also a qualifier-dropping lvalue-to-lvalue conversion implemented by copying into a temporary:
     244To make reference types more easily usable in legacy pass-by-value code, \CFA{} also adds an implicit rvalue-to-lvalue conversion, implemented by storing the value in a fresh compiler-generated temporary variable and passing a reference to that temporary.
     245To mitigate the ``!const! hell'' problem present in \CC{}, there is also a qualifier-dropping lvalue-to-lvalue conversion, also implemented by copying into a temporary:
    250246
    251247\begin{cfa}
     
    263259The primary issue with \CC{} references is that it is impossible to extract the address of the reference variable rather than the address of the referred-to variable.
    264260This breaks a number of the usual compositional properties of the \CC{} type system, \eg{} a reference cannot be re-bound to another variable, nor is it possible to take a pointer to, array of, or reference to a reference.
    265 \CFA{} supports all of these use cases without further added syntax.
     261\CFA{} supports all of these use cases \TODO{test array} without further added syntax.
    266262The key to this syntax-free feature support is an observation made by the author that the address of a reference is a lvalue.
    267263In C, the address-of operator !&x! can only be applied to lvalue expressions, and always produces an immutable rvalue; \CFA{} supports reference re-binding by assignment to the address of a reference, and pointers to references by repeating the address-of operator:
     
    276272For better compatibility with C, the \CFA{} team has chosen not to differentiate function overloads based on top-level reference types, and as such their contribution to the difficulty of \CFA{} expression resolution is largely restricted to the implementation details of normalization conversions and adapters.
    277273
    278 \subsection{Resource Management}
    279 
    280 \CFA{} also supports the RAII (``Resource Acquisition is Initialization'') idiom originated by \CC{}, thanks to the object lifetime work of Robert Schluntz \cite{Schluntz17}.
     274\subsubsection{Resource Management}
     275
     276\CFA{} also supports the RAII (``Resource Acquisition is Initialization'') idiom originated by \CC{}, thanks to the object lifetime work of Robert Schluntz\cite{Schluntz17}.
    281277This idiom allows a safer and more principled approach to resource management by tying acquisition of a resource to object initialization, with the corresponding resource release executed automatically at object finalization.
    282278A wide variety of conceptual resources may be conveniently managed by this scheme, including heap memory, file handles, and software locks.
     
    329325\end{cfa}
    330326
    331 \subsection{Tuple Types}
     327\subsubsection{Tuple Types}
    332328
    333329\CFA{} adds \emph{tuple types} to C, a syntactic facility for referring to lists of values anonymously or with a single identifier.
    334330An identifier may name a tuple, a function may return one, and a tuple may be implicitly \emph{destructured} into its component values.
    335331The implementation of tuples in \CFACC{}'s code generation is based on the generic types introduced in Chapter~\ref{generic-chap}, with one compiler-generated generic type for each tuple arity.
    336 This reuse allows tuples to take advantage of the same runtime optimizations available to generic types, while reducing code bloat.
    337 An extended presentation of the tuple features of \CFA{} can be found in \cite{Moss18}, but the following example demonstrates the basic features:
    338 
    339 \begin{cfa}
    340 [char, char] x$\(_1\)$ = ['!', '?']; $\C{// tuple type and expression syntax}$
    341 int x$\(_2\)$ = 2;
     332This allows tuples to take advantage of the same runtime optimizations available to generic types, while reducing code bloat.
     333An extended presentation of the tuple features of \CFA{} can be found in \cite{Moss18}, but the following example shows the basics:
     334
     335\begin{cfa}
     336[char, char] x = ['!', '?']; $\C{// (1); tuple type and expression syntax}$
     337int x = 2; $\C{// (2)}$
    342338
    343339forall(otype T)
    344 [T, T] swap$\(_1\)$( T a, T b ) {
     340[T, T] swap( T a, T b ) { $\C{// (3)}$
    345341        return [b, a]; $\C{// one-line swap syntax}$
    346342}
    347343
    348 x = swap( x ); $\C{// destructure x\(_1\) into two elements}$
    349 $\C{// cannot use x\(_2\), not enough arguments}$
    350 
    351 void swap$\(_2\)$( int, char, char );
    352 
    353 swap( x, x ); $\C{// swap\(_2\)( x\(_2\), x\(_1\) )}$
    354 $\C{// not swap\(_1\)( x\(_2\), x\(_2\) ) due to polymorphism cost}$
     344x = swap( x ); $\C{// destructure [char, char] x into two elements}$
     345$\C{// cannot use int x, not enough arguments}$
     346
     347void swap( int, char, char ); $\C{// (4)}$
     348
     349swap( x, x ); $\C{// (4) on (2), (1)}$
     350$\C{// not (3) on (2), (2) due to polymorphism cost}$
    355351\end{cfa}
    356352
    357353Tuple destructuring breaks the one-to-one relationship between identifiers and values.
    358 Hence, some argument-parameter matching strategies for expression resolution are precluded, as well as cheap interpretation filters based on comparing number of parameters and arguments.
    359 As an example, in the call to !swap( x, x )! above, the second !x! can be resolved starting at the second or third parameter of !swap!$_2$, depending which interpretation of !x! is chosen for the first argument.
    360 
    361 \section{Conclusion}
    362 
    363 \CFA{} adds a significant number of features to standard C, increasing the expressivity and re-usability of \CFA{} code while maintaining backwards compatibility for both code and larger language paradigms.
    364 This flexibility does incur significant added compilation costs, however, the mitigation of which are the primary concern of this thesis.
     354This precludes some argument-parameter matching strategies for expression resolution, as well as cheap interpretation filters based on comparing number of parameters and arguments.
     355As an example, in the call to !swap( x, x )! above, the second !x! can be resolved starting at the second or third parameter of !swap!, depending which interpretation of !x! was chosen for the first argument.
  • doc/theses/aaron_moss_PhD/phd/conclusion.tex

    r8b590a4 r1c35c78  
    11\chapter{Conclusion}
    22
    3 Decades after its first standardization, the C language remains a widely-used tool and a vital part of the software development landscape.
    4 The \CFA{} language under development at the University of Waterloo represents an evolutionary modernization of C with expressive modern language features paired with strong C backwards-compatibility.
    5 This thesis has contributed to these project goals in a variety of ways, including the addition of a generic-types language feature (Chapter~\ref{generic-chap}) and refinement of the \CFA{} overload selection rules to produce a more expressive and intuitive model (Section~\ref{conv-cost-sec}).
    6 Based on the technical contribution of the resolver prototype system (Section~\ref{rp-features-sec}), I have also made significant improvements to \CFA{} compilation performance, including un-combined bottom-up expression traversal (Section~\ref{arg-parm-matching-sec}), deferred-cached assertion satisfaction (Section~\ref{assn-sat-sec}), and a novel persistent union-find type environment data structure (Section~\ref{env-persistent-union-find}).
    7 The combination of these practical improvements and added features significantly improve the viability of \CFA{} as a practical programming language.
    8 
    9 Further improvements to the \CFA{} type-system are still possible, however.
    10 One area suggested by this work is development of a scheme for user-defined conversions; to integrate properly with the \CFA{} conversion model, there would need to be a distinction between safe and unsafe conversions, and possibly a way to denote conversions as explicit-only or non-chainable.
    11 Another place for ongoing effort is improvement of compilation performance; I believe the most promising direction for that is rebuilding the \CFA{} compiler on a different framework than Bilson's \CFACC{}.
    12 The resolver prototype presented in this work has good performance and already has the basics of \CFA{} semantics implemented, as well as many of the necessary core data structures, and would be a viable candidate for a new compiler architecture.
    13 An alternate approach would be to fork an existing C compiler such as Clang~\cite{Clang}, which would need to be modified to use one of the resolution algorithms discussed here, as well as various other features introduced by Bilson~\cite{Bilson03}.
     3Wrap it up --- Done, done done.
  • doc/theses/aaron_moss_PhD/phd/evaluation/generic-timing.dat

    r8b590a4 r1c35c78  
    88"clear\npair"   2840    773     748     3511
    99"pop\npair"     3025    5414    813     23867
     10
  • doc/theses/aaron_moss_PhD/phd/frontpgs.tex

    r8b590a4 r1c35c78  
    6262        \bigskip
    6363       
    64         \noindent
    65   \begin{tabbing}
    66   Internal-External Member: \=  \kill % using longest text to define tab length
    67   External Examiner: \>  Doug Lea \\
    68   \> Professor, Computer Science Department, \\
    69   \> State Univesity of New York at Oswego \\
    70   \end{tabbing}
    71         \bigskip
     64%       \noindent
     65%   \begin{tabbing}
     66%   Internal-External Member: \=  \kill % using longest text to define tab length
     67%   External Examiner: \>  Bruce Bruce \\
     68%   \> Professor, Dept. of Philosophy of Zoology, University of Wallamaloo \\
     69%   \end{tabbing}
     70%       \bigskip
    7271       
    7372        \noindent
     
    7574  Internal-External Member: \=  \kill % using longest text to define tab length
    7675  Supervisor: \> Peter Buhr \\
    77   \> Associate Professor, School of Computer Science, \\
    78   \> University of Waterloo \\
     76  \> Professor, School of Computer Science, University of Waterloo \\
    7977  \end{tabbing}
    8078        \bigskip
     
    8381  \begin{tabbing}
    8482  Internal-External Member: \=  \kill % using longest text to define tab length
    85   Internal Members: \> Ond\v{r}ej Lhot\a'ak \\
    86   \> Associate Professor, School of Computer Science, \\
    87   \>University of Waterloo \\
    88   \\
    89   \> Gregor Richards \\
    90   \> Assistant Professor, School of Computer Science, \\
    91   \> University of Waterloo \\
     83  Internal Members: \> Gregor Richards \\
     84  \> Professor, School of Computer Science, University of Waterloo \\
     85  \> Ond\v{r}ej Lhot\a'ak \\
     86  \> Professor, School of Computer Science, University of Waterloo \\
    9287  \end{tabbing}
    93         \bigskip
     88%       \bigskip
    9489       
    95         \noindent
    96   \begin{tabbing}
    97   Internal-External Member: \=  \kill % using longest text to define tab length
    98   Internal-External Member: \> Werner Dietl \\
    99   \> Assistant Professor, Electrical and Computer Engineering, \\
    100   \> University of Waterloo \\
    101   \end{tabbing}
    102 %       \bigskip
     90%       \noindent
     91%   \begin{tabbing}
     92%   Internal-External Member: \=  \kill % using longest text to define tab length
     93%   Internal-External Member: \> Deepa Thotta \\
     94%   \> Professor, Dept. of Philosophy, University of Waterloo \\
     95%   \end{tabbing}
     96%       \bigskip
    10397       
    10498%       \noindent
     
    130124\begin{center}\textbf{Abstract}\end{center}
    131125
    132         The C programming language has been an important software development tool for decades.
    133         \CFA{} is a new programming language designed with strong backwards-compatibility to take advantage of widely distributed C programming expertise and the large deployed base of C code, paired with modern language features to improve developer productivity.
    134        
    135         This thesis presents a number of improvements to \CFA{}.
    136         The author has developed one major new language feature, generic types, in a way that integrates naturally with both the existing polymorphism features of \CFA{} and the translation-unit-based encapsulation model of C.
    137         This thesis also presents a number of smaller refinements to the \CFA{} overload resolution rules, each of which improves the expressivity or intuitive nature of the language.
    138        
    139         This thesis also includes a number of practical improvements to \CFA{} compilation performance, focused on the expression resolution pass, which is the main bottleneck.
    140         These include better algorithms for argument-parameter matching and type assertion satisfaction, as well as a new type-environment data-structure based on a novel variant of union-find.
    141         The compilation performance improvements have all been experimentally validated with a new prototype system that encapsulates the key aspects of the \CFA{} language; this prototype is a promising basis for future research and a technical contribution of this work.
     126This is the abstract.
    142127
    143128\cleardoublepage
     
    146131% -------------------------------
    147132
    148 \begin{center}\textbf{Acknowledgements}\end{center}
     133% \begin{center}\textbf{Acknowledgements}\end{center}
    149134
    150 Though a doctoral thesis is an individual project, I could not have completed it without the help and support of many members of my community.
    151 This thesis would not exist in the form it does without the mentorship of my advisor, Peter Buhr, who has ably led the \CFA{} team while giving me both the advantage of his decades of experience and the freedom to follow my own interests.
    152 
    153 My work on \CFA{} does not exist in a vaccuum, and it has been a pleasure and a privilege to collaborate with the members of the \CFA{} team: Andrew Beach, Richard Bilson, Michael Brooks, Bryan Chan, Thierry Delisle, Glen Ditchfield, Brice Dobry, Rob Schluntz, and others.
    154 I gratefully acknowledge the financial support of the National Science and Engineering Council of Canada and Huawei Ltd.\ for this project.
    155 I would also like to thank of my thesis committee, Werner Dietl, Doug Lea, Ond\v{r}ej Lhot\a'ak, and Gregor Richards, for the time and effort they have invested in providing constructive feedback to refine this work.
    156 I am indebted to Peter van Beek and Ian Munro for their algorithmic expertise and willingness to share their time with me.
    157 I have far too many colleagues in the Programming Languages Group and School of Computer Science to name, but I deeply appreciate their camaradarie; specifically with regard to the production of this thesis, I would like to thank Nathan Fish for recommending my writing soundtrack, and Sharon Choy for her unfailing supply of encouraging rabbit animations.
    158 
    159 Finally, to all my friends and family who have supported me and made Kitchener-Waterloo home these past seven years, thank you, I could not have done it without you; most especially, Christina Moss, you are the best of wives and best of women, your support has kept me going through the ups and downs of research, and your partnership is key to all my successes.
    160 
    161 \cleardoublepage
     135% I would like to thank all the little people who made this thesis possible.
     136% \cleardoublepage
    162137
    163138% D E D I C A T I O N
     
    166141% \begin{center}\textbf{Dedication}\end{center}
    167142
    168 % To Christina, who has spent too many hours politely listening to me work out the technical minutiae of this thesis, I love you, and I won't make you read it.
    169 
     143% This is dedicated to the one I love.
    170144% \cleardoublepage
    171145
  • doc/theses/aaron_moss_PhD/phd/generic-types.tex

    r8b590a4 r1c35c78  
    77While this approach is flexible and supports integration with the C type checker and tooling, it is also tedious and error prone, especially for more complex data structures.
    88A second approach is to use !void*!-based polymorphism, \eg{} the C standard library functions !bsearch! and !qsort!, which allow for the reuse of common functionality.
    9 However, basing all polymorphism on !void*! eliminates the type checker's ability to ensure that argument types are properly matched, often requiring a number of extra function parameters, pointer indirection, and dynamic allocation that is otherwise unnecessary.
     9However, basing all polymorphism on !void*! eliminates the type checker's ability to ensure that argument types are properly matched, often requiring a number of extra function parameters, pointer indirection, and dynamic allocation that is otherwise not needed.
    1010A third approach to generic code is to use preprocessor macros, which does allow the generated code to be both generic and type checked, but errors in such code may be difficult to locate and debug.
    1111Furthermore, writing and using preprocessor macros is unnatural and inflexible.
    12 Figure~\ref{bespoke-generic-fig} demonstrates the bespoke approach for a simple linked list with !insert! and !head! operations, while Figure~\ref{void-generic-fig} and Figure~\ref{macro-generic-fig} show the same example using !void*! and !#define!-based polymorphism, respectively.
     12Figure~\ref{bespoke-generic-fig} demonstrates the bespoke approach for a simple linked list with !insert! and !head! operations, while Figure~\ref{void-generic-fig} and Figure~\ref{macro-generic-fig} show the same example using !void*!- and !#define!-based polymorphism, respectively.
    1313
    1414\begin{figure}
    1515        \begin{cfa}
    16                 #include <stdlib.h> $\C{// for malloc}$
    17                 #include <stdio.h>  $\C{// for printf}$
    18 
    1916                struct int_list { int value; struct int_list* next; };
    2017
     
    5855\begin{figure}
    5956        \begin{cfa}
    60                 #include <stdlib.h> $\C{// for malloc}$
    61                 #include <stdio.h>  $\C{// for printf}$
    62 
    6357                // single code implementation
    6458
     
    10296\begin{figure}
    10397        \begin{cfa}
    104                 #include <stdlib.h> $\C{// for malloc}$
    105                 #include <stdio.h>  $\C{// for printf}$
    106 
    10798                $\C[\textwidth]{// code is nested in macros}$
    10899
     
    147138\CFA{} generic types integrate efficiently and naturally with the existing polymorphic functions in \CFA{}, while retaining backward compatibility with C in layout and support for separate compilation.
    148139A generic type can be declared in \CFA{} by placing a !forall! specifier on a !struct! or !union! declaration, and instantiated using a parenthesized list of types after the generic name.
    149 An example comparable to the C polymorphism examples in Figures~\ref{bespoke-generic-fig}, \ref{void-generic-fig}, and \ref{macro-generic-fig} can be seen in Figure~\ref{cfa-generic-fig}.
     140An example comparable to the C polymorphism examples in Figures~\ref{bespoke-generic-fig}, \ref{void-generic-fig}, and \ref{macro-generic-fig} can be seen in Figure~\ref{cfa-generic-fig} \TODO{test this code}.
    150141
    151142\begin{figure}
    152143        \begin{cfa}
    153                 #include <stdlib.hfa> $\C{// for alloc}$
    154                 #include <stdio.h>  $\C{// for printf}$
    155 
    156144                forall(otype T) struct list { T value; list(T)* next; };
    157145
     
    185173\section{Design}
    186174
    187 Though a number of languages have some implementation of generic types, backward compatibility with both C and existing \CFA{} polymorphism present some unique design constraints for \CFA{} generics.
    188 The guiding principle is to maintain an unsurprising language model for C programmers without compromising runtime efficiency.
    189 A key insight for this design is that C already possesses a handful of built-in generic types (\emph{derived types} in the language of the standard \cite[\S{}6.2.5]{C11}), notably pointer (!T*!) and array (!T[]!), and that user-definable generics should act similarly.
     175Though a number of languages have some implementation of generic types, backward compatibility with both C and existing \CFA{} polymorphism presented some unique design constraints for this project.
     176The guiding principle was to maintain an unsurprising language model for C programmers without compromising runtime efficiency.
     177A key insight for this design was that C already possesses a handful of built-in generic types (\emph{compound types} in the language of the standard\cit{}), notably pointer (!T*!) and array (!T[]!), and that user-definable generics should act similarly.
    190178
    191179\subsection{Related Work}
    192180
    193 One approach to the design of generic types is that taken by \CC{} templates \cite{C++}.
     181One approach to the design of generic types is that taken by \CC{} templates\cite{C++}.
    194182The template approach is closely related to the macro-expansion approach to C polymorphism demonstrated in Figure~\ref{macro-generic-fig}, but where the macro-expansion syntax has been given first-class language support.
    195183Template expansion has the benefit of generating code with near-optimal runtime efficiency, as distinct optimizations can be applied for each instantiation of the template.
    196 On the other hand, template expansion can also lead to significant code bloat, exponential in the worst case \cite{Haberman16}, and the costs of increased compilation time and instruction cache pressure cannot be ignored.
     184On the other hand, template expansion can also lead to significant code bloat, exponential in the worst case\cit{}, and the costs of increased instruction cache pressure at runtime and wasted developer time when compiling cannot be discounted.
    197185The most significant restriction of the \CC{} template model is that it breaks separate compilation and C's translation-unit-based encapsulation mechanisms.
    198 Because a \CC{} template is not actually code, but rather a ``recipe'' to generate code, template code must be visible at its call site to be used.
    199 Furthermore, \CC{} template code cannot be type-checked without instantiating it, a time consuming process with no hope of improvement until \CC{} concepts \cite{C++Concepts} are standardized in \CCtwenty{}.
    200 C code, by contrast, only needs a function declaration to call that function or a !struct! declaration to use (by-pointer) values of that type, desirable properties to maintain in \CFA{}.
    201 
    202 Java \cite{Java8} has another prominent implementation for generic types, introduced in Java~5 and based on a significantly different approach than \CC{}.
     186Because a \CC{} template is not actually code, but rather a sort of ``recipe'' to generate code, template code must be visible at its call site to be used.
     187Furthermore, \CC{} template code cannot be type-checked without instantiating it, a time consuming process with no hope of improvement until \CC{} concepts\cite{C++Concepts} are standardized in \CCtwenty{}.
     188C code, by contrast, only needs a !struct! or function declaration to call that function or use (by-pointer) values of that type, a desirable property to maintain for \CFA{}.
     189
     190Java\cite{Java8} has another prominent implementation for generic types, introduced in Java~5 and based on a significantly different approach than \CC{}.
    203191The Java approach has much more in common with the !void*!-polymorphism shown in Figure~\ref{void-generic-fig}; since in Java nearly all data is stored by reference, the Java approach to polymorphic data is to store pointers to arbitrary data and insert type-checked implicit casts at compile-time.
    204192This process of \emph{type erasure} has the benefit of allowing a single instantiation of polymorphic code, but relies heavily on Java's object model and garbage collector.
    205193To use this model, a more C-like language such as \CFA{} would be required to dynamically allocate internal storage for variables, track their lifetime, and properly clean them up afterward.
    206194
    207 Cyclone \cite{Grossman06} extends C and also provides capabilities for polymorphic functions and existential types which are similar to \CFA{}'s !forall! functions and generic types.
     195Cyclone\cite{Grossman06} is another language extending C, and also provides capabilities for polymorphic functions and existential types, similar to \CFA{}'s !forall! functions and generic types.
    208196Cyclone existential types can include function pointers in a construct similar to a virtual function table, but these pointers must be explicitly initialized at some point in the code, which is tedious and error-prone compared to \CFA{}'s implicit assertion satisfaction.
    209197Furthermore, Cyclone's polymorphic functions and types are restricted to abstraction over types with the same layout and calling convention as !void*!, \ie{} only pointer types and !int!.
     
    212200
    213201Many other languages include some form of generic types.
    214 As a brief survey, ML \cite{ML} was the first language to support parametric polymorphism, but unlike \CFA{} does not support the use of assertions and traits to constrain type arguments.
    215 Haskell \cite{Haskell10} combines ML-style polymorphism with the notion of type classes, similar to \CFA{} traits, but requiring an explicit association with their implementing types, unlike \CFA{}.
    216 Objective-C \cite{obj-c-book} is an extension to C which has had some industrial success; however, it did not support type-checked generics until recently \cite{xcode7}, and its garbage-collected, message-passing object-oriented model is a radical departure from C.
    217 Go \cite{Go}, and Rust \cite{Rust} are modern compiled languages with abstraction features similar to \CFA{} traits: \emph{interfaces} in Go and \emph{traits} in Rust.
    218 Go has implicit interface implementation and uses a ``fat pointer'' construct to pass polymorphic objects to functions, similar in principle to \CFA{}'s implicit forall parameters.
     202As a brief survey, ML\cite{ML} was the first language to support parameteric polymorphism, but unlike \CFA{} does not support the use of assertions and traits to constrain type arguments.
     203Haskell\cite{Haskell10} combines ML-style polymorphism with the notion of type classes, similar to \CFA{} traits, but requiring an explicit association with their implementing types, unlike \CFA{}.
     204Objective-C\cite{obj-c-book} is an extension to C which has had some industrial success; however, it did not support type-checked generics until recently\cite{xcode7}, and it's garbage-collected, message-passing object-oriented model is a radical departure from C.
     205Go\cite{Go}, and Rust\cite{Rust} are modern compiled languages with abstraction features similar to \CFA{} traits, \emph{interfaces} in Go and \emph{traits} in Rust.
     206Go has implicit interface implementation and uses a ``fat pointer'' construct to pass polymorphic objects to functions, similar in principle to \CFA{}'s implicit forall paramters.
    219207Go does not, however, allow user code to define generic types, restricting Go programmers to the small set of generic types defined by the compiler.
    220 Rust has powerful abstractions for generic programming, including explicit implementation of traits and options for both separately-compiled virtual dispatch and template-instantiated static dispatch in functions.
     208Rust has powerful abstractions for generic programming, including explicit implemenation of traits and options for both separately-compiled virtual dispatch and template-instantiated static dispatch in functions.
    221209On the other hand, the safety guarantees of Rust's \emph{lifetime} abstraction and borrow checker impose a distinctly idiosyncratic programming style and steep learning curve; \CFA{}, with its more modest safety features, allows direct ports of C code while maintaining the idiomatic style of the original source.
    222210
    223211\subsection{\CFA{} Generics}
    224212
    225 The generic types design in \CFA{} draws inspiration from both \CC{} and Java generics, capturing useful aspects of each.
    226 Like \CC{} template types, generic !struct! and !union! types in \CFA{} have macro-expanded storage layouts, but, like Java generics, \CFA{} generic types can be used with separately-compiled polymorphic functions without requiring either the type or function definition to be visible to the other.
     213The generic types design in \CFA{} draws inspiration from both \CC{} and Java generics, capturing the better aspects of each.
     214Like \CC{} template types, generic !struct!s and !union!s in \CFA{} have macro-expanded storage layouts, but, like Java generics, \CFA{} generic types can be used with separately-compiled polymorphic functions without requiring either the type or function definition to be visible to the other.
    227215The fact that the storage layout of any instantiation of a \CFA{} generic type is identical to that of the monomorphic type produced by simple macro replacement of the generic type parameters is important to provide consistent and predictable runtime performance, and to not impose any undue abstraction penalty on generic code.
    228 As an example, consider the following generic type and function:
    229 
    230 % TODO whatever the bug is with initializer-expressions not working, it affects this
     216As an example, consider the following generic type and function \TODO{test this}:
     217
    231218\begin{cfa}
    232219forall( otype R, otype S ) struct pair { R first; S second; };
    233220
    234221pair(const char*, int) with_len( const char* s ) {
    235         return (pair(const char*, int)){ s, strlen(s) };
     222        return (pair(const char*), int){ s, strlen(s) };
    236223}
    237224\end{cfa}
    238225
    239226In this example, !with_len! is defined at the same scope as !pair!, but it could be called from any context that can see the definition of !pair! and a declaration of !with_len!.
    240 If its return type were !pair(const char*, int)*!, callers of !with_len! would only need the declaration !forall(otype R, otype S) struct pair! to call it, in accordance with the usual C rules for opaque types.
    241 
    242 !with_len! is itself a monomorphic function, returning a type that is structurally identical to !struct { const char* first; int second; }!, and as such could be called from C given appropriate re-declarations and demangling flags.
    243 However, the definition of !with_len! depends on a polymorphic function call to the !pair! constructor, which only needs to be written once (in this case, implicitly by the compiler according to the usual \CFA{} constructor generation \cite{Schluntz17}) and can be re-used for a wide variety of !pair! instantiations.
    244 Since the parameters to this polymorphic constructor call are all statically known, compiler inlining can in principle eliminate any runtime overhead of this polymorphic call.
     227If its return type was !pair(const char*, int)*!, callers of !with_len! would only need the declaration !forall(otype R, otype S) struct pair! to call it, in accordance with the usual C rules for opaque types.
     228
     229!with_len! is itself a monomorphic function, returning a type that is structurally identical to !struct { const char* first; int second; }!, and as such could be called from C given an appropriate redeclaration and demangling flags.
     230However, the definition of !with_len! depends on a polymorphic function call to the !pair! constructor, which only needs to be written once (in this case, implicitly by the compiler according to the usual \CFA{} constructor generation\cite{Moss18}) and can be re-used for a wide variety of !pair! instantiations.
     231Since the parameters to this polymorphic constructor call are all statically known, compiler inlining can eliminate any runtime overhead of this polymorphic call.
    245232
    246233\CFA{} deliberately does not support \CC{}-style partial specializations of generic types.
    247 A particularly infamous example in the \CC{} standard library is !vector<bool>!, which is represented as a bit-string rather than the array representation of the other !vector! instantiations.
     234A particularly infamous example in the \CC{} standard library is !vector<bool>!, which is represented as a bitstring rather than the array representation of the other !vector! instantiations.
    248235Complications from this inconsistency (chiefly the fact that a single bit is not addressable, unlike an array element) make the \CC{} !vector! unpleasant to use in generic contexts due to the break in its public interface.
    249 Rather than attempting to plug leaks in the template specialization abstraction with a detailed method interface, \CFA{} takes the more consistent position that two types with an unrelated data layout are in fact unrelated types, and should be handled with different code.
    250 Of course, to the degree that distinct types are similar enough to share an interface, the \CFA{} !trait! system allows such an interface to be defined, and objects of types implementing that !trait! to be operated on using the same polymorphic functions.
     236Rather than attempting to plug leaks in the template specialization abstraction with a detailed method interface, \CFA{} takes the more principled position that two types with an unrelated data layout are in fact unrelated types, and should be handled with different code.
     237Of course, to the degree that distinct types are similar enough to share an interface, the \CFA{} !trait! system allows one to be defined, and objects of types implementing that !trait! to be operated on in the same polymorphic functions.
    251238
    252239Since \CFA{} polymorphic functions can operate over polymorphic generic types, functions over such types can be partially or completely specialized using the usual overload selection rules.
    253 As an example, the following generalization of !with_len! is a semantically-equivalent function which works for any type that has a !len! function declared, making use of both the ad-hoc (overloading) and parametric (!forall!) polymorphism features of \CFA{}:
     240As an example, the !with_len! function above could be an optimization of the following more general function:
    254241
    255242\begin{cfa}
     
    260247\end{cfa}
    261248
    262 \CFA{} generic types also support type constraints, as in !forall! functions.
     249\CFA{} generic types also support the type constraints from !forall! functions.
    263250For example, the following declaration of a sorted set type ensures that the set key implements equality and relational comparison:
    264251
     
    267254\end{cfa}
    268255
    269 These constraints are enforced by applying equivalent constraints to the compiler-generated constructors for this type.
     256These constraints are implemented by applying equivalent constraints to the compiler-generated constructors for this type.
    270257
    271258\section{Implementation} \label{generic-impl-sec}
    272259
    273 The ability to use generic types in polymorphic contexts means that the \CFA{} implementation must support a mechanism for accessing fields of generic types dynamically.
    274 While \CFACC{} could in principle use this same mechanism for accessing fields of generic types in monomorphic contexts as well, such an approach would throw away compiler knowledge of static types and impose an unnecessary runtime cost.
    275 Instead, my design for generic types in \CFACC{} distinguishes between \emph{concrete} generic types that have a fixed memory layout regardless of type parameters and \emph{dynamic} generic types that may vary in memory layout depending on their type parameters.
    276 
     260The ability to use generic types in polymorphic contexts means that the \CFA{} implementation in \CFACC{} must support a mechanism for accessing fields of generic types dynamically at runtime.
     261While \CFACC{} could in principle use this same mechanism for accessing fields of all generic types, such an approach would throw away compiler knowledge of static types and impose an unnecessary runtime cost, limiting the utility of the generic type design.
     262Instead, my design for generic type support in \CFACC{} distinguishes between \emph{concrete} generic types that have a fixed memory layout regardless of type parameters and \emph{dynamic} generic types that may vary in memory layout depending on their type parameters.
    277263A \emph{dtype-static} type has polymorphic parameters but is still concrete.
    278 Polymorphic pointers are an example of dtype-static types; given some type variable !T!, !T! is a polymorphic type, but !T*! has a fixed size and can therefore be represented by a !void*! in code generation.
     264Polymorphic pointers are an example of dtype-static types; given some type variable !T!, T is a polymorphic type, but !T*! has a fixed size and can therefore be represented by a !void*! in code generation.
    279265In particular, generic types where all parameters are un-!sized! (\ie{} they do not conform to the built-in !sized! trait because the compiler does not know their size and alignment) are always concrete, as there is no possibility for their layout to vary based on type parameters of unknown size and alignment.
    280266More precisely, a type is concrete if and only if all of its !sized! type parameters are concrete, and a concrete type is dtype-static if any of its type parameters are (possibly recursively) polymorphic.
    281 To illustrate, the following code using the !pair! type from above has each use of !pair! commented with its class:
    282 
    283 % TODO constructor bugs here too
     267To illustrate, the following code using the !pair! type from above \TODO{test this} has each use of !pair! commented with its class:
     268
    284269\begin{cfa}
    285270//dynamic, layout varies based on T
    286 forall(otype T) T value$\(_1\)$( pair(const char*, T) p ) { return p.second; }
     271forall(otype T) T value( pair(const char*, T) p ) { return p.second; }
    287272
    288273// dtype-static, F* and T* are concrete but recursively polymorphic
    289 forall(dtype F, otype T) T value$\(_2\)$( pair(F*, T*) ) { return *p.second; }
     274forall(dtype F, otype T) T value( pair(F*, T*) ) { return *p.second; }
    290275
    291276pair(const char*, int) p = {"magic", 42}; $\C[2.5in]{// concrete}$
    292277int i = value(p);
    293 pair(void*, int*) q = {0, &i}; $\C[2.5in]{// concrete}$
     278pair(void*, int*) q = {0, &p.second}; $\C[2.5in]{// concrete}$
    294279i = value(q);
    295280double d = 1.0;
     
    300285\subsection{Concrete Generic Types}
    301286
    302 The \CFACC{} translator template-expands concrete generic types into new structure types, affording maximal inlining.
     287The \CFACC{} translator template expands concrete generic types into new structure types, affording maximal inlining.
    303288To enable interoperation among equivalent instantiations of a generic type, \CFACC{} saves the set of instantiations currently in scope and reuses the generated structure declarations where appropriate.
    304289In particular, tuple types are implemented as a single compiler-generated generic type definition per tuple arity, and can be instantiated and reused according to the usual rules for generic types.
    305290A function declaration that accepts or returns a concrete generic type produces a declaration for the instantiated structure in the same scope, which all callers may reuse.
    306 As an example, the concrete instantiation for !pair(const char*, int)! is\footnote{Field name mangling for overloading purposes is omitted.\label{mangle-foot}}:
     291As an example, the concrete instantiation for !pair(const char*, int)! is\footnote{This omits the field name mangling performed by \CFACC{} for overloading purposes.\label{mangle-foot}}
    307292
    308293\begin{cfa}
     
    311296
    312297A concrete generic type with dtype-static parameters is also expanded to a structure type, but this type is used for all matching instantiations.
    313 In the example above, the !pair(F*, T*)! parameter to !value! is such a type; its expansion is below\footref{mangle-foot}, and it is used as the type of the variables !q! and !r! as well, with casts for member access where appropriate:
     298In the example above, the !pair(F*, T*)! parameter to !value! is such a type; its expansion is below\footref{mangle-foot}, and it is used as the type of the variables !q! and !r! as well, with casts for member access where appropriate.
    314299
    315300\begin{cfa}
     
    323308The design for generic types presented here adds an \emph{offset array} containing structure-member offsets for dynamic generic !struct! types.
    324309A dynamic generic !union! needs no such offset array, as all members are at offset 0, but size and alignment are still necessary.
    325 Access to members of a dynamic structure is provided at runtime via base-displacement addressing of the structure pointer and the member offset (similar to the !offsetof! macro), moving a compile-time offset calculation to runtime.
    326 
    327 The offset arrays are statically generated where possible.
     310Access to members of a dynamic structure is provided at runtime via base displacement addressing the structure pointer and the member offset (similar to the !offsetof! macro), moving a compile-time offset calculation to runtime.
     311
     312the offset arrays are statically generated where possible.
    328313If a dynamic generic type is passed or returned by value from a polymorphic function, \CFACC{} can safely assume that the generic type is complete (\ie{} has a known layout) at any call site, and the offset array is passed from the caller; if the generic type is concrete at the call site, the elements of this offset array can even be statically generated using the C !offsetof! macro.
    329 As an example, the body of !value!$_2$ above is implemented as:
     314As an example, the body of the second !value! function above is implemented as
    330315
    331316\begin{cfa}
     
    334319
    335320Here, !_assign_T! is passed in as an implicit parameter from !otype T! and takes two !T*! (!void*! in the generated code), a destination and a source, and !_retval! is the pointer to a caller-allocated buffer for the return value, the usual \CFA{} method to handle dynamically-sized return types.
    336 !_offsetof_pair! is the offset array passed into !value!; this array is statically generated at the call site as:
     321!_offsetof_pair! is the offset array passed into !value!; this array is generated at the call site as
    337322
    338323\begin{cfa}
     
    345330For instance, modularity is generally provided in C by including an opaque forward declaration of a structure and associated accessor and mutator functions in a header file, with the actual implementations in a separately-compiled \texttt{.c} file.
    346331\CFA{} supports this pattern for generic types, implying that the caller of a polymorphic function may not know the actual layout or size of a dynamic generic type and only holds it by pointer.
    347 \CFACC{} automatically generates \emph{layout functions} for cases where the size, alignment, and offset array of a generic struct cannot be passed into a function from that function's caller.
     332\CFACC{} automatically generates \emph{layout functions} for cases where the size, alignment, and offset array of a generic struct cannot be passed into a function from that functions's caller.
    348333These layout functions take as arguments pointers to size and alignment variables and a caller-allocated array of member offsets, as well as the size and alignment of all !sized! parameters to the generic structure.
    349 Un-!sized! parameters are not passed because they are forbidden from being used in a context that affects layout by C's usual rules about incomplete types.
    350 Similarly, the layout function can only safely be called from a context where the generic type definition is visible, because otherwise the caller does not know how large to allocate the array of member offsets.
    351 
    352 The C standard does not specify a memory layout for structs, but the POSIX ABI for x86 \cite{POSIX08} does; this memory layout is common for C implementations, but is a platform-specific issue for porting \CFA{}.
     334Un!sized! parameters not passed because they are forbidden from being used in a context that affects layout by C's usual rules about incomplete types.
     335Similarly, the layout function can only safely be called from a context where the generic type definition is visible, because otherwise the caller will not know how large to allocate the array of member offsets.
     336
     337The C standard does not specify a memory layout for structs, but the POSIX ABI for x86\cit{} does; this memory layout is common for C implementations, but is a platform-specific issue for porting \CFA{}.
    353338This algorithm, sketched below in pseudo-\CFA{}, is a straightforward mapping of consecutive fields into the first properly-aligned offset in the !struct! layout; layout functions for !union! types omit the offset array and simply calculate the maximum size and alignment over all union variants.
    354339Since \CFACC{} generates a distinct layout function for each type, constant-folding and loop unrolling are applied.
     
    357342forall(dtype T1, dtype T2, ... | sized(T1) | sized(T2) | ...)
    358343void layout(size_t* size, size_t* align, size_t* offsets) {
     344        // initialize values
    359345        *size = 0; *align = 1;
    360346        // set up members
     
    374360\end{cfa}
    375361
    376 Results of layout-function calls are cached so that they are only computed once per type per function.
    377 Layout functions also allow generic types to be used in a function definition without reflecting them in the function signature, an important implementation-hiding constraint of the design.
     362Results of layout function calls are cached so that they are only computed once per type per function.
     363Layout functions also allow generic types to be used in a function definition without reflecting them in the function signature, an important implemenation-hiding constraint of the design.
    378364For instance, a function that strips duplicate values from an unsorted !list(T)! likely has a reference to the list as its only explicit parameter, but uses some sort of !set(T)! internally to test for duplicate values.
    379365This function could acquire the layout for !set(T)! by calling its layout function, providing as an argument the layout of !T! implicitly passed into that function.
    380366
    381 Whether a type is concrete, dtype-static, or dynamic is decided solely on the basis of the type arguments and !forall! clause type parameters.
    382 This design allows opaque forward declarations of generic types, \eg{} !forall(otype T) struct Box;! like in C, all uses of !Box(T)! can be separately compiled, and callers from other translation units know the proper calling conventions.
    383 In an alternate design, where the definition of a structure type is included in deciding whether a generic type is dynamic or concrete, some further types may be recognized as dtype-static --- \eg{} !Box! could be defined with a body !{ T* p; }!, and would thus not depend on !T! for its layout.
    384 However, the existence of an !otype! parameter !T! means that !Box! \emph{could} depend on !T! for its layout if this definition is not visible, and preserving separate compilation (and the associated C compatibility) is a more important design metric.
     367Whether a type is concrete, dtype-static, or dynamic is decided solely on the basis of the type arguments and !forall! clause type paramters.
     368This design allows opaque forward declarations of generic types, \eg{} !forall(otype T) struct Box;! like in C, all uses of $Box(T)$ can be separately compiled, and callers from other translation units know the proper calling conventions to use.
     369In an alternate design where the definition of a structure type is included in deciding whether a generic type is dynamic or concrete, some further types may be recognized as dtype-static --- \eg{} !Box! could be defined with a body !{ T* p; }!, and would thus not depend on !T! for its layout.
     370However, the existence of an !otype! parameter !T! means that !Box! \emph{could} depend on !T! for its layout if this definition is not visible, and we judged preserving separate compilation (and the associated C compatibility) in the implemented design to be an acceptable trade-off.
    385371
    386372\subsection{Applications of Dtype-static Types} \label{dtype-static-sec}
     
    401387Another useful pattern enabled by reused dtype-static type instantiations is zero-cost \emph{tag structures}.
    402388Sometimes, information is only used for type checking and can be omitted at runtime.
    403 In the example below, !scalar! is a dtype-static type; hence, all uses have a single structure definition containing !unsigned long! and can share the same implementations of common functions, like !?+?!.
     389In the example below, !scalar! is a dtype-static type; hence, all uses have a single structure definition containing !unsigned long! and can share the same implementations of common functions like !?+?!.
    404390These implementations may even be separately compiled, unlike \CC{} template functions.
    405391However, the \CFA{} type checker ensures matching types are used by all calls to !?+?!, preventing nonsensical computations like adding a length to a volume.
     
    422408\section{Performance Experiments} \label{generic-performance-sec}
    423409
    424 To validate the practicality of this generic type design, microbenchmark-based tests were conducted against a number of comparable code designs in C and \CC{}, first published in \cite{Moss18}.
    425 Since all these languages are all C-based and compiled with the same compiler backend, maximal-performance benchmarks should show little runtime variance, differing only in length and clarity of source code.
     410To validate the practicality of this generic type design I have conducted microbenchmark-based tests against a number of comparable code designs in C and \CC{}, first published in \cite{Moss18}.
     411Since all these languages are compiled with the same compiler backend and share a subset essentially comprising standard C, maximal-performance benchmarks should show little runtime variance, differing only in length and clarity of source code.
    426412A more illustrative comparison measures the costs of idiomatic usage of each language's features.
    427 The code below shows the \CFA{} benchmark tests for a generic stack based on a singly-linked list; the test suite is equivalent for the other other languages, code for which is included in Appendix~\ref{generic-bench-app}.
     413The code below shows the \CFA{} benchmark tests for a generic stack based on a singly-linked list; the test suite is equivalent for the other other languages.
    428414The experiment uses element types !int! and !pair(short, char)! and pushes $N = 40M$ elements on a generic stack, copies the stack, clears one of the stacks, and finds the maximum value in the other stack.
    429415
    430416\begin{cfa}
    431 #define N 4000000
    432417int main() {
    433418        int max = 0, val = 42;
     
    450435\end{cfa}
    451436
    452 The four versions of the benchmark implemented are C with !void*!-based polymorphism, \CFA{} with parametric polymorphism, \CC{} with templates, and \CC{} using only class inheritance for polymorphism, denoted \CCV{}.
     437The four versions of the benchmark implemented are C with !void*!-based polymorphism, \CFA{} with parameteric polymorphism, \CC{} with templates, and \CC{} using only class inheritance for polymorphism, denoted \CCV{}.
    453438The \CCV{} variant illustrates an alternative object-oriented idiom where all objects inherit from a base !object! class, mimicking a Java-like interface; in particular, runtime checks are necessary to safely downcast objects.
    454439The most notable difference among the implementations is the memory layout of generic types: \CFA{} and \CC{} inline the stack and pair elements into corresponding list and pair nodes, while C and \CCV{} lack such capability and, instead, must store generic objects via pointers to separately allocated objects.
     
    464449\centering
    465450\input{generic-timing}
    466 \caption[Benchmark timing results]{Benchmark timing results (smaller is better)} \label{generic-eval-fig}
     451\caption{Benchmark timing results (smaller is better)} \label{generic-eval-fig}
    467452\end{figure}
    468453
     
    490475Use of these standard library types has minimal impact on the performance benchmarks, but shrinks the \CFA{} and \CC{} code to 39 and 42 lines, respectively.
    491476The difference between the \CFA{} and \CC{} line counts is primarily declaration duplication to implement separate compilation; a header-only \CFA{} library is similar in length to the \CC{} version.
    492 On the other hand, due to the language shortcomings mentioned at the beginning of the chapter, C does not have a generic collections library in its standard distribution, resulting in frequent re-implementation of such collection types by C programmers.
     477On the other hand, due to the language shortcomings mentioned at the beginning of the chapter, C does not have a generic collections library in its standard distribution, resulting in frequent reimplementation of such collection types by C programmers.
    493478\CCV{} does not use the \CC{} standard template library by construction, and, in fact, includes the definition of !object! and wrapper classes for !char!, !short!, and !int! in its line count, which inflates this count somewhat, as an actual object-oriented language would include these in the standard library.
    494479I justify the given line count by noting that many object-oriented languages do not allow implementing new interfaces on library types without subclassing or wrapper types, which may be similarly verbose.
     
    496481Line count is a fairly rough measure of code complexity; another important factor is how much type information the programmer must specify manually, especially where that information is not type-checked.
    497482Such unchecked type information produces a heavier documentation burden and increased potential for runtime bugs and is much less common in \CFA{} than C, with its manually specified function pointer arguments and format codes, or \CCV{}, with its extensive use of un-type-checked downcasts, \eg{} !object! to !integer! when popping a stack.
    498 To quantify this manual typing, the ``redundant type annotations'' line in Table~\ref{generic-eval-table} counts the number of lines on which the known type of a variable is re-specified, either as a format specifier, explicit downcast, type-specific function, or by name in a !sizeof!, !struct! literal, or !new! expression.
     483To quantify this manual typing, the ``redundant type annotations'' line in Table~\ref{generic-eval-table} counts the number of lines on which the known type of a variable is respecified, either as a format specifier, explicit downcast, type-specific function, or by name in a !sizeof!, !struct! literal, or !new! expression.
    499484The \CC{} benchmark uses two redundant type annotations to create new stack nodes, whereas the C and \CCV{} benchmarks have several such annotations spread throughout their code.
    500485The \CFA{} benchmark is able to eliminate \emph{all} redundant type annotations through use of the return-type polymorphic !alloc! function in the \CFA{} standard library.
     
    502487\section{Future Work}
    503488
    504 The generic types presented here are already sufficiently expressive to implement a variety of useful library types.
     489The generic types design presented here is already sufficiently expressive to implement a variety of useful library types.
    505490However, some other features based on this design could further improve \CFA{}.
    506491
    507492The most pressing addition is the ability to have non-type generic parameters.
    508 C already supports fixed-length array types, \eg{} !int[10]!; these types are essentially generic types with unsigned integer parameters (\ie{} array dimension), and allowing \CFA{} users the capability to build similar types is a requested feature.
    509 % More exotically, the ability to have these non-type parameters depend on dynamic runtime values rather than static compile-time constants opens up interesting opportunities for type-checking problematic code patterns.
    510 % For example, if a collection iterator was parameterized over the pointer to the collection it was drawn from, then a sufficiently powerful static analysis pass could ensure that that iterator was only used for that collection, eliminating one source of hard-to-find bugs.
    511 
    512 The implementation mechanisms behind generic types can also be used to add new features to \CFA{}.
    513 One such potential feature is \emph{field assertions}, an addition to the existing function and variable assertions on polymorphic type variables.
    514 These assertions could be specified using this proposed syntax:
    515 
    516 \begin{cfa}
    517 trait hasXY(dtype T) {
    518         int T.x;  $\C{// T has a field x of type int}$
    519         int T.y;  $\C{// T has a field y of type int}$
    520 };
    521 \end{cfa}
    522 
     493C already supports fixed-length array types, \eg{} !int[10]!; these types are essentially generic types with unsigned integer parameters, and allowing \CFA{} users the capability to build similar types is a requested feature.
     494More exotically, the ability to have these non-type parameters depend on dynamic runtime values rather than static compile-time constants opens up interesting opportunities for type-checking problematic code patterns.
     495For example, if a collection iterator was parameterized over the pointer to the collection it was drawn from, then a sufficiently powerful static analysis pass could ensure that that iterator was only used for that collection, eliminating one source of hard-to-find bugs.
     496
     497The implementation mechanisms behind this generic types design can also be used to add new features to \CFA{}.
     498One such potential feature would be to add \emph{field assertions} to the existing function and variable assertions on polymorphic type variables.
    523499Implementation of these field assertions would be based on the same code that supports member access by dynamic offset calculation for dynamic generic types.
    524500Simulating field access can already be done more flexibly in \CFA{} by declaring a trait containing an accessor function to be called from polymorphic code, but these accessor functions impose some overhead both to write and call, and directly providing field access via an implicit offset parameter would be both more concise and more efficient.
    525 Of course, there are language design trade-offs to such an approach, notably that providing the two similar features of field and function assertions would impose a burden of choice on programmers writing traits, with field assertions more efficient, but function assertions more general; given this open design question a decision on field assertions is deferred until \CFA{} is more mature.
    526 
    527 If field assertions are included in the language, a natural extension would be to provide a structural inheritance mechanism for every !struct! type that simply turns the list of !struct! fields into a list of field assertions, allowing monomorphic functions over that type to be generalized to polymorphic functions over other similar types with added or reordered fields, for example:
    528 
    529 \begin{cfa}
    530 struct point { int x, y; };  $\C{// traitof(point) is equivalent to hasXY above}$
    531 struct coloured_point { int x, y; enum { RED, BLACK } colour };
    532 
    533 // works for both point and coloured_point
    534 forall(dtype T | traitof(point)(T) )
    535 double hypot( T& p ) { return sqrt( p.x*p.x + p.y*p.y ); }
    536 \end{cfa}
    537 
    538 \CFA{} could also support a packed or otherwise size-optimized representation for generic types based on a similar mechanism --- nothing in the use of the offset arrays implies that the field offsets need to be monotonically increasing.
     501Of course, there are language design trade-offs to such an approach, notably that providing the two similar features of field and function assertions would impose a burden of choice on programmers writing traits, with field assertions more efficient, but function assertions more general; given this open design question we have deferred a decision on field assertions until we have more experience using \CFA{}.
     502If field assertions are included in the language, a natural extension would be to provide a structural inheritance mechanism for every !struct! type that simply turns the list of !struct! fields into a list of field assertions, allowing monomorphic functions over that type to be generalized to polymorphic functions over other similar types with added or reordered fields.
     503\CFA{} could also support a packed or otherwise size-optimized representation for generic types based on a similar mechanism --- the layout function would need to be re-written, but nothing in the use of the offset arrays implies that the field offsets need be monotonically increasing.
    539504
    540505With respect to the broader \CFA{} polymorphism design, the experimental results in Section~\ref{generic-performance-sec} demonstrate that though the runtime impact of \CFA{}'s dynamic virtual dispatch is low, it is not as low as the static dispatch of \CC{} template inlining.
    541 However, rather than subject all \CFA{} users to the compile-time costs of ubiquitous template expansion, it is better to target performance-sensitive code more precisely.
    542 Two promising approaches are an !inline! annotation at polymorphic function call sites to create a template specialization of the function (provided the code is visible) or placing a different !inline! annotation on polymorphic function definitions to instantiate a specialized version of the function for some set of types.
    543 These approaches are complementary and allow performance optimizations to be applied only when necessary, without suffering global code bloat.
     506However, rather than subject all \CFA{} users to the compile-time costs of ubiquitous template expansion, we are considering more targeted mechanisms for performance-sensitive code.
     507Two promising approaches are are an !inline! annotation at polymorphic function call sites to create a template specialization of the function (provided the code is visible) or placing a different !inline! annotation on polymorphic function definitions to instantiate a specialized version of the function for some set of types.
     508These approaches are not mutually exclusive and allow performance optimizations to be applied only when necessary, without suffering global code bloat.
     509In general, the \CFA{} team believes that separate compilation works well with loaded hardware caches by producing smaller code, which may offset the benefit of larger inlined code.
  • doc/theses/aaron_moss_PhD/phd/introduction.tex

    r8b590a4 r1c35c78  
    22
    33The C programming language has had a wide-ranging impact on the design of software and programming languages.
    4 In the 30 years since its first standardization, it has consistently been one of the most popular programming languages, with billions of lines of C code still in active use, and tens of thousands of trained programmers producing it. The TIOBE index\cite{TIOBE} tracks popularity of programming languages over time, and C has never dropped below second place:
     4In the 30 years since its first standardization, it has consistently been one of the most popular programming languages, with millions of lines of C code still in active use, and tens of thousands of trained programmers producing it. The TIOBE index\cite{TIOBE} tracks popularity of programming languages over time, and C has never dropped below second place:
    55
    66\begin{table}[h]
     
    1818\end{table}
    1919
    20 The impact of C on programming language design is also obvious from Table~\ref{tiobe-table}; with the exception of Python, all of the top five languages use C-like syntax and control structures.
    21 \CC{} is even a largely backwards-compatible extension of C.
    22 Though its lasting popularity and wide impact on programming language design point to the continued relevance of C, there is also widespread desire of programmers for languages with more expressive power and programmer-friendly features; accommodating both maintenance of legacy C code and development of the software of the future is a difficult task for a single programming language.
     20The impact of C on programming language design is also obvious from Table~\ref{tiobe-table}; with the exception of Python, all of the top five languages use C-like syntax and procedural control structures.
     21\CC{} is even a largely backwards-compatible extension of C, with development dating back nearly as far as C itself.
     22Though its lasting popularity and wide impact on programming language design point to the continued relevance of C, they also highlight the widespread desire of programmers for languages with more expressive power and programmer-friendly features; accommodating both low-impact maintenance of legacy C code and low-effort development of the software of the future is a difficult task for a single programming language.
    2323
    24 \CFA{}\footnote{Pronounced ``C-for-all'', and written \CFA{} or \CFL{}.} is an evolutionary modernization of the C programming language that aims to fulfill both these ends well.
     24\CFA{}\footnote{Pronounced ``C-for-all'', and written \CFA{} or \CFL{}.} is an evolutionary modernization of the C programming language which aims to fulfill both these ends well.
    2525\CFA{} both fixes existing design problems and adds multiple new features to C, including name overloading, user-defined operators, parametric-polymorphic routines, and type constructors and destructors, among others.
    2626The new features make \CFA{} more powerful and expressive than C, while maintaining strong backward-compatibility with both C code and the procedural paradigm expected by C programmers.
    27 Unlike other popular C extensions like \CC{} and Objective-C, \CFA{} adds modern features to C without imposing an object-oriented paradigm to use them.
    2827However, these new features do impose a compile-time cost, particularly in the expression resolver, which must evaluate the typing rules of a significantly more complex type-system.
    2928
    3029This thesis is focused on making \CFA{} a more powerful and expressive language, both by adding new features to the \CFA{} type system and ensuring that both added and existing features can be efficiently implemented in \CFACC{}, the \CFA{} reference compiler.
    31 Particular contributions of this work include:
    32 \begin{itemize}
    33 \item design and implementation of parametric-polymorphic (``generic'') types in a manner compatible with the existing polymorphism design of \CFA{} (Chapter~\ref{generic-chap}),
    34 \item a new expression resolution algorithm designed to quickly locate the optimal declarations for a \CFA{} expression (Chapter~\ref{resolution-chap}),
    35 \item a type environment data structure based on a novel variant of the union-find algorithm (Chapter~\ref{env-chap}),
    36 \item and as a technical contribution, a prototype system for compiler algorithm development which encapsulates the essential aspects of the \CFA{} type system without incurring the technical debt of the existing system or the friction-inducing necessity of maintaining a working compiler (Chapter~\ref{expr-chap}).
    37 \end{itemize}
     30Particular contributions of this work include design and implementation of
     31parametric-polymorphic (``generic'') types in a manner compatible with the existing polymorphism design of \CFA{} (Chapter~\ref{generic-chap}), a type environment data structure based on a novel variant of the union-find algorithm (Chapter~\ref{env-chap}), and a new expression resolution algorithm designed to quickly locate the optimal declarations for a \CFA{} expression (Chapter~\ref{resolution-chap}).
     32This expression resolution algorithm was designed with the aid of a purpose-built prototype system which encapsulates the essential aspects of the \CFA{} type system without incurring the technical debt of the existing system or the friction-inducing necessity of maintaining a working compiler; the goal of this prototype system was to discover effective heuristics to avoid performing unnecessary work in the process of locating the optimal \CFA{} expression resolution.
    3833
    39 Though the direction and validation of this work is fairly narrowly focused on the \CFA{} programming language, the tools used and results obtained should be of interest to a wider compiler and programming language design community.
    40 In particular, with the addition of \emph{concepts} in \CCtwenty{} \cite{C++Concepts}, conforming \CC{} compilers must support a model of type assertions very similar to that in \CFA{}, and the algorithmic techniques used here may prove useful.
    41 Type environments are also widely modelled in compiler implementations, particularly for functional languages, though also increasingly commonly for other languages (such as Rust) which perform type inference; the type environment presented here may be useful to those language implementers.
     34Though the direction and validation of this work was fairly narrowly focused on the \CFA{} programming language, the tools used and results obtained should be of interest to a wider compiler and programming language design community.
     35In particular, with the addition of \emph{concepts} in \CCtwenty{}, conforming \CC{} compilers must support a model of type assertions very similar to that in \CFA{}, and the algorithmic techniques used in the expression resolution algorithm presented here may prove useful.
     36Type environments are also widely modelled in compiler implementations, particularly of functional languages, though also increasingly commonly in other languages (such as Rust) which perform type inference; the type environment presented here may be useful to those language implementers.
  • doc/theses/aaron_moss_PhD/phd/macros.tex

    r8b590a4 r1c35c78  
    1515\newcommand{\Csharp}{C\raisebox{-0.7ex}{\Large$^\sharp$}} % C# symbolic name
    1616
    17 \newcommand{\ie}{\textit{i.e.}\@}
    18 \newcommand{\eg}{\textit{e.g.}\@}
    19 \newcommand{\etc}{\textit{etc.}\@}
    20 \newcommand{\etal}{\textit{et~al.}\@}
     17\newcommand{\ie}{\textit{i.e.}}
     18\newcommand{\eg}{\textit{e.g.}}
     19\newcommand{\etc}{\textit{etc.}}
     20\newcommand{\etal}{\textit{et~al.}}
    2121
    2222\newcommand{\myset}[1]{\left\{#1\right\}}
  • doc/theses/aaron_moss_PhD/phd/resolution-heuristics.tex

    r8b590a4 r1c35c78  
    1 \chapter{Resolution Algorithms}
     1\chapter{Resolution Heuristics}
    22\label{resolution-chap}
    33
    4 % consider using "satisfaction" throughout when talking about assertions
    5 % "valid" instead of "feasible" interpretations
     4The main task of the \CFACC{} type-checker is \emph{expression resolution}, determining which declarations the identifiers in each expression correspond to.
     5Resolution is a straightforward task in C, as each declaration has a unique identifier, but in \CFA{} the name overloading features discussed in Section~\ref{overloading-sec} generate multiple candidate declarations for each identifier.
     6I refer to a given matching between identifiers and declarations in an expression as an \emph{interpretation}; an interpretation also includes information about polymorphic type bindings and implicit casts to support the \CFA{} features discussed in Sections~\ref{poly-func-sec} and~\ref{implicit-conv-sec}, each of which increase the proportion of feasible candidate interpretations.
     7To choose between feasible interpretations, \CFA{} defines a \emph{conversion cost} to rank interpretations; the expression resolution problem is thus to find the unique minimal-cost interpretation for an expression, reporting an error if no such interpretation exists.
    68
    7 The main task of the \CFACC{} type-checker is \emph{expression resolution}, determining which declarations the identifiers in each expression correspond to.
    8 Resolution is a straightforward task in C, as no declarations share identifiers, but in \CFA{} the name overloading features discussed in Section~\ref{overloading-sec} generate multiple candidate declarations for each identifier.
    9 A given matching between identifiers and declarations in an expression is an \emph{interpretation}; an interpretation also includes information about polymorphic type bindings and implicit casts to support the \CFA{} features discussed in Sections~\ref{poly-func-sec} and~\ref{implicit-conv-sec}, each of which increase the number of valid candidate interpretations.
    10 To choose among valid interpretations, a \emph{conversion cost} is used to rank interpretations.
    11 Hence, the expression resolution problem is to find the unique minimal-cost interpretation for an expression, reporting an error if no such interpretation exists.
     9\section{Conversion Cost}
    1210
    13 \section{Expression Resolution}
    1411
    15 \subsection{Type Unification}
    1612
    17 The polymorphism features of \CFA{} require binding of concrete types to polymorphic type variables.
    18 Briefly, \CFACC{} keeps a mapping from type variables to the concrete types they are bound to as an auxiliary data structure during expression resolution; Chapter~\ref{env-chap} describes this \emph{environment} data structure in more detail.
    19 A \emph{unification} algorithm is used to simultaneously check two types for equivalence with respect to the substitutions in an environment and update that environment.
    20 Essentially, unification recursively traverses the structure of both types, checking them for equivalence, and when it encounters a type variable, it replaces it with the concrete type it is bound to; if the type variable has not yet been bound, the unification algorithm assigns the equivalent type as the bound type of the variable, after performing various consistency checks.
    21 Ditchfield~\cite{Ditchfield92} and Bilson~\cite{Bilson03} describe the semantics of \CFA{} unification in more detail.
     13% Discuss changes to cost model, as promised in Ch. 2
    2214
    23 \subsection{Conversion Cost} \label{conv-cost-sec}
    24 
    25 C does not have an explicit cost model for implicit conversions, but the ``usual arithmetic conversions'' \cite[\S{}6.3.1.8]{C11} used to decide which arithmetic operators to use define one implicitly.
    26 The only context in which C has name overloading is the arithmetic operators, and the usual arithmetic conversions define a \emph{common type} for mixed-type arguments to binary arithmetic operators.
    27 Since for backward-compatibility purposes the conversion costs of \CFA{} must produce an equivalent result to these common type rules, it is appropriate to summarize \cite[\S{}6.3.1.8]{C11} here:
    28 
    29 \begin{itemize}
    30 \item If either operand is a floating-point type, the common type is the size of the largest floating-point type. If either operand is !_Complex!, the common type is also !_Complex!.
    31 \item If both operands are of integral type, the common type has the same size\footnote{Technically, the C standard defines a notion of \emph{rank} in \cite[\S{}6.3.1.1]{C11}, a distinct value for each \lstinline{signed} and \lstinline{unsigned} pair; integral types of the same size thus may have distinct ranks. For instance, though \lstinline{int} and \lstinline{long} may have the same size, \lstinline{long} always has greater rank. The standard-defined types are declared to have greater rank than any types of the same size added as compiler extensions.} as the larger type.
    32 \item If the operands have opposite signedness, the common type is !signed! if the !signed! operand is strictly larger, or !unsigned! otherwise. If the operands have the same signedness, the common type shares it.
    33 \end{itemize}
    34 
    35 Beginning with the work of Bilson~\cite{Bilson03}, \CFA{} defines a \emph{conversion cost} for each function call in a way that generalizes C's conversion rules.
    36 Loosely defined, the conversion cost counts the implicit conversions utilized by an interpretation.
    37 With more specificity, the cost is a lexicographically-ordered tuple, where each element corresponds to a particular kind of conversion.
    38 In Bilson's design, conversion cost is a 3-tuple, $(unsafe, poly, safe)$, where $unsafe$ is the count of unsafe (narrowing) conversions, $poly$ is the count of polymorphic type bindings, and $safe$ is the sum of the degree of safe (widening) conversions.
    39 Degree of safe conversion is calculated as path weight in a directed graph of safe conversions between types; Bilson's version and the current version of this graph are in Figures~\ref{bilson-conv-fig} and~\ref{extended-conv-fig}, respectively.
    40 The safe conversion graph is designed such that the common type $c$ of two types $u$ and $v$ is compatible with the C standard definitions from \cite[\S{}6.3.1.8]{C11} and can be calculated as the unique type minimizing the sum of the path weights of $\overrightarrow{uc}$ and $\overrightarrow{vc}$.
    41 The following example lists the cost in the Bilson model of calling each of the following functions with two !int! parameters:
    42 
    43 \begin{cfa}
    44 void f$\(_1\)$(char, long); $\C{// (1,0,1)}$
    45 void f$\(_2\)$(short, long); $\C{// (1,0,1)}$
    46 forall(otype T) void f$\(_3\)$(T, long); $\C{// (0,1,1)}$
    47 void f$\(_4\)$(long, long); $\C{// (0,0,2)}$
    48 void f$\(_5\)$(int, unsigned long); $\C{// (0,0,2)}$
    49 void f$\(_6\)$(int, long); $\C{// (0,0,1)}$
    50 \end{cfa}
    51 
    52 Note that safe and unsafe conversions are handled differently; \CFA{} counts distance of safe conversions (\eg{} !int! to !long! is cheaper than !int! to !unsigned long!), while only counting the number of unsafe conversions (\eg{} !int! to !char! and !int! to !short! both have unsafe cost 1, as in !f!$_1$ and !f!$_2$ above).
    53 These costs are summed over the parameters in a call; in the example above, the cost of the two !int! to !long! conversions for !f!$_4$ sum equal to the one !int! to !unsigned long! conversion for !f!$_5$.
    54 
    55 \begin{figure}
    56         \centering
    57         \begin{subfigure}[h]{3in}
    58                 \includegraphics{figures/bilson-conv-graph}
    59                 \caption{Bilson} \label{bilson-conv-fig}
    60         \end{subfigure}~\begin{subfigure}[h]{3in}
    61                 \includegraphics{figures/extended-conv-graph}
    62                 \caption{Extended} \label{extended-conv-fig}
    63         \end{subfigure}
    64         % \includegraphics{figures/safe-conv-graph}
    65         \caption[Safe conversion graphs.]{Safe conversion graphs. In both graphs, plain arcs have cost $safe = 1, sign = 0$ while dashed sign-conversion arcs have cost $safe = 1, sign = 1$. As per \cite[\S{}6.3.1.8]{C11}, types promote to types of the same signedness with greater rank, from \lstinline{signed} to \lstinline{unsigned} with the same rank, and from \lstinline{unsigned} to \lstinline{signed} with greater size. The arc from \lstinline{unsigned long} to \lstinline{long long} (highlighted in red in \ref{bilson-conv-fig}) is deliberately omitted in \ref{extended-conv-fig}, as on the presented system \lstinline{sizeof(long) == sizeof(long long)}.}
    66         \label{safe-conv-fig}
    67 \end{figure}
    68 
    69 As part of adding reference types to \CFA{} (see Section~\ref{type-features-sec}), Schluntz added a new $reference$ element to the cost tuple, which counts the number of implicit reference-to-rvalue conversions performed so that candidate interpretations can be distinguished by how closely they match the nesting of reference types; since references are meant to act almost indistinguishably from lvalues, this $reference$ element is the least significant in the lexicographic comparison of cost tuples.
    70 
    71 I also refined the \CFA{} cost model as part of this thesis work.
    72 Bilson's \CFA{} cost model includes the cost of polymorphic type bindings from a function's type assertions in the $poly$ element of the cost tuple; this has the effect of making more-constrained functions more expensive than less-constrained functions, as in the following example, based on differing iterator types:
    73 
    74 \begin{cfa}
    75 forall(dtype T | { T& ++?(T&); }) T& advance$\(1\)$(T& i, int n);
    76 forall(dtype T | { T& ++?(T&); T& ?+=?(T&, int)}) T& advance$\(2\)$(T& i, int n);
    77 \end{cfa}
    78 
    79 In resolving a call to !advance!, the binding to the !T&! parameter in the assertions is added to the $poly$ cost in Bilson's model.
    80 However, type assertions actually make a function \emph{less} polymorphic, and as such functions with more type assertions should be preferred in type resolution.
    81 In the example above, if the meaning of !advance! is ``increment !i! !n! times'', !advance!$_1$ requires an !n!-iteration loop, while !advance!$_2$ can be implemented more efficiently with the !?+=?! operator; as such, !advance!$_2$ should be chosen over !advance!$_1$ whenever its added constraint can be satisfied.
    82 Accordingly, a $specialization$ element is now included in the \CFA{} cost tuple, the values of which are always negative.
    83 Each type assertion subtracts 1 from $specialization$, so that more-constrained functions cost less, and thus are chosen over less-constrained functions, all else being equal.
    84 A more sophisticated design would define a partial order over sets of type assertions by set inclusion (\ie{} one function would only cost less than another if it had a strict superset of assertions,  rather than just more total assertions), but I did not judge the added complexity of computing and testing this order to be worth the gain in specificity.
    85 
    86 I also incorporated an unimplemented aspect of Ditchfield's earlier cost model.
    87 In the example below, adapted from \cite[p.89]{Ditchfield92}, Bilson's cost model only distinguished between the first two cases by accounting extra cost for the extra set of !otype! parameters, which, as discussed above, is not a desirable solution:
    88 
    89 \begin{cfa}
    90 forall(otype T, otype U) void f$\(_1\)$(T, U);  $\C[3.25in]{// polymorphic}$
    91 forall(otype T) void f$\(_2\)$(T, T);  $\C[3.25in]{// less polymorphic}$
    92 forall(otype T) void f$\(_3\)$(T, int);  $\C[3.25in]{// even less polymorphic}$
    93 forall(otype T) void f$\(_4\)$(T*, int); $\C[3.25in]{// least polymorphic}$
    94 \end{cfa}
    95 
    96 The new cost model accounts for the fact that functions with more polymorphic variables are less constrained by introducing a $var$ cost element that counts the number of type variables on a candidate function.
    97 In the example above, !f!$_1$ has $var = 2$, while the others have $var = 1$.
    98 The new cost model also accounts for a nuance unhandled by Ditchfield or Bilson, in that it makes the more specific !f!$_4$ cheaper than the more generic !f!$_3$; !f!$_4$ is presumably somewhat optimized for handling pointers, but the prior \CFA{} cost model could not account for the more specific binding, as it simply counted the number of polymorphic unifications.
    99 
    100 In the modified model, each level of constraint on a polymorphic type in the parameter list results in a decrement of the $specialization$ cost element, which is shared with the count of assertions due to their common nature as constraints on polymorphic type bindings.
    101 Thus, all else equal, if both a binding to !T! and a binding to !T*! are available, the model chooses the more specific !T*! binding with $specialization = -1$.
    102 This process is recursive, such that !T**! has $specialization = -2$.
    103 This calculation works similarly for generic types, \eg{} !box(T)! also has specialization cost $-1$.
    104 For multi-argument generic types, the least-specialized polymorphic parameter sets the specialization cost, \eg{} the specialization cost of !pair(T, S*)! is $-1$ (from !T!) rather than $-2$ (from !S!).
    105 Specialization cost is not counted on the return type list; since $specialization$ is a property of the function declaration, a lower specialization cost prioritizes one declaration over another.
    106 User programmers can choose between functions with varying parameter lists by adjusting the arguments, but the same is not true in general of varying return types\footnote{In particular, as described in Section~\ref{expr-cost-sec}, cast expressions take the cheapest valid and convertable interpretation of the argument expression, and expressions are resolved as a cast to \lstinline{void}. As a result of this, including return types in the $specialization$ cost means that a function with return type \lstinline{T*} for some polymorphic type \lstinline{T} would \emph{always} be chosen over a function with the same parameter types returning \lstinline{void}, even for \lstinline{void} contexts, an unacceptably counter-intuitive result.}, so the return types are omitted from the $specialization$ element.
    107 Since both $vars$ and $specialization$ are properties of the declaration rather than any particular interpretation, they are prioritized less than the interpretation-specific conversion costs from Bilson's original 3-tuple.
    108 
    109 A final refinement I have made to the \CFA{} cost model is with regard to choosing among arithmetic conversions.
    110 The C standard \cite[\S{}6.3.1.8]{C11} states that the common type of !int! and !unsigned int! is !unsigned int! and that the common type of !int! and !long! is !long!, but does not provide guidance for making a choice among conversions.
    111 Bilson's \CFACC{} uses conversion costs based off Figure~\ref{bilson-conv-fig}.
    112 However, Bilson's design results in inconsistent and somewhat surprising costs, with conversion to the next-larger same-sign type generally (but not always) double the cost of conversion to the !unsigned! type of the same size.
    113 In the redesign, for consistency with the approach of the usual arithmetic conversions, which select a common type primarily based on size, but secondarily on sign, arcs in the new graph are annotated with whether they represent a sign change, and such sign changes are summed in a new $sign$ cost element that lexicographically succeeds $safe$.
    114 This means that sign conversions are approximately the same cost as widening conversions, but slightly more expensive (as opposed to less expensive in Bilson's graph), so maintaining the same signedness is consistently favoured.
    115 
    116 With these modifications, the current \CFA{} cost tuple is as follows:
    117 
    118 \begin{equation*}
    119         (unsafe, poly, safe, sign, vars, specialization, reference)
    120 \end{equation*}
    121 
    122 \subsection{Expression Cost} \label{expr-cost-sec}
    123 
    124 The mapping from \CFA{} expressions to cost tuples is described by Bilson in \cite{Bilson03}, and remains effectively unchanged modulo the refinements to the cost tuple described above.
    125 Nonetheless, some salient details are repeated here for the sake of completeness.
    126 
    127 On a theoretical level, the resolver treats most expressions as if they were function calls.
    128 Operators in \CFA{} (both those existing in C and added features like constructors) are all modelled as function calls.
    129 In terms of the core argument-parameter matching algorithm, overloaded variables are handled the same as zero-argument function calls, aside from a different pool of candidate declarations and setup for different code generation.
    130 Similarly, an aggregate member expression !a.m! can be modelled as a unary function !m! that takes one argument of the aggregate type.
    131 Literals do not require sophisticated resolution, as in C the syntactic form of each implies their result types (!42! is !int!, !"hello"! is !char*!, \etc{}), though struct literals (\eg{} !(S){ 1, 2, 3 }! for some struct !S!) require resolution of the implied constructor call.
    132 
    133 Since most expressions can be treated as function calls, nested function calls are the primary component of complexity in expression resolution.
    134 Each function call has an \emph{identifier} that must match the name of the corresponding declaration, and a possibly-empty list of \emph{arguments}.
    135 These arguments may be function call expressions themselves, producing a tree of function-call expressions to resolve, where the leaf expressions are generally nullary functions, variable expressions, or literals.
    136 A single instance of expression resolution consists of matching declarations to all the identifiers in the expression tree of a top-level expression, along with inserting any conversions and satisfying all assertions necessary for that matching.
    137 The cost of a function-call expression is the sum of the conversion costs of each argument type to the corresponding parameter and the total cost of each subexpression, recursively calculated.
    138 \CFA{} expression resolution must produce either the unique lowest-cost interpretation of the top-level expression, or an appropriate error message if none exists.
    139 The cost model of \CFA{} precludes a greedy bottom-up resolution pass, as constraints and costs introduced by calls higher in the expression tree can change the interpretation of those lower in the tree, as in the following example:
    140 
    141 \begin{cfa}
    142 void f(int);
    143 double g$\(_1\)$(int);
    144 int g$\(_2\)$(long);
    145 
    146 f( g(42) );
    147 \end{cfa}
    148 
    149 Considered independently, !g!$_1$!(42)! is the cheapest interpretation of !g(42)!, with cost $(0,0,0,0,0,0)$ since the argument type is an exact match.
    150 However, in context, an unsafe conversion is required to downcast the return type of !g!$_1$ to an !int! suitable for !f!, for a total cost of $(1,0,0,0,0,0)$ for !f( g!$_1$!(42) )!.
    151 If !g!$_2$ is chosen, on the other hand, there is a safe upcast from the !int! type of !42! to !long!, but no cast on the return of !g!$_2$, for a total cost of $(0,0,1,0,0,0)$ for !f( g!$_2$!(42) )!; as this is cheaper, !g!$_2$ is chosen.
    152 Due to this design, all valid interpretations of subexpressions must in general be propagated to the top of the expression tree before any can be eliminated, a lazy form of expression resolution, as opposed to the eager expression resolution allowed by C, where each expression can be resolved given only the resolution of its immediate subexpressions.
    153 
    154 If there are no valid interpretations of the top-level expression, expression resolution fails and must produce an appropriate error message.
    155 If any subexpression has no valid interpretations, the process can be short-circuited and the error produced at that time.
    156 If there are multiple valid interpretations of a top-level expression, ties are broken based on the conversion cost, calculated as above.
    157 If there are multiple minimal-cost valid interpretations of a top-level expression, that expression is said to be \emph{ambiguous}, and an error must be produced.
    158 Multiple minimal-cost interpretations of a subexpression do not necessarily imply an ambiguous top-level expression, however, as the subexpression interpretations may be disambiguated based on their return type or by selecting a more-expensive interpretation of that subexpression to reduce the overall expression cost, as in the example above.
    159 
    160 The \CFA{} resolver uses type assertions to filter out otherwise-valid subexpression interpretations.
    161 An interpretation can only be selected if all the type assertions in the !forall! clause on the corresponding declaration can be satisfied with a unique minimal-cost set of satisfying declarations.
    162 Type assertion satisfaction is tested by performing type unification on the type of the assertion and the type of the declaration satisfying the assertion.
    163 That is, a declaration that satisfies a type assertion must have the same name and type as the assertion after applying the substitutions in the type environment.
    164 Assertion-satisfying declarations may be polymorphic functions with assertions of their own that must be satisfied recursively.
    165 This recursive assertion satisfaction has the potential to introduce infinite loops into the type resolution algorithm, a situation which \CFACC{} avoids by imposing a hard limit on the depth of recursive assertion satisfaction (currently 4); this approach is also taken by \CC{} to prevent infinite recursion in template expansion, and has proven to be effective and not unduly restrictive of the expressive power of \CFA{}.
    166 
    167 Cast expressions must be treated somewhat differently than functions for backwards compatibility purposes with C.
    168 In C, cast expressions can serve two purposes, \emph{conversion} (\eg{} !(int)3.14!), which semantically converts a value to another value in a different type with a different bit representation, or \emph{coercion} (\eg{} !void* p; (int*)p;!), which assigns a different type to the same bit value.
    169 C provides a set of built-in conversions and coercions, and user programmers are able to force a coercion over a conversion if desired by casting pointers.
    170 The overloading features in \CFA{} introduce a third cast semantic, \emph{ascription} (\eg{} !int x; double x; (int)x;!), which selects the overload that most-closely matches the cast type.
    171 However, since ascription does not exist in C due to the lack of overloadable identifiers, if a cast argument has an unambiguous interpretation as a conversion argument then it must be interpreted as such, even if the ascription interpretation would have a lower overall cost.
    172 This is demonstrated in the following example, adapted from the C standard library:
    173 
    174 \begin{cfa}
    175 unsigned long long x;
    176 (unsigned)(x >> 32);
    177 \end{cfa}
    178 
    179 In C semantics, this example is unambiguously upcasting !32! to !unsigned long long!, performing the shift, then downcasting the result to !unsigned!, at total cost $(1,0,3,1,0,0,0)$.
    180 If ascription were allowed to be a first-class interpretation of a cast expression, it would be cheaper to select the !unsigned! interpretation of !?>>?! by downcasting !x! to !unsigned! and upcasting !32! to !unsigned!, at a total cost of $(1,0,1,1,0,0,0)$.
    181 However, this break from C semantics is not backwards compatibile, so to maintain C compatibility, the \CFA{} resolver selects the lowest-cost interpretation of the cast argument for which a conversion or coercion to the target type exists (upcasting to !unsigned long long! in the example above, due to the lack of unsafe downcasts), using the cost of the conversion itself only as a tie-breaker.
    182 For example, in !int x; double x; (int)x;!, both declarations have zero-cost interpretations as !x!, but the !int x! interpretation is cheaper to cast to !int!, and is thus selected.
    183 Thus, in contrast to the lazy resolution of nested function-call expressions discussed above, where final interpretations for each subexpression are not chosen until the top-level expression is reached, cast expressions introduce eager resolution of their argument subexpressions, as if that argument was itself a top-level expression.
    184 
    185 \section{Resolution Algorithms}
    186 
    187 \CFA{} expression resolution is not, in general, polynomial in the size of the input expression, as shown in Section~\ref{resn-analysis-sec}.
    188 While this theoretical result is daunting, its implications can be mitigated in practice.
    189 \CFACC{} does not solve one instance of expression resolution in the course of compiling a program, but rather thousands; therefore, if the worst case of expression resolution is sufficiently rare, worst-case instances can be amortized by more-common easy instances for an acceptable overall runtime, as shown in Section~\ref{instance-expr-sec}.
    190 Secondly, while a programmer \emph{can} deliberately generate a program designed for inefficient compilation\footnote{See for instance \cite{Haberman16}, which generates arbitrarily large \CC{} template expansions from a fixed-size source file.}, source code tends to follow common patterns.
    191 Programmers generally do not want to run the full compiler algorithm in their heads, and as such keep mental shortcuts in the form of language idioms.
    192 If the compiler can be tuned to handle idiomatic code more efficiently, then the reduction in runtime for idiomatic (but otherwise difficult) resolution instances can make a significant difference in total compiler runtime.
    193 
    194 \subsection{Worst-case Analysis} \label{resn-analysis-sec}
    195 
    196 Expression resolution has a number of components that contribute to its runtime, including argument-parameter type unification, recursive traversal of the expression tree, and satisfaction of type assertions.
    197 
    198 If the bound type for a type variable can be looked up or mutated in constant time (as asserted in Table~\ref{env-bounds-table}), then the runtime of the unification algorithm to match an argument to a parameter is usually proportional to the complexity of the types being unified.
    199 In C, complexity of type representation is bounded by the most-complex type explicitly written in a declaration, effectively a small constant; in \CFA{}, however, polymorphism can generate more-complex types:
    200 
    201 \begin{cfa}
    202         forall(otype T) pair(T) wrap(T x, T y);
    203 
    204         wrap(wrap(wrap(1, 2), wrap(3, 4)), wrap(wrap(5, 6), wrap(7, 8)));
    205 \end{cfa}
    206 
    207 To resolve the outermost !wrap!, the resolver must check that !pair(pair(int))! unifies with itself, but at three levels of nesting, !pair(pair(int))! is more complex than either !pair(T)! or !T!, the types in the declaration of !wrap!.
    208 Accordingly, the cost of a single argument-parameter unification is $O(d)$, where $d$ is the depth of the expression tree, and the cost of argument-parameter unification for a single candidate for a given function call expression is $O(pd)$, where $p$ is the number of parameters.
    209 This bound does not, however, account for the higher costs of unifying two polymorphic type variables, which may in the worst case result in a recursive unification of all type variables in the expression (as discussed in Chapter~\ref{env-chap}).
    210 Since this recursive unification reduces the number of type variables, it may happen at most once, for an added $O(p^d)$ cost for a top-level expression with $O(p^d)$ type variables.
    211 
    212 Implicit conversions are also checked in argument-parameter matching, but the cost of checking for the existence of an implicit conversion is again proportional to the complexity of the type, $O(d)$.
    213 Polymorphism also introduces a potential expense here; for a monomorphic function there is only one potential implicit conversion from argument type to parameter type, while if the parameter type is an unbound polymorphic type-variable then any implicit conversion from the argument type could potentially be considered a valid binding for that type variable.
    214 \CFA{}, however, requires exact matches for the bound type of polymorphic parameters, removing this problem.
    215 An interesting question for future work is whether loosening this requirement incurs a significant compiler runtime cost in practice; preliminary results from the prototype system described in Chapter~\ref{expr-chap} suggest it does not.
    216 
    217 Considering the recursive traversal of the expression tree, polymorphism again greatly expands the worst-case runtime.
    218 Let $i$ be the number of candidate declarations for each function call; if all of these candidates are monomorphic, then there are no more than $i$ unambiguous interpretations of the subexpression rooted at that function call.
    219 Ambiguous minimal-cost subexpression interpretations may also be collapsed into a single \emph{ambiguous interpretation}, as the presence of such a subexpression interpretation in the final solution is an error condition.
    220 One safe pruning operation during expression resolution is to discard all subexpression interpretations with greater-than-minimal cost for their return type, as such interpretations cannot beat the minimal-cost interpretation with their return type for the overall optimal solution.
    221 As such, with no polymorphism, each declaration can generate no more than one minimal-cost interpretation with its return type, so the number of possible subexpression interpretations is $O(i)$ (note that in C, which lacks overloading, $i \leq 1$).
    222 With polymorphism, however, a single declaration (like !wrap! above) can have many concrete return types after type variable substitution, and could in principle have a different concrete return type for each combination of argument interpretations.
    223 Calculated recursively, the bound on the total number of candidate interpretations is $O(i^{p^d})$, each with a distinct type.
    224 
    225 Given these calculations of number of subexpression interpretations and matching costs, the upper bound on runtime for generating candidates for a single subexpression $d$ levels up from the leaves is $O( i^{p^d} \cdot pd )$.
    226 Since there are $O(p^d)$ subexpressions in a single top-level expression, the total worst-case cost of argument-parameter matching with the overloading and polymorphism features of \CFA{} is $O( i^{p^d} \cdot pd \cdot p^d )$.
    227 Since the size of the expression is $O(p^d)$, letting $n = p^d$ this simplifies to $O(i^n \cdot n^2)$
    228 
    229 This bound does not yet account for the cost of assertion satisfaction, however.
    230 \CFA{} uses type unification on the assertion type and the candidate declaration type to test assertion satisfaction; this unification calculation has cost proportional to the complexity of the declaration type after substitution of bound type variables; as discussed above, this cost is $O(d)$.
    231 If there are $O(a)$ type assertions on each declaration, there are $O(i)$ candidates to satisfy each assertion, for a total of $O(ai)$ candidates to check for each declaration.
    232 However, each assertion candidate may generate another $O(a)$ assertions, recursively until the assertion recursion limit $r$ is reached, for a total cost of $O((ai)^r \cdot d)$.
    233 Now, $a$ and $i$ are properties of the set of declarations in scope, while $r$ is defined by the language spec, so $(ai)^r$ is essentially a constant for purposes of expression resolution, albeit a very large one.
    234 It is not uncommon in \CFA{} to have functions with dozens of assertions, and common function names (\eg{} !?{}!, the constructor) can have hundreds of overloads.
    235 
    236 It is clear that assertion satisfaction costs can be very large, and in fact a method for heuristically reducing these costs is one of the key contributions of this thesis, but it should be noted that the worst-case analysis is a particularly poor match for actual code in the case of assertions.
    237 It is reasonable to assume that most code compiles without errors, as an actively-developed project is compiled many times, generally with relatively few new errors introduced between compiles.
    238 However, the worst-case bound for assertion satisfaction is based on recursive assertion satisfaction calls exceeding the limit, which is an error case.
    239 In practice, then, the depth of recursive assertion satisfaction should be bounded by a small constant for error-free code, which accounts for the vast majority of problem instances.
    240 
    241 Similarly, uses of polymorphism like those that generate the $O(d)$ bound on unification or the $O(i^{p^d})$ bound on number of candidates are rare, but not completely absent.
    242 This analysis points to type unification, argument-parameter matching, and assertion satisfaction as potentially costly elements of expression resolution, and thus profitable targets for algorithmic investigation.
    243 Type unification is discussed in Chapter~\ref{env-chap}, while the other aspects are covered below.
    244 
    245 \subsection{Argument-Parameter Matching} \label{arg-parm-matching-sec}
    246 
    247 Pruning possible interpretations as early as possible is one way to reduce the real-world cost of expression resolution, provided that a sufficient proportion of interpretations are pruned to pay for the cost of the pruning algorithm.
    248 One opportunity for interpretation pruning is by the argument-parameter type matching, but the literature provides no clear answers on whether candidate functions should be chosen according to their available arguments, or whether argument resolution should be driven by the available function candidates.
    249 For programming languages without implicit conversions, argument-parameter matching is essentially the entirety of the expression resolution problem, and is generally referred to as ``overload resolution'' in the literature.
    250 All expression-resolution algorithms form a DAG of interpretations, some explicitly, some implicitly; in this DAG, arcs point from function-call interpretations to argument interpretations, as in Figure~\ref{res-dag-fig}
    251 
    252 \begin{figure}[h]
    253         \centering
    254         \begin{subfigure}[h]{3in}
    255         \begin{cfa}
    256         char *p$\(_1\)$;
    257         int *p$\(_2\)$;
    258        
    259         char *f$\(_1\)$(char*, int*);
    260         double *f$\(_2\)$(int*, int*);
    261        
    262         f$\(_A\)$( f$\(_B\)$( p$\(_A\)$, p$\(_B\)$ ), p$\(_C\)$ );
    263         \end{cfa}
    264         \end{subfigure}~\begin{subfigure}[h]{2.5in}
    265         \includegraphics{figures/resolution-dag}
    266         \end{subfigure}
    267         \caption[Resolution DAG for a simple expression.]{Resolution DAG for a simple expression, annotated with explanatory subscripts. Functions that do not have a valid argument matching are covered with an \textsf{X}.} \label{res-dag-fig}
    268 \end{figure}
    269 
    270 Note that some interpretations may be part of more than one super-interpretation, as with the !p!$_2$ interpretation of !p!$_B$, while some valid subexpression interpretations, like the !f!$_2$ interpretation of !f!$_B$, are not used in any interpretation of their superexpression.
    271 
    272 Overload resolution was first seriously considered in the development of compilers for the Ada programming language, with different algorithms making various numbers of passes over the expression DAG, these passes being either top-down or bottom-up.
    273 Baker's algorithm~\cite{Baker82} takes a single pass from the leaves of the expression tree up, pre-computing argument candidates at each step.
    274 For each candidate function, Baker attempts to match argument types to parameter types in sequence, failing if any parameter cannot be matched.
    275 
    276 Bilson~\cite{Bilson03} similarly pre-computes argument-candidates in a single bottom-up pass in the original \CFACC{}, but then explicitly enumerates all possible argument combinations for a multi-parameter function.
    277 These argument combinations are matched to the parameter types of the candidate function as a unit rather than individual arguments.
    278 Bilson's approach is less efficient than Baker's, as the same argument may be compared to the same parameter many times, but does allow a more straightforward handling of polymorphic type-binding and tuple-typed expressions.
    279 
    280 Unlike Baker and Bilson, Cormack's algorithm~\cite{Cormack81} requests argument candidates that match the type of each parameter of each candidate function, in a single pass from the top-level expression down; memoization of these requests is presented as an optimization.
    281 As presented, this algorithm requires the parameter to have a known type, which is a poor fit for polymorphic type parameters in \CFA{}.
    282 Cormack's algorithm can be modified to request argument interpretations of \emph{any} type when provided an unbound parameter type, but this eliminates any pruning gains that could be provided by the algorithm.
    283 
    284 Ganzinger and Ripken~\cite{Ganzinger80} propose an approach (later refined by Pennello~\etal{}~\cite{Pennello80}) that uses a top-down filtering pass followed by a bottom-up filtering pass to reduce the number of candidate interpretations; they prove that a small number of such iterations is sufficient to converge to a solution for the overload resolution problem in the Ada programming language.
    285 Persch~\etal{}~\cite{PW:overload} developed a similar two-pass approach where the bottom-up pass is followed by the top-down pass.
    286 These approaches differ from Baker, Bilson, or Cormack in that they take multiple passes over the expression tree to yield a solution by applying filtering heuristics to all expression nodes.
    287 This approach of filtering out invalid types is unsuited to \CFA{} expression resolution, however, due to the presence of polymorphic functions and implicit conversions.
    288 
    289 Baker~\cite{Baker82} left empirical comparison of different overload resolution algorithms to future work; Bilson~\cite{Bilson03} described an extension of Baker's algorithm to handle implicit conversions and polymorphism, but did not further explore the space of algorithmic approaches to handle both overloaded names and implicit conversions.
    290 This thesis closes that gap in the literature by performing performance comparisons of both top-down and bottom-up expression resolution algorithms, with results reported in Chapter~\ref{expr-chap}.
    291 
    292 \subsection{Assertion Satisfaction} \label{assn-sat-sec}
    293 
    294 The assertion satisfaction algorithm designed by Bilson~\cite{Bilson03} for the original \CFACC{} is the most-relevant prior work to this project.
    295 Before accepting any subexpression candidate, Bilson first checks that that candidate's assertions can all be resolved; this is necessary due to Bilson's addition of assertion satisfaction costs to candidate costs (discussed in Section~\ref{conv-cost-sec}).
    296 If this subexpression interpretation ends up not being used in the final resolution, than the (sometimes substantial) work of checking its assertions ends up wasted.
    297 Bilson's assertion checking function recurses on two lists, !need! and !newNeed!, the current declaration's assertion set and those implied by the assertion-satisfying declarations, respectively, as detailed in the pseudo-code below (ancillary aspects of the algorithm are omitted for clarity):
    298 
    299 \begin{cfa}
    300 List(List(Declaration)) checkAssertions(
    301                 List(Assertion) need, List(Assertion) newNeed, List(Declaration) have,
    302                 Environment env ) {
    303         if ( is_empty(need) ) {
    304                 if ( is_empty( newNeed ) ) return { have };
    305                 else return checkAssertions( newNeed, {}, have, env );
    306         }
    307 
    308         Assertion a = head(need);
    309         Type adjType = substitute( a.type, env );
    310         List(Declaration) candidates = decls_matching( a.name );
    311         List(List(Declaration)) alternatives = {}
    312         for ( Declaration c : candidates ) {
    313                 Environment newEnv = env;
    314                 if ( unify( adjType, c.type, newEnv ) ) {
    315                         append( alternatives,
    316                                 checkAssertions(
    317                                         tail(need), append(newNeed, c.need), append(have, c), newEnv ) );
    318                 }
    319         }
    320         return alternatives;
    321 }
    322 \end{cfa}
    323 
    324 One shortcoming of this approach is that if an earlier assertion has multiple valid candidates, later assertions may be checked many times due to the structure of the recursion.
    325 Satisfying declarations for assertions are not completely independent of each other, since the unification process may produce new type bindings in the environment, and these bindings may not be compatible between independently-checked assertions.
    326 Nonetheless, with the environment data-structures discussed in Chapter~\ref{env-chap}, I have found it more efficient to produce a list of possibly-satisfying declarations for each assertion once, then check their respective environments for mutual compatibility when combining lists of assertions together.
    327 
    328 Another improvement I have made to the assertion resolution scheme in \CFACC{} is to consider all assertion-satisfying combinations at one level of recursion before attempting to recursively satisfy any !newNeed! assertions.
    329 Monomorphic functions are cheaper than polymorphic functions for assertion satisfaction because they are an exact match for the environment-adjusted assertion type, whereas polymorphic functions require an extra type binding.
    330 Thus, if there is any mutually-compatible set of assertion-satisfying declarations that does not include any polymorphic functions (and associated recursive assertions), then the optimal set of assertions does not require any recursive !newNeed! satisfaction.
    331 More generally, due to the \CFA{} cost-model changes I introduced in Section~\ref{conv-cost-sec}, the conversion cost of an assertion-satisfying declaration is no longer dependent on the conversion cost of its own assertions.
    332 As such, all sets of mutually-compatible assertion-satisfying declarations can be sorted by their summed conversion costs, and the recursive !newNeed! satisfaction pass is required only to check the feasibility of the minimal-cost sets.
    333 This optimization significantly reduces wasted work relative to Bilson's approach, as well as avoiding generation of deeply-recursive assertion sets, for a significant performance improvement relative to Bilson's \CFACC{}.
    334 
    335 Making the conversion cost of an interpretation independent of the cost of satisfying its assertions has further benefits.
    336 Bilson's algorithm checks assertions for all subexpression interpretations immediately, including those that are not ultimately used; I have developed a \emph{deferred} variant of assertion checking that waits until a top-level interpretation has been generated to check any assertions.
    337 If the assertions of the minimal-cost top-level interpretation cannot be satisfied then the next-most-minimal-cost interpretation's assertions are checked, and so forth until a minimal-cost satisfiable interpretation (or ambiguous set thereof) is found, or no top-level interpretations are found to have satisfiable assertions.
    338 In the common case where the code actually does compile, this saves the work of checking assertions for ultimately-rejected interpretations, though it does rule out some pruning opportunities for subinterpretations with unsatisfiable assertions or which are more expensive than a minimal-cost polymorphic function with the same return type.
    339 The experimental results in Chapter~\ref{expr-chap} indicate that this is a worthwhile trade-off.
    340 
    341 Optimizing assertion satisfaction for common idioms has also proved effective in \CFA{}; the code below is an unexceptional print statement derived from the \CFA{} test suite that nonetheless is a very difficult instance of expression resolution:
    342 
    343 \begin{cfa}
    344 sout | "one" | 1 | "two" | 2 | "three" | 3 | "four" | 4 | "five" | 5 | "six" | 6
    345         | "seven" | 7 | "eight" | 8 | "nine" | 9 | "ten" | 10 | "end" | nl | nl;
    346 \end{cfa}
    347 
    348 The first thing that makes this expression so difficult is that it is 23 levels deep; Section~\ref{resn-analysis-sec} indicates that the worst-case bounds on expression resolution are exponential in expression depth.
    349 Secondly, the !?|?! operator is significantly overloaded in \CFA{} --- there are 74 such operators in the \CFA{} standard library, and while 9 are arithmetic operators inherited from C, the rest are polymorphic I/O operators that look similar to:
    350 
    351 \begin{cfa}
    352 forall( dtype ostype | ostream( ostype ) )
    353 ostype& ?|? ( ostype&, int );
    354 \end{cfa}
    355 
    356 Note that !ostream! is a trait with 25 type assertions, and that the output operators for the other arithmetic types are also valid for the !int!-type parameters due to implicit conversions.
    357 On this instance, deferred assertion satisfaction saves wasted work checking assertions on the wrong output operators, but does nothing about the 23 repeated checks of the 25 assertions to determine that !ofstream! (the type of !sout!) satisfies !ostream!.
    358 
    359 To solve this problem, I have developed a \emph{cached} variant of assertion checking.
    360 During the course of checking the assertions of a single top-level expression, the results are cached for each assertion checked.
    361 The search key for this cache is the assertion declaration with its type variables substituted according to the type environment to distinguish satisfaction of the same assertion for different types.
    362 This adjusted assertion declaration is then run through the \CFA{} name-mangling algorithm to produce an equivalent string-type key.
    363 
    364 The assertion satisfaction aspect of \CFA{} expression resolution bears some similarity to satisfiability problems from logic, and as such other languages with similar trait and assertion mechanisms make use of logic-program solvers in their compilers.
    365 For instance, Matsakis~\cite{Matsakis17} and the Rust team have developed a PROLOG-based engine to check satisfaction of Rust traits.
    366 The combination of the assertion satisfaction elements of the problem with the conversion-cost model of \CFA{} makes this logic-solver approach difficult to apply in \CFACC{}, however.
    367 Expressing assertion resolution as a satisfiability problem ignores the cost optimization aspect, which is necessary to decide among what are often many possible satisfying assignments of declarations to assertions.
    368 On the other hand, the deeply-recursive nature of the satisfiability problem makes it difficult to adapt to optimizing solver approaches such as linear programming.
    369 To maintain a well-defined programming language, any optimization algorithm used must provide an exact (rather than approximate) solution; this constraint also rules out a whole class of approximately-optimal generalized solvers.
    370 As such, I opted to continue Bilson's approach of designing a bespoke solver for \CFA{} assertion satisfaction, rather than attempting to re-express the problem in some more general formalism.
    371 
    372 \section{Conclusion \& Future Work} \label{resn-conclusion-sec}
    373 
    374 As the results in Chapter~\ref{expr-chap} show, the algorithmic approaches I have developed for \CFA{} expression resolution are sufficient to build a practically-performant \CFA{} compiler.
    375 This work may also be of use to other compiler construction projects, notably to members of the \CC{} community as they implement the new Concepts \cite{C++Concepts} standard, which includes type assertions similar to those used in \CFA{}, as well as the C-derived implicit conversion system already present in \CC{}.
    376 
    377 I have experimented with using expression resolution rather than type unification to check assertion satisfaction; this variant of the expression resolution problem should be investigated further in future work.
    378 This approach is more flexible than type unification, allowing for conversions to be applied to functions to satisfy assertions.
    379 Anecdotally, this flexibility matches user-programmer expectations better, as small type differences (\eg{} the presence or absence of a reference type, or the usual conversion from !int! to !long!) no longer break assertion satisfaction.
    380 Practically, the resolver prototype discussed in Chapter~\ref{expr-chap} uses this model of assertion satisfaction, with no apparent deficit in performance; the generated expressions that are resolved to satisfy the assertions are easier than the general case because they never have nested subexpressions, which eliminates much of the theoretical differences between unification and resolution.
    381 The main challenge to implement this approach in \CFACC{} is applying the implicit conversions generated by the resolution process in the code-generation for the thunk functions that \CFACC{} uses to pass type assertions to their requesting functions with the proper signatures.
    382 
    383 Though performance of the existing algorithms is promising, some further optimizations do present themselves.
    384 The refined cost model discussed in Section~\ref{conv-cost-sec} is more expressive, but requires more than twice as many fields; it may be fruitful to investigate more tightly-packed in-memory representations of the cost-tuple, as well as comparison operations that require fewer instructions than a full lexicographic comparison.
    385 Integer or vector operations on a more-packed representation may prove effective, though dealing with the negative-valued $specialization$ field may require some effort.
    386 
    387 Parallelization of various phases of expression resolution may also be useful.
    388 The algorithmic variants I have introduced for both argument-parameter matching and assertion satisfaction are essentially divide-and-conquer algorithms, which solve subproblem instances for each argument or assertion, respectively, then check mutual compatibility of the solutions.
    389 While the checks for mutual compatibility are naturally more serial, there may be some benefit to parallel resolution of the subproblem instances.
    390 
    391 The resolver prototype built for this project and described in Chapter~\ref{expr-chap} would be a suitable vehicle for many of these further experiments, and thus a technical contribution of continuing utility.
     15% Mention relevance of work to C++20 concepts
  • doc/theses/aaron_moss_PhD/phd/thesis.tex

    r8b590a4 r1c35c78  
    2828
    2929\usepackage{footmisc} % for double refs to the same footnote
    30 
    31 \usepackage{caption} % for subfigure
    32 \usepackage{subcaption}
    3330
    3431% Hyperlinks make it very easy to navigate an electronic document.
     
    132129\input{resolution-heuristics}
    133130\input{type-environment}
    134 \input{experiments}
    135131\input{conclusion}
    136132
     
    160156% \nocite{*}
    161157
    162 % APPENDICIES
    163 % -----------
    164 \appendix
    165 \input{generic-bench}
    166 
    167158\end{document}
  • doc/theses/aaron_moss_PhD/phd/type-environment.tex

    r8b590a4 r1c35c78  
    44One key data structure for expression resolution is the \emph{type environment}.
    55As discussed in Chapter~\ref{resolution-chap}, being able to efficiently determine which type variables are bound to which concrete types or whether two type environments are compatible is a core requirement of the resolution algorithm.
    6 Furthermore, expression resolution involves a search through many related possible solutions, so the ability to re-use shared subsets of type-environment data and to switch between environments quickly is desirable for performance.
    7 In this chapter, I discuss a number of type-environment data-structure variants, including some novel variations on the union-find \cite{Galler64} data structure introduced in this thesis.
    8 Chapter~\ref{expr-chap} contains empirical comparisons of the performance of these data structures when integrated into the resolution algorithm.
     6Furthermore, expression resolution involves a search through many related possible solutions, so being able to re-use shared subsets of type environment data and to switch between environments quickly is desirable for performance.
     7In this chapter I discuss and empirically compare a number of type environment data structure variants, including some novel variations on the union-find\cite{Galler64} data structure introduced in this thesis.
    98
    109\section{Definitions} \label{env-defn-sec}
    1110
    1211For purposes of this chapter, a \emph{type environment} $T$ is a set of \emph{type classes} $\myset{T_1, T_2, \cdots, T_{|T|}}$.
    13 Each type class $T_i$ contains a set of \emph{type variables} $\myset{v_{i,1}, v_{i,2}, \cdots, v_{i,|T_i|}}$.
    14 Since the type classes represent an equivalence relation over the type variables the sets of variables contained in two distinct classes in the same environment must be disjoint.
    15 Each individual type class $T_i$ may also be associated with a \emph{bound}, $b_i$; this bound contains the \emph{bound type} that the variables in the type class are replaced with, but also includes other information in \CFACC{}, including whether type conversions are permissible on the bound type and what sort of type variables are contained in the class (data types, function types, or variadic tuples).
     12Each type class $T_i$ contains a set of \emph{type variables} $\myset{v_{i,1}, v_{i,2}, \cdots, v_{i,|T_i|}}$; note that the sets of variables contained in two distinct classes in the same environment must be disjoint.
     13Each individual type class $T_i$ may also be associated with a \emph{bound}, $b_i$; this bound contains the \emph{bound type} which the variables in the type class are replaced with, but also includes other information in \CFACC{}, including whether type conversions are permissible on the bound type and what sort of type variables are contained in the class (data types, function types, or variadic tuples).
    1614
    1715\begin{table}
     
    2018\centering
    2119\begin{tabular}{r@{\hskip 0.25em}ll}
    22         \hline
    23         $find(T, v_{i,j})$ & $\rightarrow T_i~|~\mathsf{fail}$          & Locate class for variable             \\
     20        $find(T, v_{i,j})$ & $\rightarrow T_i | \bot$           & Locate class for variable             \\
    2421        $report(T_i)$ & $\rightarrow \{ v_{i,j} \cdots \}$      & List variables for class              \\
    25         $bound(T_i)$ & $\rightarrow b_i~|~\mathsf{fail}$                                & Get bound for class                   \\
    26         $insert(T, v_{i,1})$ &                                                          & New single-variable class             \\
     22        $bound(T_i)$ & $\rightarrow b_i | \bot$                         & Get bound for class                   \\
     23        $insert(v_{i,1})$ &                                                                     & New single-variable class             \\
    2724        $add(T_i, v_{i,j})$ &                                                           & Add variable to class                 \\
    2825        $bind(T_i, b_i)$ &                                                                      & Set or update class bound             \\
    29         \hline
    30         $unify(T, T_i, T_j)$ & $\rightarrow \mathsf{pass}~|~\mathsf{fail}$      & Combine two type classes              \\
     26        $unify(T, T_i, T_j)$ & $\rightarrow \top | \bot$        & Combine two type classes              \\
    3127        $split(T, T_i)$ & $\rightarrow T'$                                      & Revert the last $unify$ operation on $T_i$            \\
    32         $combine(T, T')$ & $\rightarrow \mathsf{pass}~|~\mathsf{fail}$          & Merge two environments                \\
     28        $combine(T, T')$ & $\rightarrow \top | \bot$            & Merge two environments                \\
    3329        $save(T)$ & $\rightarrow H$                                                     & Get handle for current state  \\
    34         $backtrack(T, H)$ &                                                                     & Return to handle state                \\
    35         \hline
     30        $backtrack(T, H)$ &                                                                     & Return to handle state
    3631\end{tabular}
    3732\end{table}
    3833
    3934Given this basic structure, type environments in \CFACC{} need to support eleven basic operations, summarized in Table~\ref{env-op-table}.
    40 The first six operations are straightforward queries and updates on these data structures:
    41 The lookup operation $find(T, v_{i,j})$ produces $T_i$, the type class in $T$ that contains variable $v_{i,j}$, or an invalid sentinel value for no such class.
     35The first seven operations are straightforward queries and updates on these data structures:
     36The lookup operation $find(T, v_{i,j})$ produces $T_i$, the type class in $T$ which contains variable $v_{i,j}$, or an invalid sentinel value for no such class.
    4237The other two query operations act on type classes, where $report(T_i)$ produces the set $\myset{v_{i,1}, v_{i,2}, \cdots, v_{i,|T_i|}}$ of all type variables in a class $T_i$ and $bound(T_i)$ produces the bound $b_i$ of that class, or a sentinel indicating no bound is set.
    4338
    44 The update operation $insert(T, v_{i,1})$ creates a new type class $T_i$ in $T$ that contains only the variable $v_{i,1}$ and no bound; due to the disjointness property, $v_{i,1}$ cannot belong to any other type class in $T$.
     39The update operation $insert(T, v_{i,1})$ creates a new type class $T_i$ in $T$ that contains only the variable $v_{i,1}$ and no bound; due to the disjointness property $v_{i,1}$ cannot belong to any other type class in $T$.
    4540The $add(T_i, v_{i,j})$ operation adds a new type variable $v_{i,j}$ to class $T_i$; again, $v_{i,j}$ cannot exist elsewhere in $T$.
    4641$bind(T_i, b_i)$ mutates the bound for a type class, setting or updating the current bound.
    4742
    48 The $unify$ operation is the fundamental non-trivial operation a type-environment data-structure must support.
     43The $unify$ operation is the fundamental non-trivial operation a type environment data structure must support.
    4944$unify(T, T_i, T_j)$ merges a type class $T_j$ into another $T_i$, producing a failure result and leaving $T$ in an invalid state if this merge fails.
    5045It is always possible to unify the type variables of both classes by simply taking the union of both sets; given the disjointness property, no checks for set containment are required, and the variable sets can simply be concatenated if supported by the underlying data structure.
    51 $unify$ depends on an internal $unifyBound$ operation, which may fail.
    52 In \CFACC{}, $unifyBound(b_i, b_j) \rightarrow b'_i~|~\mathsf{fail}$ checks that the type classes contain the same sort of variable, takes the tighter of the two conversion permissions, and checks if the bound types can be unified.
     46$unify$ depends on an internal $unifyBound$ operation which may fail.
     47In \CFACC{}, $unifyBound(b_i, b_j) \rightarrow b'_i|\bot$ checks that the type classes contain the same sort of variable, takes the tighter of the two conversion permissions, and checks if the bound types can be unified.
    5348If the bound types cannot be unified (\eg{} !struct A! with !int*!), then $unifyBound$ fails, while other combinations of bound types may result in recursive calls.
    54 For instance, unifying !R*! with !S*! for type variables !R! and !S! results in a call to $unify(T, find($!R!$), find($!S!$))$, while unifying !R*! with !int*! results in a call to $unifyBound$ on !int! and the bound type of the class containing !R!.
     49For instance, unifying !R*! with !S*! for type variables !R! and !S! will result in a call to $unify(T, find($!R!$), find($!S!$))$, while unifying !R*! with !int*! will result in a call to $unifyBound$ on !int! and the bound type of the class containing !R!.
    5550As such, a call to $unify(T, T_i, T_j)$ may touch every type class in $T$, not just $T_i$ and $T_j$, collapsing the entirety of $T$ into a single type class in extreme cases.
    5651For more information on \CFA{} unification, see \cite{Bilson03}.
    57 The inverse of $unify$ is $split(T, T_i)$, which produces a new environment $T'$ that is the same as $T$ except that $T_i$ has been replaced by two classes corresponding to the arguments to the previous call to $unify$ on $T_i$.
    58 If there is no prior call to $unify$ on $T_i$ (\ie{} $T_i$ is a single-element class) $T_i$ is absent in $T'$.
    59 
    60 Given the nature of the expression resolution problem as a backtracking search, caching and concurrency are both useful tools to decrease runtime.
     52The inverse of $unify$ is $split(T, T_i)$, which produces a new environment $T'$ which is the same as $T$ except that $T_i$ has been replaced by two classes corresponding to the arguments to the previous call to $unify$ on $T_i$.
     53If there has been no call to $unify$ on $T_i$ (\ie{} $T_i$ is a single-element class) $T_i$ is absent in $T'$.
     54
     55Given the nature of the expression resolution problem as backtracking search, caching and concurrency are both useful tools to decrease runtime.
    6156However, both of these approaches may produce multiple distinct descendants of the same initial type environment, which have possibly been mutated in incompatible ways.
    62 As such, to effectively employ either caching or concurrency, the type environment data structure must support an efficient method to check if two type environments are compatible and merge them if so.
    63 $combine(T,T')$ attempts to merge an environment $T'$ into another environment $T$, producing $\mathsf{pass}$ if successful or leaving $T$ in an invalid state and producing $\mathsf{fail}$ otherwise.
    64 The invalid state of $T$ on failure is not important, given that a combination failure results in the resolution algorithm backtracking to a different environment.
     57As such, to effectively employ either concurrency or caching, the type environment data structure must support an efficient method to check if two type environments are compatible and merge them if so.
     58$combine(T,T')$ attempts to merge an environment $T'$ into another environment $T$, producing $\top$ if successful or leaving $T$ in an invalid state and producing $\bot$ otherwise.
     59The invalid state of $T$ on failure is not important, given that a combination failure will result in the resolution algorithm backtracking to a different environment.
    6560$combine$ proceeds by calls to $insert$, $add$, and $unify$ as needed, and can be roughly thought of as calling $unify$ on every pair of classes in $T$ that have variables $v'_{i,j}$ and $v'_{i,k}$ in the same class $T'_i$ in $T'$.
    6661Like $unify$, $combine$ can always find a mutually-consistent partition of type variables into classes (in the extreme case, all type variables from $T$ and $T'$ in a single type class), but may fail due to inconsistent bounds on merged type classes.
     
    6964The set of mutations to a type environment across the execution of the resolution algorithm produce an implicit tree of related environments, and the backtracking search typically focuses only on one leaf of the tree at once, or at most a small number of closely-related nodes as arguments to $combine$.
    7065As such, the ability to save and restore particular type environment states is useful, and supported by the $save(T) \rightarrow H$ and $backtrack(T, H)$ operations, which produce a handle for the current environment state and mutate an environment back to a previous state, respectively.
    71 These operations can be naively implemented by a deep copy of $T$ into $H$ and vice versa, but have more efficient implementations in persistency-aware data structures such as the persistent union-find introduced in Section~\ref{env-persistent-union-find}.
    72 
    73 \section{Approaches} \label{env-approaches-sec}
    74 
    75 \subsection{Na\"{\i}ve} \label{naive-env-sec}
    76 
    77 The type environment data structure used in Bilson's~\cite{Bilson03} original implementation of \CFACC{} is a straightforward translation of the definitions in Section~\ref{env-defn-sec} to \CC{} code; a !TypeEnvironment! contains a list of !EqvClass! type equivalence classes, each of which contains the type bound information and a tree-based sorted set of type variables.
     66These operations can be naively implemented by a deep copy of $T$ into $H$ and vice versa, but have more efficient implementations in persistency-aware data structures.
     67
     68\section{Approaches}
     69
     70\subsection{Na\"{\i}ve}
     71
     72The type environment data structure used in Bilson's\cite{Bilson03} original implementation of \CFACC{} is a straightforward translation of the definitions in Section~\ref{env-defn-sec} to \CC{} code; a !TypeEnvironment! contains a list of !EqvClass! type equivalence classes, each of which contains the type bound information and a tree-based sorted set of type variables.
    7873This approach has the benefit of being easy to understand and not imposing life-cycle or inheritance constraints on its use, but, as can be seen in Table~\ref{env-bounds-table}, does not support many of the desired operations with any particular efficiency.
    79 Some variations on this structure may improve performance somewhat; for instance, replacing the !EqvClass! variable storage with a hash-based set reduces search and update times from $O(\log n)$ to amortized $O(1)$, while adding an index for the type variables in the entire environment removes the need to check each type class individually to maintain the disjointness property.
     74Some variations on this structure may improve performance somewhat; for instance, replacing the !EqvClass! variable storage with a hash-based set would reduce search and update times from $O(\log n)$ to amortized $O(1)$, while adding an index for the type variables in the entire environment would remove the need to check each type class individually to maintain the disjointness property.
    8075These improvements do not change the fundamental issues with this data structure, however.
    8176
    82 \subsection{Incremental Inheritance} \label{inc-env-sec}
    83 
    84 One more invasive modification to this data structure that I investigated is to support swifter combinations of closely-related environments in the backtracking tree by storing a reference to a \emph{parent} environment within each environment, and having that environment only store type classes that have been modified with respect to the parent.
     77\subsection{Incremental Inheritance}
     78
     79One more invasive modification to this data structure which I investigated is to support swifter combinations of closely-related environments in the backtracking tree by storing a reference to a \emph{parent} environment within each environment, and having that environment only store type classes which have been modified with respect to the parent.
    8580This approach provides constant-time copying of environments, as a new environment simply consists of an empty list of type classes and a reference to its (logically identical) parent; since many type environments are no different than their parent, this speeds backtracking in this common case.
    86 Since all mutations made to a child environment are by definition compatible with the parent environment, two descendants of a common ancestor environment can be combined by iteratively combining the changes made in one environment, then that environment's parent, until the common ancestor is reached, again re-using storage and reducing computation in many cases.
    87 
    88 For this environment, I also employed a lazily-generated index of type variables to their containing class, which could be in either the current environment or an ancestor.
    89 Any mutation of a type class in an ancestor environment causes that class to be copied into the current environment before mutation, as well as added to the index, ensuring all local changes to the type environment are listed in its index.
     81Since all mutations made to a child environment are by definition compatible with the parent environment, two descendants of a common ancestor environment can be combined by iteratively combining the changes made in one environment then that environment's parent until the common ancestor is reached, again re-using storage and reducing computation in many cases.
     82
     83For this environment I also employed a lazily-generated index of type variables to their containing class, which could be in either the current environment or an ancestor.
     84Any mutation of a type class in an ancestor environment would cause that class to be copied into the current environment before mutation, as well as added to the index, ensuring that all local changes to the type environment are listed in its index.
    9085However, not adding type variables to the index until lookup or mutation preserves the constant-time environment copy operation in the common case in which the copy is not mutated from its parent during its life-cycle.
    9186
    92 This approach imposes some performance penalty on $combine$ if related environments are not properly linked together, as the entire environment needs to be combined rather than just the difference, but is correct as long as the ``null parent'' base-case is properly handled.
     87This approach imposes some performance penalty on $combine$ if related environments are not properly linked together, as the entire environment needs to be combined rather than just the diff, but is correct as long as the ``null parent'' base case is properly handled.
    9388The life-cycle issues are somewhat more complex, as many environments may descend from a common parent, and all of these need their parent to stay alive for purposes of lookup.
    94 These issues can be solved by ``flattening'' parent nodes into their children before the parent's scope ends, but given the tree structure of the inheritance graph it is more straightforward to store the parent nodes in reference-counted or otherwise automatically garbage-collected heap storage.
     89These issues can be solved by ``flattening'' parent nodes into their children before the parents leave scope, but given the tree structure of the inheritance graph it is more straightforward to store the parent nodes in reference-counted or otherwise automatically garbage-collected heap storage.
    9590
    9691\subsection{Union-Find} \label{env-union-find-approach}
    9792
    98 Given the nature of the classes of type variables as disjoint sets, another natural approach to implementing a type environment is the union-find disjoint-set data-structure~\cite{Galler64}.
     93Given the nature of the classes of type variables as disjoint sets, another natural approach to implementing a type environment is the union-find disjoint set data structure\cite{Galler64}.
    9994Union-find efficiently implements two operations over a partition of a collection of elements into disjoint sets; $find(x)$ locates the \emph{representative} of $x$, the element which canonically names its set, while $union(r, s)$ merges two sets represented by $r$ and $s$, respectively.
    10095The union-find data structure is based on providing each element with a reference to its parent element, such that the root of a tree of elements is the representative of the set of elements contained in the tree.
    10196$find$ is then implemented by a search up to the parent, generally combined with a \emph{path compression} step that links nodes more directly to their ancestors to speed up subsequent searches.
    10297$union$ involves making the representative of one set a child of the representative of the other, generally employing a rank- or size-based heuristic to ensure that the tree remains somewhat balanced.
    103 If both path compression and a balancing heuristic are employed, both $union$ and $find$ run in amortized $O(\alpha(n))$ worst-case time; this inverse Ackermann bound is a small constant for all practical values of $n$ \cite{Tarjan75}.
     98If both path compression and a balancing heuristic are employed, both $union$ and $find$ run in amortized $O(\alpha(n))$ worst-case time; this bound by the inverse Ackermann function is a small constant for all practical values of $n$.
    10499
    105100The union-find $find$ and $union$ operations have obvious applicability to the $find$ and $unify$ type environment operations in Table~\ref{env-op-table}, but the union-find data structure must be augmented to fully implement the type environment operations.
    106 In particular, the type-class bound cannot be easily included in the union-find data structure, as the requirement to make it the class representative breaks the balancing properties of $union$, and requires too-close integration of the type environment $unifyBound$ internal operation.
    107 This issue can be solved by including a side map from class representatives to the type-class bound.
     101In particular, the type class bound cannot be easily included in the union-find data structure, as the requirement to make it the class representative breaks the balancing properties of $union$, and requires too-close integration of the type environment $unifyBound$ internal operation.
     102This issue can be solved by including a side map from class representatives to the type class bound.
    108103If placeholder values are inserted in this map for type classes without bounds than this also has the useful property that the key set of the map provides an easily obtainable list of all the class representatives, a list which cannot be derived from the union-find data structure without a linear search for class representatives through all elements.
    109104
     
    112107Another type environment operation not supported directly by the union-find data structure is $report$, which lists the type variables in a given class, and similarly $split$, which reverts a $unify$ operation.
    113108Since the union-find data structure stores only links from children to parents and not vice-versa, there is no way to reconstruct a class from one of its elements without a linear search over the entire data structure, with $find$ called on each element to check its membership in the class.
    114 The situation is even worse for the $split$ operation, which requires extra information to maintain the order that each child is added to its parent node.
    115 Unfortunately, the literature \cite{Tarjan84,Galil91,Patwary10} on union-find does not present a way to keep references to children without breaking the asymptotic time bounds of the algorithm; I have discovered a method to do so, which, despite its simplicity, seems to be novel.
     109The situation is even worse for the $split$ operation, which would require extra information to maintain the order that each child was added to its parent node.
     110Unfortunately, the literature\cite{Tarjan84,Galil91,Patwary10} on union-find does not present a way to keep references to children without breaking the asymptotic time bounds of the algorithm; I have discovered a method to do so which, despite its simplicity, seems to be novel.
     111
     112\TODO{port figure from slideshow}
    116113
    117114The core idea of this ``union-find with classes'' data structure and algorithm is to keep the members of each class stored in a circularly-linked list.
     
    120117In my version, the list data structure does not affect the layout of the union-find tree, maintaining the same asymptotic bounds as union-find.
    121118In more detail, each element is given a !next! pointer to another element in the same class; this !next! pointer initially points to the element itself.
    122 When two classes are unified, the !next! pointers of the representatives of those classes are swapped, splicing the two circularly-linked lists together as illustrated in Figure~\ref{union-find-classes-fig}.
     119When two classes are unified, the !next! pointers of the representatives of those classes are swapped, splicing the two circularly-linked lists together.
    123120Importantly, though this approach requires an extra pointer per element, it does maintain the linear space bound of union-find, and because it only requires updating the two root nodes in $union$ it does not asymptotically increase runtime either.
    124121The basic approach is compatible with all path-compression techniques, and allows the members of any class to be retrieved in time linear in the size of the class simply by following the !next! pointers from any element.
    125 
    126 \begin{figure}
    127         \centering
    128         \includegraphics{figures/union-find-with-classes}
    129         \caption[Union operation for union-find with classes.]{Union operation for union-find with classes. Solid lines indicate parent pointers, dashed lines are \lstinline{next} pointers.}
    130         \label{union-find-classes-fig}
    131 \end{figure}
    132122
    133123If the path-compression optimization is abandoned, union-find with classes also encodes a reversible history of all the $union$ operations applied to a given class.
     
    137127
    138128\begin{theorem} \label{env-reverse-thm}
    139 The !next! pointer of a class representative in the union-find with classes algorithm, without path compression, points to a leaf from the most-recently-added subtree.
     129The !next! pointer of a class representative in the union-find with classes algorithm without path compression points to a leaf from the most-recently-added subtree.
    140130\end{theorem}
    141131
     
    143133        By induction on the height of the tree. \\
    144134        \emph{Base case:} A height 1 tree by definition includes only a single item. In such a case, the representative's !next! pointer points to itself by construction, and the representative is the most-recently-added (and only) leaf in the tree. \\
    145         \emph{Inductive case:} By construction, a tree $T$ of height greater than 1 has children of the root (representative) node that were representative nodes of classes merged by $union$. By definition, the most-recently-added subtree $T'$ has a smaller height than $T$, thus by the inductive hypothesis before the most-recent $union$ operation, the !next! pointer of the root of $T'$ pointed to one of the leaf nodes of $T'$; by construction the !next! pointer of the root of $T$ points to this leaf after the $union$ operation.
     135        \emph{Inductive case:} By construction, a tree $T$ of height greater than 1 has children of the root (representative) node that were representative nodes of classes merged by $union$. By definition, the most-recently-added subtree $T'$ has a smaller height than $T$, thus by the inductive hypothesis before the most-recent $union$ operation the !next! pointer of the root of $T'$ pointed to one of the leaf nodes of $T'$; by construction the !next! pointer of the root of $T$ points to this leaf after the $union$ operation.
    146136\end{proof}
    147137
     
    149139
    150140\subsection{Persistent Union-Find}
    151 \label{env-persistent-union-find}
    152 
    153 Given the backtracking nature of the resolution algorithm discussed in Section~\ref{env-defn-sec}, the abilities to quickly switch between related versions of a type environment and to de-duplicate shared data among environments are both assets to performance.
     141
     142Given the backtracking nature of the resolution algorithm discussed in Section~\ref{env-defn-sec}, the abilities to quickly switch between related versions of a type environment and to de-duplicate shared data between environments are both assets to performance.
    154143Conchon and Filli\^{a}tre~\cite{Conchon07} present a persistent union-find data structure based on the persistent array of Baker~\cite{Baker78,Baker91}.
    155144
    156 In Baker's persistent array, an \emph{array reference} contains either a pointer to the array or a pointer to an \emph{edit node}; these edit nodes contain an array index, the value in that index, and another array reference pointing either to the array or a different edit node.
    157 By construction, these array references always point to a node more like the actual array, forming a tree of edits rooted at the actual array.
    158 Reads from the actual array at the root can be performed in constant time, as with a non-persistent array.
     145\TODO{port figure from slideshow}
     146
     147In Baker's persistent array, an array reference contains either a pointer to the array or a pointer to an \emph{edit node}; these edit nodes contain an array index, the value in that index, and another array reference pointing either to the array or a different edit node.
     148In this manner, a tree of edits is formed, rooted at the actual array.
     149Read from the actual array at the root can be performed in constant time, as with a non-persistent array.
    159150The persistent array can be mutated in constant time by directly modifying the underlying array, then replacing its array reference with an edit node containing the mutated index, the previous value at that index, and a reference to the mutated array. If the current array reference is not the root, mutation consists simply of constructing a new edit node encoding the change and referring to the current array reference. 
    160 
    161 The mutation algorithm at the root is a special case of the key operation on persistent arrays, $reroot$.
     151The mutation algorithm at the root is in some sense a special case of the key operation on persistent arrays, $reroot$.
     152
    162153A rerooting operation takes any array reference and makes it the root node of the array.
    163 This operation is accomplished by tracing the path from some edit node to actual array at the root node, recursively applying the edits to the underlying array and replacing each edit node's successor with the inverse edit.
     154This is accomplished by tracing the path from some edit node to the root node of the array (always the underlying array), recursively applying the edits to the underlying array and replacing each edit node's successor with the inverse edit.
    164155In this way, any previous state of the persistent array can be restored in time proportional to the number of edits to the current state of the array.
    165 While $reroot$ does maintain the same value mapping in every version of the persistent array, the internal mutations it performs break thread-safety, and thus it must be used behind a lock in a concurrent context.
     156While $reroot$ does maintain the same value mapping in every version of the persistent array, the internal mutations it performs means that it is not thread-safe, and must be used behind a lock in a concurrent context.
    166157Also, the root node with the actual array may in principle be anywhere in the tree, and does not provide information to report its leaf nodes, so some form of automatic garbage collection is generally required for the data structure.
    167158Since the graph of edit nodes is tree-structured, reference counting approaches suffice for garbage collection; Conchon and Filli\^{a}tre~\cite{Conchon07} also observe that if the only $reroot$ operations are for backtracking then the tail of inverse edit nodes may be elided, suggesting the possibility of stack-based memory management.
    168159
    169160While Conchon and Filli\^{a}tre~\cite{Conchon07} implement their persistent union-find data structure over a universe of integer elements in the fixed range $[1,N]$, the type environment problem needs more flexibility.
    170 In particular, an arbitrary number of type variables may be added to the environment.
    171 As such, a persistent hash table is a more suitable structure than a persistent array, providing the same expected asymptotic time bounds, while allowing a dynamic number of elements.
    172 Besides replacing the underlying array with a hash table, the other major change in this approach is to replace the two types of array references, !Array! and !Edit!, with four node types, !Table!,  !Edit!, !Add!, and !Remove!, where !Add! adds a new key-value pair, !Remove! removes a key-value pair, and !Edit! mutates an existing key-value pair.
    173 In this variant of \CFACC{}, this persistent hash-table is used as the side map discussed in Section~\ref{env-union-find-approach} for class bounds.
     161In particular, an arbitrary number of type variables must be added to the environment.
     162As such, a persistent hash table is a more suitable structure than a persistent array, providing the same expected asymptotic time bounds while allowing a dynamic number of elements.
     163Besides replacing the underlying array with a hash table, the other major change in this approach is to replace the two types of array references, !Array! and !Edit!, with four node types, !Table!,  !Edit!, !Add!, and !Remove!, where !Add! adds a new key-value pair, !Remove! removes a key, and !Edit! mutates an existing key-value pair.
     164In this variant of \CFACC{}, this persistent hash table is used as the side map discussed in Section~\ref{env-union-find-approach} for class bounds.
    174165The actual union-find data structure is slightly modified from this approach, with a !Base! node containing the root union-find data structure, !Add! nodes adding new elements, !AddTo! nodes defining the union of two type classes, and !Remove! and !RemoveFrom! nodes as inverses of the previous two elements, for purposes of maintaining the edit list.
    175 Figure~\ref{persistent-union-find-fig} demonstrates the structure of a simple example.
    176 Making !AddTo! and !RemoveFrom! single nodes provides semantic information missing from the raw array updates in Conchon and Filli\^{a}tre's data structure.
    177 !RemoveFrom! is implemented using the ``leaf of last union'' approach discussed in Section~\ref{env-union-find-classes-approach}; this does, however, preclude the use of path-compression algorithms in this persistent union-find data structure.
    178 
    179 \begin{figure}
    180         \centering
    181         \includegraphics{figures/persistent-union-find}
    182         \caption[Persistent union-find data structure.]{Persistent union-find data structure. Shows the edit nodes to reverse back to an empty structure.}
    183         \label{persistent-union-find-fig}
    184 \end{figure}
    185 
    186 This added semantic information on $union$ operations in the persistent union-find edit tree exposes a new option for combining type environments.
     166Making !AddTo! and !RemoveFrom! single nodes shortens the edit path for improved performance, while also providing semantic information missing from the raw array updates in Conchon and Filli\^{a}tre's data structure.
     167The single-node approach, does, however, break under most path-compression algorithms; !RemoveFrom! can be applied to the underlying data structure using the ``leaf of last union'' approach discussed in in Section~\ref{env-union-find-classes-approach}; this was judged an acceptable trade-off for the added semantic information and shortened paths.
     168
     169Maintaining explicit information on $union$ operations in the persistent union-find edit tree in the form of !AddTo! and !RemoveFrom! nodes exposes a new option for combining type environments.
    187170If the type environments are part of the same edit tree, one environment $T'$ can be combined with another $T$ by only testing the edits on the path from $T'$ to $T$ in both the persistent union-find data structure describing the classes and the persistent hash table containing the class bounds.
    188 This approach is generally more efficient than testing the compatibility of all type classes in $T'$, as only those that are actually different than those in $T$ must be considered.
    189 However, the improved performance comes at the cost of some flexibility, as the edit-tree link must be maintained between any two environments to be combined under this algorithm.
     171This is generally more efficient than testing the compatibility of all type classes in $T'$, as only those that are actually different than those in $T$ must be considered.
    190172
    191173The procedure for $combine(T, T')$ based on edit paths is as follows:
    192174The shared edit trees for classes and bindings are rerooted at $T$, and the path from $T'$ to $T$ is followed to create a list of actual edits.
    193175By tracking the state of each element, redundant changes such as an !Edit! followed by an !Edit! can be reduced to their form in $T'$ by dropping the later (more like $T$) !Edit! for the same key; !Add! and !Remove! cancel similarly.
    194 This procedure is repeated for both the class edit-tree and the binding edit-tree.
    195 When the list of net changes to the environment is produced, the additive changes are applied to $T$.
    196 For example, if a type class exists in $T'$ but not $T$, the corresponding !Add! edit is applied to $T$, but in the reverse situation the !Remove! edit is not applied to $T$, as the intention is to produce a new environment representing the union of the two sets of type classes; similarly, !AddTo! edits are applied to unify type-classes in $T$ that are united in $T'$, but !RemoveFrom! edits that split type classes are not.
    197 A new environment, $T''$, can always be constructed with a consistent partitioning of type variables; in the extreme case, all variables from both $T$ and $T'$ are united in a single type class in $T''$.
    198 $combine$ can fail to unify the bound types; if any class in $T'$ has a class bound that does not unify with the merged class in $T''$, then $combine$ fails.
    199 
    200 \section{Analysis} \label{env-analysis-sec}
    201 
    202 In this section, I present asymptotic analyses of the various approaches to the type environment data structure discussed in the previous section.
    203 My results are summarized in Table~\ref{env-bounds-table}; in all cases, $n$ is the number of type classes, $m$ is the maximum size of a type class, and $p$ the number of edits between two environments or one environment and the empty environment.
    204 $u(n)$ captures the recursive cost of class unification, which is kept separate so that the $O(n)$ number of recursive class unifications can be distinguished from the direct cost of each recursive step.
     176This procedure is repeated for both the class edit tree and the binding edit tree.
     177When the list of net changes to the environment has been produced, the additive changes are applied to $T$.
     178For example, if a type class exists in $T'$ but not $T$, the corresponding !Add! edit will be applied to $T$, but in the reverse situation the !Remove! edit will not be applied to $T$, as the intention is to produce a new environment representing the union of the two sets of type classes; similarly, !AddTo! edits are applied to unify type-classes in $T$ that are united in $T'$, but !RemoveFrom! edits that split type classes are not.
     179The new environment, $T''$ can always be constructed with a consistent partitioning of type variables; in the extreme case, all variables from both $T$ and $T'$ will be united in a single type class in $T''$.
     180Where $combine$ can fail is in unifying the bound types; if any class in $T'$ has a class bound which does not unify with the merged class in $T''$ than $combine$ fails.
     181
     182\section{Analysis}
     183
     184In this section I present asymptotic analyses of the various approaches to a type environment data structure discussed in the previous section.
    205185
    206186\begin{table}
     
    210190\begin{tabular}{rllll}
    211191        \hline
    212                                 & \textbf{Na\"{\i}ve} & \textbf{Incremental} & \textbf{Union-Find} & \textbf{Persistent U-F} \\
     192                                & \textbf{Na\"{\i}ve}   & \textbf{Incremental}  & \textbf{Union-Find}           & \textbf{U-F with Classes}             \\
    213193        \hline
    214         $find$          & $O(n)$                        & $O(p)$                        & $O(\alpha(m))$        & $O(\log m)$           \\
    215         $report$        & $O(m)$                        & $O(m)$                        & $O(nm\alpha(m))$      & $O(m)$                        \\
    216         $bound$         & $O(1)$                        & $O(1)$                        & $O(1)$                        & $O(1)$                        \\
    217         $insert$        & $O(1)$                        & $O(1)$                        & $O(1)$                        & $O(1)$                        \\
    218         $add$           & $O(1)$                        & $O(m)$                        & $O(1)$                        & $O(1)$                        \\
    219         $bind$          & $O(1)$                        & $O(1)$                        & $O(1)$                        & $O(1)$                        \\
    220         $unify$         & $O(m + u(n))$         & $O(m + u(n))$         & $O(1 + u(n))$         & $O(1 + u(n))$         \\
    221         $split$         & ---                           & ---                           & ---                           & $O(\log m)$           \\
    222         $combine$       & $O(n^2m $                     & $O(p^2m $                     & $O(nm\alpha(m) $      & $O(p \log m $         \\
    223                                 & $~~~+ nu(n))$         & $~~~+ pu(n))$         & $~~~+ nu(n))$         & $~~~+ pu(n))$         \\
    224         $save$          & $O(nm)$                       & $O(1)$                        & $O(nm)$                       & $O(1)$                        \\
    225         $backtrack$     & $O(nm)$                       & $O(pm)$                       & $O(nm)$                       & $O(p)$                        \\
     194        $find$          & $O(n)$                                & $O(p)$                                & $O(\alpha(m))$                        & $O(\log m)$                                   \\
     195        $report$        & $O(m)$                                & $O(m)$                                & $O(n \log m)$                         & $O(m)$                                                \\
     196        $bound$         & $O(1)$                                & $O(1)$                                & $O(1)$                                        & $O(1)$                                                \\
     197        $insert$        & $O(1)$                                & $O(1)$                                & $O(1)$                                        & $O(1)$                                                \\
     198        $add$           & $O(1)$                                & $O(1)$                                & $O(1)$                                        & $O(1)$                                                \\
     199        $bind$          & $O(1)$                                & $O(1)$                                & $O(1)$                                        & $O(1)$                                                \\
     200        $unify$         & $O(m + u(n))$                 & $O(m + u(n))$                 & $O(\log m + u(n))$            & $O(\log m + u(n))$                    \\
     201        $split$         & ---                                   & ---                                   & ---                                           & $O(\log m)$                                   \\
     202        $combine$       & $O(nm \cdot u(n))$    & $O(pm \cdot u(n))$    & $O(n \log m \cdot u(n))$      & $O(p \log m \cdot u(n))$              \\
     203        $save$          & $O(nm)$                               & $O(1)$                                & $O(nm)$                                       & $O(1)$                                                \\
     204        $backtrack$     & $O(nm)$                               & $O(pm)$                               & $O(nm)$                                       & $O(p)$                                                \\
    226205        \hline
    227206\end{tabular}
    228207\end{table}
    229208
    230 \subsection{Na\"{\i}ve and Incremental}
    231 \label{naive-incremental-analysis}
    232 
    233 The na\"{\i}ve type environment data structure does not have an environment-wide index for type variables, but does have an index for each type class, assumed to be hash-based here.
    234 As a result, every class's index must be consulted for a $find$ operation, at an overall cost of $O(n)$.
    235 The incremental variant builds an overall hash-based index as it is queried, but may need to recursively check its parent environments if its local index does not contain a type variable, and may have as many parents as times it has been modified, for cost $O(p)$.
    236 It should be noted that subsequent queries for the same variable execute in constant time.
    237 
    238 Since both na\"{\i}ve and incremental variants store complete type classes, the cost of a $report$ operation is simply the time needed to output the contained variables, $O(m)$.
    239 Since the type classes store their bounds, $bound$ and $bind$ are both $O(1)$ given a type class.
    240 Once a $find$ operation has already been performed to verify that a type variable does not exist in the environment, the data structures for both these variants support adding new type classes (the $insert$ operation) in $O(1)$.
    241 Adding a variable to a type class (the $add$ operation) can be done in $O(1)$ for the na\"{\i}ve implementation, but the incremental implementation may need to copy the edited type class from a parent at cost $O(m)$.
    242 
    243 The linear storage of type classes in both variants also leads to $O(m)$ time for the variable-merging step in $unify$, plus the usual $u(n)$ recursion term for $unifyBound$.
    244 The na\"{\i}ve $combine$ operation must traverse each of the classes of one environment, merging in any class of the other environment that shares a type variable.
    245 Since there are at most $n$ classes to unify, the unification cost is $O(nm + nu(n))$, while traversal and $find$ costs to locate classes to merge total $O(n^2m)$, for an overall cost of $O(n^2m + nu(n))$.
    246 The incremental $combine$ operation works similarly, but only needs to consider classes modified in either environment with respect to the common ancestor of both environments, allowing the $n$ cost terms to be substituted for $p$, for an overall cost of $O(p^m + pu(n))$.
    247 Neither variant supports the $split$ operation to undo a $unify$.
    248 
    249 The na\"{\i}ve environment does nothing to support $save$ and $backtrack$, so these operations must be implemented by making a deep copy of the environment on $save$, then a destructive overwrite on $backtrack$, each at a cost of $O(nm)$.
    250 The incremental environment supports $O(1)$ $save$ by simply setting aside a reference to the current environment, then proceeding with a new, empty environment with the former environment as a parent.
    251 $backtrack$ to a parent environment may involve destroying all the intermediate environments if this backtrack removes the last reference to the backtracked-from environment; this cost is $O(pm)$.
    252 
    253 \subsection{Union-Find}
    254 
    255 The union-find data structure is designed to support $find$ efficiently, and thus for any variant, the cost is simply the distance up the tree to the representative element.
    256 For basic union-find, this is amortized to the inverse Ackermann function, $O(\alpha(m))$, essentially a small constant, though the loss of path compression in persistent union-find raises this cost to $O(\log m)$.
    257 Basic union-find is not designed to support the $report$ operation, however, so it must be simulated by checking the representative of every type variable, at cost $O(nm\alpha(m))$.
    258 Persistent union-find, on the other hand, uses the ``with classes'' extension to union-find described in Section~\ref{env-union-find-classes-approach} to run $report$ in $O(m)$ time.
    259 
    260 All union-find environment variants described here use a secondary hash table to map from class representatives to bindings, and as such can perform $bound$ and $bind$ in $O(1)$, given the representative.
    261 $insert$ is also a $O(1)$ operation for both basic and persistent union-find.
    262 Since $add$ simply involves attaching a new child to the class root, it is also a $O(1)$ operation for all union-find environment variants.
    263 
    264 Union-find is also designed to support $unify$ in constant time, and as such, given class representatives, the variable-merging cost of $unify$ for both variants is $O(1)$ to make one representative the child of the other, plus the $O(u(n))$ term for $unifyBound$.
    265 Basic union-find does not support $split$, but persistent union-find can accomplish it using the mechanism described in Section~\ref{env-union-find-classes-approach} in $O(\log m)$, the cost of traversing up to the root of a class from a leaf without path compression.
    266 $combine$ on the basic union-find data structure works similarly to the data structures discussed above in Section~\ref{naive-incremental-analysis}, with a $O(nu(n))$ term for the $O(n)$ underlying $unify$ operations, and a $O(n\alpha(m))$ term to find the classes which need unification by checking the class representatives of each corresponding type variable in both environments for equality.
    267 Persistent union-find uses a different approach for $combine$, discussed in Section~\ref{env-persistent-union-find}.
    268 Discounting recursive $unify$ operations included in the $u(n)$ $unifyBound$ term, there may be at most $O(p)$ $unify$ operations performed, at cost $O(pu(n))$.
    269 Each of the $O(p)$ steps on the edit path can be processed in the $O(\log m)$ time it takes to find the current representative of the modified type class, for a total runtime of $O(p \log m + pu(n))$.
    270 
    271 In terms of backtracking operations, the basic union-find data structure only supports deep copies, for $O(nm)$ cost for both $save$ and $backtrack$.
    272 Persistent union-find, as the name suggests, is more optimized, with $O(1)$ cost to $save$ a backtrack-capable reference to the current environment state, and $O(p)$ cost to revert to that state (possibly destroying no-longer-used edit nodes along the path).
    273 
    274 \section{Conclusion \& Future Work}
    275 
    276 This chapter presents the type environment abstract data type, some type-environment data-structures optimized for workloads encountered in the expression resolution problem, and asymptotic analysis of each data structure.
    277 Chapter~\ref{expr-chap} provides experimental performance results for a representative set of these approaches.
    278 One contribution of this thesis is the union-find with classes data structure for efficient retrieval of union-find class members, along with a related algorithm for reversing the history of $union$ operations in this data structure.
    279 This reversible history contributes to the second novel contribution of this chapter, a type environment data structure based off the persistent union-find data structure of Conchon and Filli\^{a}tre~\cite{Conchon07}.
    280 This persistent union-find environment uses the $split$ operation introduced in union-find with classes and the edit history of the persistent data structure to support an environment-combining algorithm that only considers the edits between the environments to be merged.
    281 
    282 This persistent union-find data structure is efficient, but not thread-safe; as suggested in Section~\ref{resn-conclusion-sec}, it may be valuable to parallelize the \CFA{} expression resolver.
    283 However, allowing multiple threads concurrent access to the persistent data structure is likely to result in ``reroot thrashing'', as different threads reroot the data structure to their own versions of interest.
    284 This contention could be mitigated by partitioning the data structure into separate subtrees for each thread, with each subtree having its own root node, and the boundaries among them implemented with a lock-equipped !ThreadBoundary! edit node.
     209% Future work: design multi-threaded version of C&F persistent map --- core idea is some sort of thread-boundary edit node
Note: See TracChangeset for help on using the changeset viewer.