Index: doc/bibliography/cfa.bib
===================================================================
--- doc/bibliography/cfa.bib	(revision a381b46f28e115a62848831837d838570a0da573)
+++ doc/bibliography/cfa.bib	(revision c57d19356d1a87560d1ce135bdeca03758dc539a)
@@ -9,4 +9,5 @@
 %    Predefined journal names:
 %  acmcs: Computing Surveys		acta: Acta Infomatica
+@string{acta="Acta Infomatica"}
 %  cacm: Communications of the ACM
 %  ibmjrd: IBM J. Research & Development ibmsj: IBM Systems Journal
@@ -877,4 +878,5 @@
     contributer	= {a3moss@uwaterloo.ca},
     key		= {{ISO/IEC} {TS} 19217},
+    author	= {Concepts},
     title	= {Information technology -- Programming languages -- {C}{\kern-.1em\hbox{\large\texttt{+\kern-.25em+}}} Extensions for concepts},
     institution	= {International Standard Organization},
@@ -2723,10 +2725,11 @@
 
 @online{GCCExtensions,
-    contributer = {a3moss@uwaterloo.ca},
-    key = {{GNU}},
-    title = {Extensions to the {C} Language Family},
-    year = 2014,
-    url = {https://gcc.gnu.org/onlinedocs/gcc-4.7.2/gcc/C-Extensions.html},
-    urldate = {2017-04-02}
+    contributer	= {a3moss@uwaterloo.ca},
+    key		= {{GNU}},
+    author	= {{C Extensions}},
+    title	= {Extensions to the {C} Language Family},
+    year	= 2014,
+    note	= {\href{https://gcc.gnu.org/onlinedocs/gcc-4.7.2/gcc/C-Extensions.html}{https://\-gcc.gnu.org/\-onlinedocs/\-gcc-4.7.2/\-gcc/\-C\-Extensions.html}},
+    urldate	= {2017-04-02}
 }
 
@@ -2824,5 +2827,5 @@
     key		= {Fortran95},
     title	= {Fortran 95 Standard, ISO/IEC 1539},
-    organization = {Unicomp, Inc.},
+    organization= {Unicomp, Inc.},
     address	= {7660 E. Broadway, Tucson, Arizona, U.S.A, 85710},
     month	= jan,
@@ -3061,11 +3064,12 @@
 
 @online{GObject,
-    keywords = {GObject},
-    contributor = {a3moss@uwaterloo.ca},
-    author = {{The GNOME Project}},
-    title = {{GObject} Reference Manual},
-    year = 2014,
-    url = {https://developer.gnome.org/gobject/stable/},
-    urldate = {2017-04-04}
+    keywords	= {GObject},
+    contributor	= {a3moss@uwaterloo.ca},
+    author	= {{GObject}},
+    organization= {The GNOME Project},
+    title	= {{GObject} Reference Manual},
+    year	= 2014,
+    url		= {https://developer.gnome.org/gobject/stable/},
+    urldate	= {2017-04-04}
 }
 
@@ -3647,4 +3651,5 @@
     author      = {James Gosling and Bill Joy and Guy Steele and Gilad Bracha and Alex Buckley},
     title       = {{Java} Language Specification},
+    organization= {Oracle},
     publisher	= {Oracle},
     year        = 2015,
@@ -4567,5 +4572,5 @@
     keywords	= {objective-c},
     contributor	= {a3moss@uwaterloo.ca},
-    author	= {{Apple Computer}},
+    author	= {{Objective-C}},
     title	= {The {Objective-C} Programming Language},
     organization= {Apple Computer Inc.},
@@ -4577,5 +4582,5 @@
     keywords	= {objective-c},
     contributor	= {a3moss@uwaterloo.ca},
-    author	= {{Apple Computer Inc.}},
+    author	= {{Xcode}},
     title	= {{Xcode} 7 Release Notes},
     year	= 2015,
@@ -5882,5 +5887,5 @@
     keywords	= {Rust programming language},
     contributer	= {pabuhr@plg},
-    author	= {{Rust Programming Language}},
+    author	= {{Rust}},
     title	= {The {Rust} Programming Language},
     organization= {The Rust Project Developers},
@@ -6920,11 +6925,12 @@
 
 @online{Vala,
-    keywords = {GObject, Vala},
-    contributor = {a3moss@uwaterloo.ca},
-    author = {{The GNOME Project}},
-    title = {Vala Reference Manual},
-    year = 2017,
-    url = {https://wiki.gnome.org/Projects/Vala/Manual},
-    urldate = {2017-04-04}
+    keywords	= {GObject, Vala},
+    contributor	= {a3moss@uwaterloo.ca},
+    author	= {{Vala}},
+    organization= {The GNOME Project},
+    title	= {Vala Reference Manual},
+    year	= 2017,
+    url		= {https://wiki.gnome.org/Projects/Vala/Manual},
+    urldate	= {2017-04-04}
 }
 
Index: doc/generic_types/generic_types.tex
===================================================================
--- doc/generic_types/generic_types.tex	(revision a381b46f28e115a62848831837d838570a0da573)
+++ doc/generic_types/generic_types.tex	(revision c57d19356d1a87560d1ce135bdeca03758dc539a)
@@ -82,5 +82,5 @@
 % replace/adjust listing characters that look bad in sanserif
 literate={-}{\raisebox{-0.15ex}{\texttt{-}}}1 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1
-	{~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 {_}{\makebox[1.2ex][c]{\rule{1ex}{0.1ex}}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
+	{~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1%{_}{\makebox[1.2ex][c]{\rule{1ex}{0.1ex}}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
 	{<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2,
 moredelim=**[is][\color{red}]{`}{`},
@@ -185,5 +185,5 @@
 (4) Extensions introduced by \CFA must be translated in the most efficient way possible.
 These goals ensure existing C code-bases can be converted to \CFA incrementally with minimal effort, and C programmers can productively generate \CFA code without training beyond the features being used.
-\CC is often used similarly, but has the paired disadvantages of multiple legacy design-choices that cannot be updated and active divergence of the language model from C, requiring significant effort and training to incrementally add \CC to a C-based project.
+\CC is used similarly, but has the disadvantages of multiple legacy design-choices that cannot be updated and active divergence of the language model from C, requiring significant effort and training to incrementally add \CC to a C-based project.
 
 \CFA is currently implemented as a source-to-source translator from \CFA to the GCC-dialect of C~\citep{GCCExtensions}, allowing it to leverage the portability and code optimizations provided by GCC, meeting goals (1)-(3).
@@ -199,5 +199,5 @@
 
 \CFA's polymorphism was originally formalized by \citet{Ditchfield92}, and first implemented by \citet{Bilson03}.
-The signature feature of \CFA is parametric-polymorphic functions~\citep{forceone:impl,Cormack90} where functions are generalized using a @forall@ clause (giving the language its name):
+The signature feature of \CFA is parametric-polymorphic functions~\citep{forceone:impl,Cormack90,Duggan96} with functions generalized using a @forall@ clause (giving the language its name):
 \begin{lstlisting}
 `forall( otype T )` T identity( T val ) { return val; }
@@ -209,9 +209,11 @@
 If this extra information is not needed, \eg for a pointer, the type parameter can be declared as a \emph{data type} (or @dtype@).
 
-In \CFA, the polymorphism runtime-cost is spread over each polymorphic call, due to passing more arguments to polymorphic functions; the experiments in Section~\ref{sec:eval} show this overhead is similar to \CC virtual-function calls.
-An advantage of this design is that, unlike \CC template-functions, \CFA polymorphic-functions are compatible with C \emph{separate compilation}, preventing compilation and code bloat.
+In \CFA, the polymorphism runtime-cost is spread over each polymorphic call, due to passing more arguments to polymorphic functions;
+the experiments in Section~\ref{sec:eval} show this overhead is similar to \CC virtual-function calls.
+A design advantage is that, unlike \CC template-functions, \CFA polymorphic-functions are compatible with C \emph{separate compilation}, preventing compilation and code bloat.
 
 Since bare polymorphic-types provide a restricted set of available operations, \CFA provides a \emph{type assertion}~\cite{alphard} mechanism to provide further type information, where type assertions may be variable or function declarations that depend on a polymorphic type-variable.
 For example, the function @twice@ can be defined using the \CFA syntax for operator overloading:
+\newpage
 \begin{lstlisting}
 forall( otype T `| { T ?+?(T, T); }` ) T twice( T x ) { return x + x; }	$\C{// ? denotes operands}$
@@ -220,5 +222,5 @@
 which works for any type @T@ with a matching addition operator.
 The polymorphism is achieved by creating a wrapper function for calling @+@ with @T@ bound to @double@, then passing this function to the first call of @twice@.
-There is now the option of using the same @twice@ and converting the result to @int@ on assignment, or creating another @twice@ with type parameter @T@ bound to @int@ because \CFA uses the return type, as in~\cite{Ada}, in its type analysis.
+There is now the option of using the same @twice@ and converting the result to @int@ on assignment, or creating another @twice@ with type parameter @T@ bound to @int@ because \CFA uses the return type~\cite{Cormack81,Baker82,Ada}, in its type analysis.
 The first approach has a late conversion from @double@ to @int@ on the final assignment, while the second has an eager conversion to @int@.
 \CFA minimizes the number of conversions and their potential to lose information, so it selects the first approach, which corresponds with C-programmer intuition.
@@ -367,5 +369,5 @@
 
 One of the known shortcomings of standard C is that it does not provide reusable type-safe abstractions for generic data structures and algorithms.
-Broadly speaking, there are three approaches to implement abstract data structures in C.
+Broadly speaking, there are three approaches to implement abstract data-structures in C.
 One approach is to write bespoke data structures for each context in which they are needed.
 While this approach is flexible and supports integration with the C type-checker and tooling, it is also tedious and error-prone, especially for more complex data structures.
@@ -377,5 +379,5 @@
 \CC, Java, and other languages use \emph{generic types} to produce type-safe abstract data-types.
 \CFA also implements generic types that integrate efficiently and naturally with the existing polymorphic functions, while retaining backwards compatibility with C and providing separate compilation.
-However, for known concrete parameters, the generic type definition can be inlined, like \CC templates.
+However, for known concrete parameters, the generic-type definition can be inlined, like \CC templates.
 
 A generic type can be declared by placing a @forall@ specifier on a @struct@ or @union@ declaration, and instantiated using a parenthesized list of types after the type name:
@@ -457,5 +459,5 @@
 Whether a type is concrete, dtype-static, or dynamic is decided solely on the type parameters and @forall@ clause on a declaration.
 This design allows opaque forward declarations of generic types, \eg @forall(otype T) struct Box@ -- like in C, all uses of @Box(T)@ can be separately compiled, and callers from other translation units know the proper calling conventions to use.
-If the definition of a structure type is included in deciding whether a generic type is dynamic or concrete, some further types may be recognized as dtype-static (\eg @forall(otype T) struct unique_ptr { T* p }@ does not depend on @T@ for its layout, but the existence of an @otype@ parameter means that it \emph{could}.), but preserving separate compilation (and the associated C compatibility) in the existing design is judged to be an appropriate trade-off.
+If the definition of a structure type is included in deciding whether a generic type is dynamic or concrete, some further types may be recognized as dtype-static (\eg @forall(otype T) struct unique_ptr { T * p }@ does not depend on @T@ for its layout, but the existence of an @otype@ parameter means that it \emph{could}.), but preserving separate compilation (and the associated C compatibility) in the existing design is judged to be an appropriate trade-off.
 
 
@@ -843,5 +845,5 @@
 forall( otype R, otype S ) void ?{}( pair(R, S) *, R, S );  // (1)
 forall( dtype T, ttype Params | sized(T) | { void ?{}( T *, Params ); } ) T * new( Params p ) {
-	return ((T*)malloc( sizeof(T) )){ p }; // construct into result of malloc
+	return ((T *)malloc( sizeof(T) )){ p }; // construct into result of malloc
 }
 pair( int, char ) * x = new( 42, '!' );
@@ -948,12 +950,12 @@
 \label{sec:eval}
 
-Though \CFA provides significant added functionality over C, these added features have a low runtime penalty.
+Though \CFA provides significant added functionality over C, these features have a low runtime penalty.
 In fact, \CFA's features for generic programming can enable faster runtime execution than idiomatic @void *@-based C code.
-This claim is demonstrated through a set of generic-code-based micro-benchmarks in C, \CFA, and \CC (see source code in Appendix~\ref{sec:BenchMarks}).
-Since all these languages share a subset comprising most of standard C, maximal-performance benchmarks would show little runtime variance, other than in length and clarity of source code.
-Instead, the presented benchmarks show the costs of idiomatic use of each language's features to examine common usage.
-Figure~\ref{fig:MicroBenchmark} shows the \CFA benchmark tests for a generic stack based on a singly linked-list, a generic pair-data-structure, and a variadic @print@ routine similar to that in Section~\ref{sec:variadic-tuples}.
-The benchmark tests are similar for C and \CC.
-The experiment uses element types @int@ and @pair( _Bool, char)@, and push $N=40M$ elements on a generic stack, copy the stack, clear one of the stacks, find the maximum value in the other stack, and print $N$ constant values.
+This claim is demonstrated through a set of generic-code-based micro-benchmarks in C, \CFA, and \CC (see source-code interfaces in Appendix~\ref{sec:BenchmarkInterfaces}).
+Since all these languages share a subset comprising standard C, maximal-performance benchmarks would show little runtime variance, other than in length and clarity of source code.
+A more illustrative benchmark is to show the costs of idiomatic use of each language's features covering common usage.
+Figure~\ref{fig:BenchmarkTest} shows the \CFA benchmark tests for a generic stack based on a singly linked-list, a generic pair-data-structure, and a variadic @print@ routine similar to that in Section~\ref{sec:variadic-tuples}.
+The benchmark test is similar for C and \CC.
+The experiment uses element types @int@ and @pair(_Bool, char)@, and pushes $N=40M$ elements on a generic stack, copies the stack, clears one of the stacks, finds the maximum value in the other stack, and prints $N$ constant values.
 
 The structure of each benchmark implemented is: C with @void *@-based polymorphism, \CFA with the presented features, \CC with templates, and \CC using only class inheritance for polymorphism, called \CCV.
@@ -961,8 +963,9 @@
 hence runtime checks are necessary to safely down-cast objects.
 The most notable difference among the implementations is in memory layout of generic types: \CFA and \CC inline the stack and pair elements into corresponding list and pair nodes, while C and \CCV lack such a capability and instead must store generic objects via pointers to separately-allocated objects.
-For the print benchmark, idiomatic printing is used: the C and \CFA variants used @cstdio.h@, while the \CC and \CCV variants used @iostream@; preliminary tests show this distinction has little runtime impact.
+For the print benchmark, idiomatic printing is used: the C and \CFA variants used @stdio.h@, while the \CC and \CCV variants used @iostream@; preliminary tests show this distinction has little runtime impact.
+Note, the C benchmark uses unchecked casts as there is no runtime mechanism to perform such checks, while \CFA and \CC provide type-safety statically.
 
 \begin{figure}
-\begin{lstlisting}[xleftmargin=3\parindentlnth,aboveskip=0pt,belowskip=0pt,numbers=left,numberstyle=\tt\small,numberblanklines=false]
+\begin{lstlisting}[xleftmargin=3\parindentlnth,aboveskip=0pt,belowskip=0pt]
 int main( int argc, char *argv[] ) {
 	FILE * out = fopen( "cfa-out.txt", "w" );
@@ -987,6 +990,6 @@
 }
 \end{lstlisting}
-\caption{\CFA Micro-Benchmark}
-\label{fig:MicroBenchmark}
+\caption{\CFA Benchmark Test}
+\label{fig:BenchmarkTest}
 \end{figure}
 
@@ -1002,5 +1005,5 @@
 \label{tab:eval}
 \newcommand{\CT}[1]{\multicolumn{1}{c}{#1}}
-\begin{tabular}{r|rrrr}
+\begin{tabular}{rrrrr}
 									& \CT{C}	& \CT{\CFA}	& \CT{\CC}	& \CT{\CCV}		\\ \hline
 maximum memory usage (MB)			& 10001		& 2501		& 2503		& 11253			\\
@@ -1011,20 +1014,32 @@
 \end{table}
 
-Figure~\ref{fig:eval} and Table~\ref{tab:eval} show the results of running the benchmark in Figure~\ref{fig:MicroBenchmark} and its C, \CC, and \CCV equivalents. 
-The data shown is the median of 5 consecutive runs of each program, with an initial warm-up run omitted.
-All code was compiled at \texttt{-O2} by GCC or G++ 6.2.0, with all \CC code compiled as \CCfourteen.
-The benchmarks were run on an Ubuntu 16.04 workstation with 16 GB of RAM and a 6-core AMD FX-6300 CPU with 3.5 GHz maximum clock frequency.
-The C and \CCV variants are generally the slowest and most memory-hungry, due to their less-efficient memory layout and the pointer-indirection necessary to implement generic types in these languages; this problem is exacerbated by the second level of generic types in the pair-based benchmarks.
-By contrast, the \CFA and \CC variants run in roughly equivalent time for both the integer and pair of boolean and char tests, which makes sense given that an integer is actually larger than the pair in both languages. 
-\CCV is slower than C largely due to the cost of runtime type-checking of down-casts (implemented here using the \CC @dynamic_cast@ mechanism); our C benchmark uses unchecked casts due to the lack of a language mechanism to perform such checks, while \CFA and \CC can enforce type-safety statically at compilation.
+Figure~\ref{fig:eval} and Table~\ref{tab:eval} show the results of running the benchmark in Figure~\ref{fig:BenchmarkTest} and its C, \CC, and \CCV equivalents. 
+The graph plots the median of 5 consecutive runs of each program, with an initial warm-up run omitted.
+All code is compiled at \texttt{-O2} by GCC or G++ 6.2.0, with all \CC code compiled as \CCfourteen.
+The benchmarks are run on an Ubuntu 16.04 workstation with 16 GB of RAM and a 6-core AMD FX-6300 CPU with 3.5 GHz maximum clock frequency.
+
+The C and \CCV variants are generally the slowest with the largest memory footprint, because to their less-efficient memory layout and the pointer-indirection necessary to implement generic types;
+this inefficiency is exacerbated by the second level of generic types in the pair-based benchmarks.
+By contrast, the \CFA and \CC variants run in roughly equivalent time for both the integer and pair of @_Bool@ and @char@ because the storage layout is equivalent.
+\CCV is slower than C largely due to the cost of runtime type-checking of down-casts (implemented with @dynamic_cast@);
+There are two outliers in the graph for \CFA: all prints and pop of @pair@.
+Both of these cases result from the complexity of the C-generated polymorphic code, so that the GCC compiler is unable to optimize some dead code and condense nested calls.
+A compiler for \CFA could easily perform these optimizations.
+Finally, the binary size for \CFA is larger because of static linking with the \CFA libraries.
 
 \CC performs best because it uses header-only inlined libraries (\ie no separate compilation).
-\CFA and \CC have the advantage of a pre-written generic @pair@ type to reduce line count, while C and \CCV require it to written by the programmer, as C does not have a generic collections library in its standard distribution and \CCV does not use the \CC standard template library by construction.
-The definition of @object@ and wrapper classes for @bool@, @char@, @int@, and @const char *@ are included in the line count for \CCV, which somewhat inflates its line count, as an actual object-oriented language would include these in the standard library; with their omission the \CCV line count is similar to C.
-We justify the given line count by noting that many object-oriented languages do not allow implementing new interfaces on library types without subclassing or boilerplate-filled wrapper types, which may be similarly verbose.
+\CFA and \CC have the advantage of a pre-written generic @pair@ and @stack@ type to reduce line count, while C and \CCV require it to written by the programmer, as C does not have a generic collections-library and \CCV does not use the \CC standard template library by construction.
+For \CCV, the definition of @object@ and wrapper classes for @bool@, @char@, @int@, and @const char *@ are included in the line count, which inflates its line count, as an actual object-oriented language would include these in the standard library;
+with their omission the \CCV line count is similar to C.
+We justify the given line count by noting that many object-oriented languages do not allow implementing new interfaces on library types without subclassing or wrapper types, which may be similarly verbose.
 
 Raw line-count, however, is a fairly rough measure of code complexity;
 another important factor is how much type information the programmer must manually specify, especially where that information is not checked by the compiler.
-Such un-checked type information produces a heavier documentation burden and increased potential for runtime bugs, and is much less common in \CFA than C, with its manually specified function pointers arguments and format codes, or \CCV, with its extensive use of un-type-checked downcasts (\eg @object@ to @integer@ when popping a stack, or @object@ to @printable@ when printing the elements of a @pair@). To quantify this, the ``redundant type annotations'' line in Table~\ref{tab:eval} counts the number of lines on which the type of a known variable is re-specified, either as a format specifier, explicit downcast, type-specific function, or by name in a @sizeof@, struct literal, or @new@ expression. The \CC benchmark uses two redundant type annotations to create a new stack nodes, while the C and \CCV benchmarks have several such annotations spread throughout their code. The three instances in which the \CFA benchmark still uses redundant type specifiers are to cast the result of a polymorphic @malloc@ call (the @sizeof@ argument is inferred by the compiler). These uses are similar to the @new@ expressions in \CC, though ongoing work on the \CFA compiler's type resolver should shortly render even these type casts superfluous.
+Such unchecked type information produces a heavier documentation burden and increased potential for runtime bugs, and is much less common in \CFA than C, with its manually specified function pointers arguments and format codes, or \CCV, with its extensive use of un-type-checked downcasts (\eg @object@ to @integer@ when popping a stack, or @object@ to @printable@ when printing the elements of a @pair@).
+To quantify this, the ``redundant type annotations'' line in Table~\ref{tab:eval} counts the number of lines on which the type of a known variable is re-specified, either as a format specifier, explicit downcast, type-specific function, or by name in a @sizeof@, struct literal, or @new@ expression.
+The \CC benchmark uses two redundant type annotations to create a new stack nodes, while the C and \CCV benchmarks have several such annotations spread throughout their code.
+The three instances in which the \CFA benchmark still uses redundant type specifiers are to cast the result of a polymorphic @malloc@ call (the @sizeof@ argument is inferred by the compiler).
+These uses are similar to the @new@ expressions in \CC, though ongoing work on the \CFA compiler's type resolver should shortly render even these type casts superfluous.
+
 
 \section{Related Work}
@@ -1041,5 +1056,5 @@
 In contrast, \CFA has a single facility for polymorphic code supporting type-safe separate-compilation of polymorphic functions and generic (opaque) types, which uniformly leverage the C procedural paradigm.
 The key mechanism to support separate compilation is \CFA's \emph{explicit} use of assumed properties for a type.
-Until \CC concepts~\citep{C++Concepts} are standardized (anticipated for \CCtwenty), \CC provides no way to specify the requirements of a generic function in code beyond compilation errors during template expansion;
+Until \CC~\citep{C++Concepts} are standardized (anticipated for \CCtwenty), \CC provides no way to specify the requirements of a generic function in code beyond compilation errors during template expansion;
 furthermore, \CC concepts are restricted to template polymorphism.
 
@@ -1050,10 +1065,10 @@
 While the Cyclone design provides the efficiency benefits discussed in Section~\ref{sec:generic-apps} for dtype-static polymorphism, it is more restrictive than \CFA's general model.
 
-Objective-C~\citep{obj-c-book} is an industrially successful extension to C.
+\citep{obj-c-book} is an industrially successful extension to C.
 However, Objective-C is a radical departure from C, using an object-oriented model with message-passing.
 Objective-C did not support type-checked generics until recently~\citep{xcode7}, historically using less-efficient and more error-prone runtime checking of object types.
-The GObject framework~\citep{GObject} also adds object-oriented programming with runtime type-checking and reference-counting garbage-collection to C;
+The~\citep{GObject} framework also adds object-oriented programming with runtime type-checking and reference-counting garbage-collection to C;
 these features are more intrusive additions than those provided by \CFA, in addition to the runtime overhead of reference-counting.
-Vala~\citep{Vala} compiles to GObject-based C, and so adds the burden of learning a separate language syntax to the aforementioned demerits of GObject as a modernization path for the existing C code-bases.
+\citep{Vala} compiles to GObject-based C, and so adds the burden of learning a separate language syntax to the aforementioned demerits of GObject as a modernization path for the existing C code-bases.
 Java~\citep{Java8} included generic types in Java~5;
 Java's generic types are type-checked at compilation and type-erased at runtime, similar to \CFA's.
@@ -1061,5 +1076,5 @@
 Java is also a garbage-collected, object-oriented language, with the associated resource usage and C-interoperability burdens.
 
-D~\citep{D}, Go~\citep{Go}, and Rust~\citep{Rust} are modern, compiled languages with abstraction features similar to \CFA traits, \emph{interfaces} in D and Go and \emph{traits} in Rust.
+D~\citep{D}, Go~\citep{Go}, and~\citep{Rust} are modern, compiled languages with abstraction features similar to \CFA traits, \emph{interfaces} in D and Go and \emph{traits} in Rust.
 However, each language represents a significant departure from C in terms of language model, and none has the same level of compatibility with C as \CFA.
 D and Go are garbage-collected languages, imposing the associated runtime overhead.
@@ -1077,5 +1092,5 @@
 SETL~\cite{SETL} is a high-level mathematical programming language, with tuples being one of the primary data types.
 Tuples in SETL allow subscripting, dynamic expansion, and multiple assignment.
-C provides variadic functions through @va_list@ objects, but the programmer is responsible for managing the number of arguments and their types, so the mechanism is not type-safe.
+C provides variadic functions through @va_list@ objects, but the programmer is responsible for managing the number of arguments and their types, so the mechanism is type unsafe.
 KW-C~\cite{Buhr94a}, a predecessor of \CFA, introduced tuples to C as an extension of the C syntax, taking much of its inspiration from SETL.
 The main contributions of that work were adding MRVF, tuple mass and multiple assignment, and record-field access.
@@ -1089,5 +1104,5 @@
 Go does not have tuples but supports MRVF.
 Java's variadic functions appear similar to C's but are type-safe using homogeneous arrays, which are less useful than \CFA's heterogeneously-typed variadic functions.
-Tuples are a fundamental abstraction in most functional programming languages, such as Standard ML~\cite{sml} and Scala~\cite{Scala}, which decompose tuples using pattern matching.
+Tuples are a fundamental abstraction in most functional programming languages, such as Standard ML~\cite{sml} and~\cite{Scala}, which decompose tuples using pattern matching.
 
 
@@ -1101,19 +1116,20 @@
 On the surface, the project may appear as a rehash of similar mechanisms in \CC.
 However, every \CFA feature is different than its \CC counterpart, often with extended functionality, better integration with C and its programmers, and always supporting separate compilation.
-All of these new features are being used by the \CFA development-team to build the \CFA runtime system.
+All of these new features are being used by the \CFA development-team to build the \CFA runtime-system.
 Finally, we demonstrate that \CFA performance for some idiomatic cases is better than C and close to \CC, showing the design is practically applicable.
 
-There is ongoing work on a wide range of \CFA feature extensions, including reference types, exceptions, and concurrent programming primitives.
+There is ongoing work on a wide range of \CFA feature extensions, including reference types, exceptions, concurrent primitives and modules.
+(While all examples in the paper compile and run, a public beta-release of \CFA will take another 8--12 months to finalize these addition extensions.)
 In addition, there are interesting future directions for the polymorphism design.
 Notably, \CC template functions trade compile time and code bloat for optimal runtime of individual instantiations of polymorphic functions.
 \CFA polymorphic functions, by contrast, uses a dynamic virtual dispatch.
 The runtime overhead of this approach is low, but not as low as inlining, and it may be beneficial to provide a mechanism for performance-sensitive code.
-Two promising approaches are an @inline@ annotation at polymorphic function call sites to create a template-specialization of the function (provided the code is visible) or placing an @inline@ annotation on polymorphic function-definitions to instantiate a specialized version for to some set of types.
-These approaches are not mutually exclusive, and allow performance optimizations to be applied only when necessary, without suffering global code-bloat.
-In general, we believe separate compilation producing smaller code works well with loaded hardware-caches, which may offset the benefit of larger inlined code.
+Two promising approaches are an @inline@ annotation at polymorphic function call sites to create a template-specialization of the function (provided the code is visible) or placing an @inline@ annotation on polymorphic function-definitions to instantiate a specialized version for some set of types.
+These approaches are not mutually exclusive and allow performance optimizations to be applied only when necessary, without suffering global code-bloat.
+In general, we believe separate compilation, producing smaller code, works well with loaded hardware-caches, which may offset the benefit of larger inlined-code.
 
 
 \begin{acks}
-The authors would like to recognize the design assitance of Glen Ditchfield, Richard Bilson, and Thierry Delisle on the features described in this paper. They also thank Magnus Madsen and three anonymous reviewers for valuable editorial feedback.
+The authors would like to recognize the design assistance of Glen Ditchfield, Richard Bilson, and Thierry Delisle on the features described in this paper. They also thank Magnus Madsen and three anonymous reviewers for valuable editorial feedback.
 
 This work is supported in part by a corporate partnership with \grantsponsor{Huawei}{Huawei Ltd.}{http://www.huawei.com}\ and the first author's \grantsponsor{NSERC-PGS}{NSERC PGS D}{http://www.nserc-crsng.gc.ca/Students-Etudiants/PG-CS/BellandPostgrad-BelletSuperieures_eng.asp} scholarship.
@@ -1124,9 +1140,79 @@
 \bibliography{cfa}
 
+
 \appendix
 
-\section{Benchmark Source Code}
-\label{sec:BenchMarks}
-
+\section{Benchmark Interfaces}
+\label{sec:BenchmarkInterfaces}
+
+\lstset{basicstyle=\linespread{0.9}\sf\small}
+
+\CFA
+\begin{lstlisting}[xleftmargin=2\parindentlnth,aboveskip=0pt,belowskip=0pt]
+forall(otype T) struct stack_node;
+forall(otype T) struct stack { stack_node(T) * head; };
+forall(otype T) void ?{}(stack(T) * s);
+forall(otype T) void ?{}(stack(T) * s, stack(T) t);
+forall(otype T) stack(T) ?=?(stack(T) * s, stack(T) t);
+forall(otype T) void ^?{}(stack(T) * s);
+forall(otype T) _Bool empty(const stack(T) * s);
+forall(otype T) void push(stack(T) * s, T value);
+forall(otype T) T pop(stack(T) * s);
+forall(otype T) void clear(stack(T) * s);
+
+void print( FILE * out, const char * x );
+void print( FILE * out, _Bool x );
+void print( FILE * out, char x );
+void print( FILE * out, int x );
+forall(otype T, ttype Params | { void print( FILE *, T ); void print( FILE *, Params ); })
+	void print( FILE * out, T arg, Params rest );
+forall(otype R, otype S | { void print( FILE *, R ); void print( FILE *, S ); })
+	void print( FILE * out, pair(R, S) x );
+\end{lstlisting}
+
+\medskip\noindent
+\CC
+\begin{lstlisting}[xleftmargin=2\parindentlnth,aboveskip=0pt,belowskip=0pt]
+std::pair
+std::forward_list wrapped in std::stack interface
+
+template<typename T> void print(ostream& out, const T& x) { out << x; }
+template<> void print<bool>(ostream& out, const bool& x) { out << (x ? "true" : "false"); }
+template<> void print<char>(ostream& out, const char& x ) { out << "'" << x << "'"; }
+template<typename R, typename S> ostream& operator<< (ostream& out, const pair<R, S>& x) {
+	out << "["; print(out, x.first); out << ", "; print(out, x.second); return out << "]"; }
+template<typename T, typename... Args> void print(ostream& out, const T& arg, const Args&... rest) {
+	out << arg;	print(out, rest...); }
+\end{lstlisting}
+
+\medskip\noindent
+C
+\begin{lstlisting}[xleftmargin=2\parindentlnth,aboveskip=0pt,belowskip=0pt]
+struct pair { void * first, second; };
+struct pair * new_pair( void * first, void * second );
+struct pair * copy_pair( const struct pair * src, 
+	void * (*copy_first)( const void * ), void * (*copy_second)( const void * ) );
+void free_pair( struct pair * p, void (*free_first)( void * ), void (*free_second)( void * ) );
+int cmp_pair( const struct pair * a, const struct pair * b, 
+	int (*cmp_first)( const void *, const void * ), int (*cmp_second)( const void *, const void * ) );
+
+struct stack_node;
+struct stack { struct stack_node * head; };
+struct stack new_stack();
+void copy_stack( struct stack * dst, const struct stack * src, void * (*copy)( const void * ) );
+void clear_stack( struct stack * s, void (*free_el)( void * ) );
+_Bool stack_empty( const struct stack * s );
+void push_stack( struct stack * s, void * value );
+void * pop_stack( struct stack * s );
+
+void print_string( FILE * out, const char * x );
+void print_bool( FILE * out, _Bool x );
+void print_char( FILE * out, char x );
+void print_int( FILE * out, int x );
+void print( FILE * out, const char * fmt, ... );
+\end{lstlisting}
+
+
+\begin{comment}
 Throughout, @/***/@ designates a counted redundant type annotation.
 
@@ -1223,4 +1309,5 @@
 
 \lstinputlisting[language=c++]{evaluation/cpp-vbench.cpp}
+\end{comment}
 
 \end{document}