Index: doc/papers/general/Paper.tex
===================================================================
--- doc/papers/general/Paper.tex	(revision fb11446ea727a6aa8117f8ef7d17b3ad38f1b41c)
+++ doc/papers/general/Paper.tex	(revision 4c11fce008192a6f1c8839fd6372e97b6b601ae4)
@@ -267,4 +267,5 @@
 int m = max( max, -max );					$\C{// uses (3) and (1) twice, by matching return type}$
 \end{cfa}
+
 \CFA maximizes the ability to reuse names to aggressively address the naming problem.
 In some cases, hundreds of names can be reduced to tens, resulting in a significant cognitive reduction.
@@ -285,5 +286,5 @@
 
 
-\subsection{\texorpdfstring{\LstKeywordStyle{forall} Functions}{forall Functions}}
+\subsection{\texorpdfstring{\protect\lstinline{forall} Functions}{forall Functions}}
 \label{sec:poly-fns}
 
@@ -435,5 +436,5 @@
 One approach is to write bespoke data-structures for each context in which they are needed.
 While this approach is flexible and supports integration with the C type-checker and tooling, it is also tedious and error-prone, especially for more complex data structures.
-A second approach is to use @void *@--based polymorphism, \eg the C standard-library functions @bsearch@ and @qsort@, which allow reuse of code with common functionality.
+A second approach is to use @void *@ based polymorphism, \eg the C standard-library functions @bsearch@ and @qsort@, which allow reuse of code with common functionality.
 However, basing all polymorphism on @void *@ eliminates the type-checker's ability to ensure that argument types are properly matched, often requiring a number of extra function parameters, pointer indirection, and dynamic allocation that is not otherwise needed.
 A third approach to generic code is to use preprocessor macros, which does allow the generated code to be both generic and type-checked, but errors may be difficult to interpret.
@@ -508,12 +509,12 @@
 If a dynamic generic-type is declared to be passed or returned by value from a polymorphic function, the translator can safely assume the generic type is complete (\ie has a known layout) at any call-site, and the offset array is passed from the caller;
 if the generic type is concrete at the call site, the elements of this offset array can even be statically generated using the C @offsetof@ macro.
-As an example, the body of the second @value@ function is implemented like this:
-\begin{cfa}
-_assign_T(_retval, p + _offsetof_pair[1]); $\C{// return *p.second}$
-\end{cfa}
-@_assign_T@ is passed in as an implicit parameter from @otype T@, and takes two @T*@ (@void*@ in the generated code), a destination and a source; @_retval@ is the pointer to a caller-allocated buffer for the return value, the usual \CFA method to handle dynamically-sized return types.
+As an example, the body of the second @value@ function is implemented as:
+\begin{cfa}
+_assign_T( _retval, p + _offsetof_pair[1] ); $\C{// return *p.second}$
+\end{cfa}
+@_assign_T@ is passed in as an implicit parameter from @otype T@, and takes two @T *@ (@void *@ in the generated code), a destination and a source; @_retval@ is the pointer to a caller-allocated buffer for the return value, the usual \CFA method to handle dynamically-sized return types.
 @_offsetof_pair@ is the offset array passed into @value@; this array is generated at the call site as:
 \begin{cfa}
-size_t _offsetof_pair[] = { offsetof(_pair_conc0, first), offsetof(_pair_conc0, second) }
+size_t _offsetof_pair[] = { offsetof( _pair_conc0, first ), offsetof( _pair_conc0, second ) }
 \end{cfa}
 
@@ -539,18 +540,18 @@
 The most important such pattern is using @forall(dtype T) T *@ as a type-checked replacement for @void *@, \eg creating a lexicographic comparison for pairs of pointers used by @bsearch@ or @qsort@:
 \begin{cfa}
-forall(dtype T) int lexcmp( pair( T *, T * ) * a, pair( T *, T * ) * b, int (* cmp)( T *, T * ) ) {
+forall( dtype T ) int lexcmp( pair( T *, T * ) * a, pair( T *, T * ) * b, int (* cmp)( T *, T * ) ) {
 	return cmp( a->first, b->first ) ? : cmp( a->second, b->second );
 }
 \end{cfa}
-Since @pair(T *, T * )@ is a concrete type, there are no implicit parameters passed to @lexcmp@, so the generated code is identical to a function written in standard C using @void *@, yet the \CFA version is type-checked to ensure the fields of both pairs and the arguments to the comparison function match in type.
+Since @pair( T *, T * )@ is a concrete type, there are no implicit parameters passed to @lexcmp@, so the generated code is identical to a function written in standard C using @void *@, yet the \CFA version is type-checked to ensure the fields of both pairs and the arguments to the comparison function match in type.
 
 Another useful pattern enabled by reused dtype-static type instantiations is zero-cost \newterm{tag-structures}.
 Sometimes information is only used for type-checking and can be omitted at runtime, \eg:
 \begin{cfa}
-forall(dtype Unit) struct scalar { unsigned long value; };
+forall( dtype Unit ) struct scalar { unsigned long value; };
 struct metres {};
 struct litres {};
 
-forall(dtype U) scalar(U) ?+?( scalar(U) a, scalar(U) b ) {
+forall( dtype U ) scalar(U) ?+?( scalar(U) a, scalar(U) b ) {
 	return (scalar(U)){ a.value + b.value };
 }
@@ -807,5 +808,5 @@
 Due to the implicit flattening and structuring conversions involved in argument passing, @otype@ and @dtype@ parameters are restricted to matching only with non-tuple types, \eg:
 \begin{cfa}
-forall(otype T, dtype U) void f( T x, U * y );
+forall( otype T, dtype U ) void f( T x, U * y );
 f( [5, "hello"] );
 \end{cfa}
@@ -814,5 +815,5 @@
 For example, a plus operator can be written to add two triples together.
 \begin{cfa}
-forall(otype T | { T ?+?( T, T ); }) [T, T, T] ?+?( [T, T, T] x, [T, T, T] y ) {
+forall( otype T | { T ?+?( T, T ); } ) [T, T, T] ?+?( [T, T, T] x, [T, T, T] y ) {
 	return [x.0 + y.0, x.1 + y.1, x.2 + y.2];
 }
@@ -825,5 +826,5 @@
 \begin{cfa}
 int f( [int, double], double );
-forall(otype T, otype U | { T f( T, U, U ); }) void g( T, U );
+forall( otype T, otype U | { T f( T, U, U ); } ) void g( T, U );
 g( 5, 10.21 );
 \end{cfa}
@@ -852,5 +853,5 @@
 \begin{cfa}
 int sum$\(_0\)$() { return 0; }
-forall(ttype Params | { int sum( Params ); } ) int sum$\(_1\)$( int x, Params rest ) {
+forall( ttype Params | { int sum( Params ); } ) int sum$\(_1\)$( int x, Params rest ) {
 	return x + sum( rest );
 }
@@ -865,5 +866,5 @@
 \begin{cfa}
 int sum( int x, int y ) { return x + y; }
-forall(ttype Params | { int sum( int, Params ); } ) int sum( int x, int y, Params rest ) {
+forall( ttype Params | { int sum( int, Params ); } ) int sum( int x, int y, Params rest ) {
 	return sum( x + y, rest );
 }
@@ -871,11 +872,11 @@
 One more step permits the summation of any summable type with all arguments of the same type:
 \begin{cfa}
-trait summable(otype T) {
+trait summable( otype T ) {
 	T ?+?( T, T );
 };
-forall(otype R | summable( R ) ) R sum( R x, R y ) {
+forall( otype R | summable( R ) ) R sum( R x, R y ) {
 	return x + y;
 }
-forall(otype R, ttype Params | summable(R) | { R sum(R, Params); } ) R sum(R x, R y, Params rest) {
+forall( otype R, ttype Params | summable(R) | { R sum(R, Params); } ) R sum(R x, R y, Params rest) {
 	return sum( x + y, rest );
 }
@@ -888,5 +889,5 @@
 \begin{cfa}
 struct S { int x, y; };
-forall(otype T, ttype Params | { void print(T); void print(Params); }) void print(T arg, Params rest) {
+forall( otype T, ttype Params | { void print(T); void print(Params); } ) void print(T arg, Params rest) {
 	print(arg);  print(rest);
 }
@@ -927,5 +928,5 @@
 is transformed into:
 \begin{cfa}
-forall(dtype T0, dtype T1 | sized(T0) | sized(T1)) struct _tuple2 {
+forall( dtype T0, dtype T1 | sized(T0) | sized(T1) ) struct _tuple2 {
 	T0 field_0;								$\C{// generated before the first 2-tuple}$
 	T1 field_1;
@@ -933,5 +934,5 @@
 _tuple2(int, int) f() {
 	_tuple2(double, double) x;
-	forall(dtype T0, dtype T1, dtype T2 | sized(T0) | sized(T1) | sized(T2)) struct _tuple3 {
+	forall( dtype T0, dtype T1, dtype T2 | sized(T0) | sized(T1) | sized(T2) ) struct _tuple3 {
 		T0 field_0;							$\C{// generated before the first 3-tuple}$
 		T1 field_1;
@@ -941,7 +942,7 @@
 }
 \end{cfa}
-\begin{sloppypar}
+{\sloppy
 Tuple expressions are then simply converted directly into compound literals, \eg @[5, 'x', 1.24]@ becomes @(_tuple3(int, char, double)){ 5, 'x', 1.24 }@.
-\end{sloppypar}
+\par}%
 
 \begin{comment}
@@ -1007,5 +1008,5 @@
 
 
-\subsection{\texorpdfstring{\LstKeywordStyle{if} Statement}{if Statement}}
+\subsection{\texorpdfstring{\protect\lstinline{if} Statement}{if Statement}}
 
 The @if@ expression allows declarations, similar to @for@ declaration expression:
@@ -1019,5 +1020,5 @@
 
 
-\subsection{\texorpdfstring{\LstKeywordStyle{switch} Statement}{switch Statement}}
+\subsection{\texorpdfstring{\protect\lstinline{switch} Statement}{switch Statement}}
 
 There are a number of deficiencies with the C @switch@ statements: enumerating @case@ lists, placement of @case@ clauses, scope of the switch body, and fall through between case clauses.
@@ -1090,8 +1091,6 @@
 C @switch@ provides multiple entry points into the statement body, but once an entry point is selected, control continues across \emph{all} @case@ clauses until the end of the @switch@ body, called \newterm{fall through};
 @case@ clauses are made disjoint by the @break@ statement.
-While the ability to fall through \emph{is} a useful form of control flow, it does not match well with programmer intuition, resulting in many errors from missing @break@ statements.
-For backwards compatibility, \CFA provides a \emph{new} control structure, @choose@, which mimics @switch@, but reverses the meaning of fall through (see Figure~\ref{f:ChooseSwitchStatements}).
-
-Collectively, these enhancements reduce programmer burden and increase readability and safety.
+While fall through \emph{is} a useful form of control flow, it does not match well with programmer intuition, resulting in errors from missing @break@ statements.
+For backwards compatibility, \CFA provides a \emph{new} control structure, @choose@, which mimics @switch@, but reverses the meaning of fall through (see Figure~\ref{f:ChooseSwitchStatements}), similar to Go.
 
 \begin{figure}
@@ -1137,29 +1136,41 @@
 \end{figure}
 
-\begin{comment}
-Forgotten @break@ statements at the end of @switch@ cases are a persistent sort of programmer error in C, and the @break@ statements themselves introduce visual clutter and an un-C-like keyword-based block delimiter. 
-\CFA addresses this error by introducing a @choose@ statement, which works identically to a @switch@ except that its default end-of-case behaviour is to break rather than to fall through for all non-empty cases. 
-Since empty cases like @case 7:@ in @case 7: case 11:@ still have fall-through semantics and explicit @break@ is still allowed at the end of a @choose@ case, many idiomatic uses of @switch@ in standard C can be converted to @choose@ statements by simply changing the keyword. 
-Where fall-through is desired for a non-empty case, it can be specified with the new @fallthrough@ statement, making @choose@ equivalently powerful to @switch@, but more concise in the common case where most non-empty cases end with a @break@ statement, as in the example below:
-
-\begin{cfa}
-choose( i ) {
-	case 2:
-		printf("even ");
-		fallthrough;
-	case 3: case 5: case 7:
-		printf("small prime\n");
-	case 4,6,8,9:
-		printf("small composite\n");
-	case 13~19:
-		printf("teen\n");
-	default:
-		printf("something else\n");
-}
-\end{cfa}
-\end{comment}
-
-
-\subsection{\texorpdfstring{Labelled \LstKeywordStyle{continue} / \LstKeywordStyle{break}}{Labelled continue / break}}
+Finally, @fallthrough@ may appear in contexts other than terminating a @case@ clause, and may have an explicit transfer label, allowing a set of cases to have separate code but share common final-code:
+\begin{cquote}
+\lstDeleteShortInline@%
+\begin{tabular}{@{}l@{\hspace{2\parindentlnth}}l@{}}
+\multicolumn{1}{c@{\hspace{2\parindentlnth}}}{\textbf{non-terminator}}	& \multicolumn{1}{c}{\textbf{target label}}	\\
+\begin{cfa}
+choose ( ... ) {
+  case 3:
+	if ( ... ) {
+		... `fallthrough;`  // goto case 4
+	} else {
+		...
+	}
+	// implicit break
+  case 4:
+\end{cfa}
+&
+\begin{cfa}
+choose ( ... ) {
+  case 3:
+	... `fallthrough common;`
+  case 4:
+	... `fallthrough common;`
+  common:
+	...	 // common code for cases 3 and 4
+	// implicit break
+  case 5:
+\end{cfa}
+\end{tabular}
+\lstMakeShortInline@%
+\end{cquote}
+The target label may be the @default@ case.
+
+Collectively, these control-structure enhancements reduce programmer burden and increase readability and safety.
+
+
+\subsection{\texorpdfstring{Labelled \protect\lstinline{continue} / \protect\lstinline{break}}{Labelled continue / break}}
 
 While C provides @continue@ and @break@ statements for altering control flow, both are restricted to one level of nesting for a particular control structure.
@@ -1270,5 +1281,5 @@
 \subsection{Exception Handling}
 
-The following framework for \CFA exception handling is in place, excluding some run-time type-information and dynamic casts.
+The following framework for \CFA exception handling is in place, excluding some runtime type-information and virtual functions.
 \CFA provides two forms of exception handling: \newterm{fix-up} and \newterm{recovery} (see Figure~\ref{f:CFAExceptionHandling})~\cite{Buhr92b,Buhr00a}.
 Both mechanisms provide dynamic call to a handler using dynamic name-lookup, where fix-up has dynamic return and recovery has static return from the handler.
@@ -1340,5 +1351,5 @@
    catch ( IOError err ) { ... }			$\C{// handler error from other files}$
 \end{cfa}
-where the throw inserts the failing file-handle in the I/O exception.
+where the throw inserts the failing file-handle into the I/O exception.
 Conditional catch cannot be trivially mimicked by other mechanisms because once an exception is caught, handler clauses in that @try@ statement are no longer eligible..
 
@@ -1348,8 +1359,8 @@
 resume( $\emph{alternate-stack}$ )
 \end{cfa}
-These overloads of @resume@ raise the specified exception or the currently propagating exception (reresume) at another \CFA coroutine or task~\cite{Delisle18}.\footnote{\CFA coroutine and concurrency features are discussed in a separately submitted paper.}
-Nonlocal raise is restricted to resumption to provide the exception handler the greatest flexibility because processing the exception does not unwind its stack, allowing it to continue after the handle returns.
-
-To facilitate nonlocal exception, \CFA provides dynamic enabling and disabling of nonlocal exception-propagation.
+These overloads of @resume@ raise the specified exception or the currently propagating exception (reresume) at another \CFA coroutine or task\footnote{\CFA coroutine and concurrency features are discussed in a separately submitted paper.}~\cite{Delisle18}.
+Nonlocal raise is restricted to resumption to provide the exception handler the greatest flexibility because processing the exception does not unwind its stack, allowing it to continue after the handler returns.
+
+To facilitate nonlocal raise, \CFA provides dynamic enabling and disabling of nonlocal exception-propagation.
 The constructs for controlling propagation of nonlocal exceptions are the @enable@ and the @disable@ blocks:
 \begin{cquote}
@@ -1358,5 +1369,5 @@
 \begin{cfa}
 enable $\emph{exception-type-list}$ {
-	// allow non-local resumption
+	// allow non-local raise
 }
 \end{cfa}
@@ -1364,5 +1375,5 @@
 \begin{cfa}
 disable $\emph{exception-type-list}$ {
-	// disallow non-local resumption
+	// disallow non-local raise
 }
 \end{cfa}
@@ -1375,5 +1386,5 @@
 Coroutines and tasks start with non-local exceptions disabled, allowing handlers to be put in place, before non-local exceptions are explicitly enabled.
 \begin{cfa}
-void main( mytask & c ) {					$\C{// thread starts here}$
+void main( mytask & t ) {					$\C{// thread starts here}$
 	// non-local exceptions disabled
 	try {									$\C{// establish handles for non-local exceptions}$
@@ -1401,5 +1412,5 @@
 
 
-\subsection{\texorpdfstring{\LstKeywordStyle{with} Clause / Statement}{with Clause / Statement}}
+\subsection{\texorpdfstring{\protect\lstinline{with} Clause / Statement}{with Clause / Statement}}
 \label{s:WithClauseStatement}
 
@@ -2724,4 +2735,5 @@
 user defined: D, Objective-C
 
+
 \section{Conclusion and Future Work}
 
@@ -2736,5 +2748,5 @@
 Finally, we demonstrate that \CFA performance for some idiomatic cases is better than C and close to \CC, showing the design is practically applicable.
 
-There is ongoing work on a wide range of \CFA feature extensions, including arrays with size, user-defined conversions, concurrent primitives, and modules.
+There is ongoing work on a wide range of \CFA feature extensions, including arrays with size, runtime type-information, virtual functions, user-defined conversions, concurrent primitives, and modules.
 (While all examples in the paper compile and run, a public beta-release of \CFA will take another 8--12 months to finalize these additional extensions.)
 In addition, there are interesting future directions for the polymorphism design.
@@ -2771,14 +2783,14 @@
 \CFA
 \begin{cfa}[xleftmargin=2\parindentlnth,aboveskip=0pt,belowskip=0pt]
-forall(otype T) struct stack_node;
-forall(otype T) struct stack {
+forall( otype T ) struct stack_node;
+forall( otype T ) struct stack {
 	stack_node(T) * head;
 };
-forall(otype T) struct stack_node {
+forall( otype T ) struct stack_node {
 	T value;
 	stack_node(T) * next;
 };
-forall(otype T) void ?{}( stack(T) & s ) { (s.head){ 0 }; }
-forall(otype T) void ?{}( stack(T) & s, stack(T) t ) {
+forall( otype T ) void ?{}( stack(T) & s ) { (s.head){ 0 }; }
+forall( otype T ) void ?{}( stack(T) & s, stack(T) t ) {
 	stack_node(T) ** crnt = &s.head;
 	for ( stack_node(T) * next = t.head; next; next = next->next ) {
@@ -2791,5 +2803,5 @@
 	*crnt = 0;
 }
-forall(otype T) stack(T) ?=?( stack(T) & s, stack(T) t ) {
+forall( otype T ) stack(T) ?=?( stack(T) & s, stack(T) t ) {
 	if ( s.head == t.head ) return s;
 	clear( s );
@@ -2797,12 +2809,12 @@
 	return s;
 }
-forall(otype T) void ^?{}( stack(T) & s) { clear( s ); }
-forall(otype T) _Bool empty( const stack(T) & s ) { return s.head == 0; }
-forall(otype T) void push( stack(T) & s, T value ) {
+forall( otype T ) void ^?{}( stack(T) & s) { clear( s ); }
+forall( otype T ) _Bool empty( const stack(T) & s ) { return s.head == 0; }
+forall( otype T ) void push( stack(T) & s, T value ) {
 	stack_node(T) * new_node = ((stack_node(T)*)malloc());
 	(*new_node){ value, s.head }; /***/
 	s.head = new_node;
 }
-forall(otype T) T pop( stack(T) & s ) {
+forall( otype T ) T pop( stack(T) & s ) {
 	stack_node(T) * n = s.head;
 	s.head = n->next;
@@ -2811,5 +2823,5 @@
 	return v;
 }
-forall(otype T) void clear( stack(T) & s ) {
+forall( otype T ) void clear( stack(T) & s ) {
 	for ( stack_node(T) * next = s.head; next; ) {
 		stack_node(T) * crnt = next;