Index: doc/bibliography/pl.bib
===================================================================
--- doc/bibliography/pl.bib	(revision 69c37ccfbd57ffcac000650a919c9708faf8c701)
+++ doc/bibliography/pl.bib	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -1163,16 +1163,16 @@
     title	= {Checked C: Making C Safe by Extension},
     booktitle	= {2018 IEEE Cybersecurity Development (SecDev)},
-    year = {2018},
-    month = {September},
-    pages = {53-60},
-    publisher = {IEEE},
-    url = {https://www.microsoft.com/en-us/research/publication/checkedc-making-c-safe-by-extension/},
+    year	= {2018},
+    month	= {September},
+    pages	= {53-60},
+    publisher	= {IEEE},
+    url		= {https://www.microsoft.com/en-us/research/publication/checkedc-making-c-safe-by-extension/},
 }
 
 @misc{Clang,
-    keywords = {clang},
-    contributer = {a3moss@uwaterloo.ca},
-    title = {Clang: a {C} language family frontend for {LLVM}},
-    howpublished = {\href{https://clang.llvm.org/}{https://\-clang.llvm.org/}}
+    keywords	= {clang},
+    contributer	= {a3moss@uwaterloo.ca},
+    title	= {Clang: a {C} language family frontend for {LLVM}},
+    howpublished= {\href{https://clang.llvm.org/}{https://\-clang.llvm.org/}}
 }
 
@@ -2347,4 +2347,20 @@
 }
 
+@article{Ritchie93,
+    keywords	= {C, history},
+    contributer	= {pabuhr@plg},
+    author	= {Ritchie, Dennis M.},
+    title	= {The Development of the {C} Language},
+    journal	= sigplan,
+    volume	= 28,
+    number	= 3,
+    month	= mar,
+    year	= 1993,
+    pages	= {201--208},
+    url		= {http://doi.acm.org/10.1145/155360.155580},
+    publisher	= {ACM},
+    address	= {New York, NY, USA},
+} 
+
 @article{design,
     keywords	= {Smalltalk, designing classes},
@@ -2354,5 +2370,7 @@
     journal	= joop,
     year	= 1988,
-    volume	= 1, number = 2, pages = {22-35},
+    volume	= 1,
+    number	= 2,
+    pages	= {22-35},
     comment	= {
 	Abstract classes represent standard protocols.  ``It is better to
@@ -3789,4 +3807,16 @@
     optaddress	= {Waterloo, Ontario, Canada, N2L 3G1},
     note	= {\href{https://uwspace.uwaterloo.ca/handle/10012/13935}{https://\-uwspace.uwaterloo.ca/\-handle/\-10012/\-13935}},
+}
+
+@article{Swift05,
+   contributer	= {pabuhr@plg},
+   author	= {Michael M. Swift and Brian N. Bershad and Henry M. Levy},
+   title	= {Improving the Reliability of Commodity Operating Systems},
+   journal	= tocs,
+   volume	= 23,
+   number	= 1,
+   month	= feb,
+   year		= 2005,
+   pages	= {77--110},
 }
 
Index: doc/papers/concurrency/Paper.tex
===================================================================
--- doc/papers/concurrency/Paper.tex	(revision 69c37ccfbd57ffcac000650a919c9708faf8c701)
+++ doc/papers/concurrency/Paper.tex	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -215,5 +215,8 @@
 {}
 \lstnewenvironment{Go}[1][]
-{\lstset{#1}}
+{\lstset{language=go,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
+{}
+\lstnewenvironment{python}[1][]
+{\lstset{language=python,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
 {}
 
@@ -228,5 +231,5 @@
 }
 
-\title{\texorpdfstring{Advanced Control-flow in \protect\CFA}{Advanced Control-flow in Cforall}}
+\title{\texorpdfstring{Advanced Control-flow and Concurrency in \protect\CFA}{Advanced Control-flow and Concurrency in Cforall}}
 
 \author[1]{Thierry Delisle}
@@ -242,9 +245,9 @@
 \abstract[Summary]{
 \CFA is a modern, polymorphic, non-object-oriented, backwards-compatible extension of the C programming language.
-This paper discusses the advanced control-flow features in \CFA, which include concurrency and parallelism, and its supporting runtime system.
-These features are created from scratch as ISO C's concurrency is low-level and unimplemented, so C programmers continue to rely on the C pthreads library.
-\CFA provides high-level control-flow mechanisms, like coroutines and user-level threads, and monitors for mutual exclusion and synchronization.
-A unique contribution of this work is allowing multiple monitors to be safely acquired \emph{simultaneously} (deadlock free), while integrating this capability with all monitor synchronization mechanisms.
-All features respect the expectations of C programmers, while being fully integrate with the \CFA polymorphic type-system and other language features.
+This paper discusses some advanced control-flow and concurrency/parallelism features in \CFA, along with the supporting runtime.
+These features are created from scratch because they do not exist in ISO C, or are low-level and/or unimplemented, so C programmers continue to rely on library features, like C pthreads.
+\CFA introduces language-level control-flow mechanisms, like coroutines, user-level threading, and monitors for mutual exclusion and synchronization.
+A unique contribution of this work is allowing multiple monitors to be safely acquired \emph{simultaneously} (deadlock free), while integrating this capability with monitor synchronization mechanisms.
+These features also integrate with the \CFA polymorphic type-system and exception handling, while respecting the expectations and style of C programmers.
 Experimental results show comparable performance of the new features with similar mechanisms in other concurrent programming-languages.
 }%
@@ -261,15 +264,16 @@
 \section{Introduction}
 
-This paper discusses the design of advanced, high-level control-flow extensions (especially concurrency and parallelism) in \CFA and its runtime.
+This paper discusses the design of language-level control-flow and concurrency/parallelism extensions in \CFA and its runtime.
 \CFA is a modern, polymorphic, non-object-oriented\footnote{
 \CFA has features often associated with object-oriented programming languages, such as constructors, destructors, virtuals and simple inheritance.
 However, functions \emph{cannot} be nested in structures, so there is no lexical binding between a structure and set of functions (member/method) implemented by an implicit \lstinline@this@ (receiver) parameter.},
 backwards-compatible extension of the C programming language~\cite{Moss18}.
-Within the \CFA framework, new control-flow features were created from scratch.
-ISO \Celeven defines only a subset of the \CFA extensions, and with respect to concurrency~\cite[\S~7.26]{C11}, the features are largely wrappers for a subset of the pthreads library~\cite{Butenhof97,Pthreads}.
-Furthermore, \Celeven and pthreads concurrency is basic, based on thread fork/join in a function and a few locks, which is low-level and error prone;
-no high-level language concurrency features exist.
-Interestingly, almost a decade after publication of the \Celeven standard, neither gcc-8, clang-8 nor msvc-19 (most recent versions) support the \Celeven include @threads.h@, indicating little interest in the C concurrency approach.
-Finally, while the \Celeven standard does not state a concurrent threading-model, the historical association with pthreads suggests the threading model is kernel-level threading (1:1)~\cite{ThreadModel}.
+Within the \CFA framework, new control-flow features are created from scratch.
+ISO \Celeven defines only a subset of the \CFA extensions, where the overlapping features are concurrency~\cite[\S~7.26]{C11}.
+However, \Celeven concurrency is largely wrappers for a subset of the pthreads library~\cite{Butenhof97,Pthreads}.
+Furthermore, \Celeven and pthreads concurrency is simple, based on thread fork/join in a function and a few locks, which is low-level and error prone;
+no high-level language concurrency features are defined.
+Interestingly, almost a decade after publication of the \Celeven standard, neither gcc-8, clang-8 nor msvc-19 (most recent versions) support the \Celeven include @threads.h@, indicating little interest in the \Celeven concurrency approach.
+Finally, while the \Celeven standard does not state a concurrent threading-model, the historical association with pthreads suggests implementations would adopt kernel-level threading (1:1)~\cite{ThreadModel}.
 
 In contrast, there has been a renewed interest during the past decade in user-level (M:N, green) threading in old and new programming languages.
@@ -284,13 +288,13 @@
 
 A further effort over the past decade is the development of language memory-models to deal with the conflict between certain language features and compiler/hardware optimizations.
-This issue can be rephrased as some features are pervasive (language and runtime) and cannot be safely added via a library to prevent invalidation by sequential optimizations~\cite{Buhr95a,Boehm05}.
+This issue can be rephrased as: some language features are pervasive (language and runtime) and cannot be safely added via a library to prevent invalidation by sequential optimizations~\cite{Buhr95a,Boehm05}.
 The consequence is that a language must be cognizant of these features and provide sufficient tools to program around any safety issues.
 For example, C created the @volatile@ qualifier to provide correct execution for @setjmp@/@logjmp@ (concurrency came later).
-The simplest solution is to provide a handful of complex qualifiers and functions (e.g., @volatile@ and atomics) allowing programmers to write consistent/race-free programs, often in the sequentially-consistent memory-model~\cite{Boehm12}.
+The common solution is to provide a handful of complex qualifiers and functions (e.g., @volatile@ and atomics) allowing programmers to write consistent/race-free programs, often in the sequentially-consistent memory-model~\cite{Boehm12}.
 
 While having a sufficient memory-model allows sound libraries to be constructed, writing these libraries can quickly become awkward and error prone, and using these low-level libraries has the same issues.
 Essentially, using low-level explicit locks is the concurrent equivalent of assembler programming.
-Just as most assembler programming is replaced with programming in a high-level language, explicit locks can be replaced with high-level concurrency constructs in a programming language.
-The goal is to get the compiler to check for correct usage and follow any complex coding conventions implicitly.
+Just as most assembler programming is replaced with high-level programming, explicit locks can be replaced with high-level concurrency in a programming language.
+Then the goal is for the compiler to check for correct usage and follow any complex coding conventions implicitly.
 The drawback is that language constructs may preclude certain specialized techniques, therefore introducing inefficiency or inhibiting concurrency.
 For most concurrent programs, these drawbacks are insignificant in comparison to the speed of composition, and subsequent reliability and maintainability of the high-level concurrent program.
@@ -299,5 +303,5 @@
 As stated, this observation applies to non-concurrent forms of complex control-flow, like exception handling and coroutines.
 
-Adapting the programming language allows matching the control-flow model with the programming-language style, versus adapting to one general (sound) library/paradigm.
+Adapting the programming language to these features also allows matching the control-flow model with the programming-language style, versus adopting one general (sound) library/paradigm.
 For example, it is possible to provide exceptions, coroutines, monitors, and tasks as specialized types in an object-oriented language, integrating these constructs to allow leveraging the type-system (static type-checking) and all other object-oriented capabilities~\cite{uC++}.
 It is also possible to leverage call/return for blocking communication via new control structures, versus switching to alternative communication paradigms, like channels or message passing.
@@ -307,8 +311,17 @@
 however, the reverse is seldom true, i.e., given implicit concurrency, e.g., actors, it is virtually impossible to create explicit concurrency, e.g., blocking thread objects.}
 Finally, with extended language features and user-level threading it is possible to discretely fold locking and non-blocking I/O multiplexing into the language's I/O libraries, so threading implicitly dovetails with the I/O subsystem.
-
-\CFA embraces language extensions and user-level threading to provide advanced control-flow and concurrency.
-We attempt to show the \CFA extensions and runtime are demonstrably better than those proposed for \CC and other concurrent, imperative programming languages.
-The contributions of this work are:
+\CFA embraces language extensions and user-level threading to provide advanced control-flow (exception handling\footnote{
+\CFA exception handling will be presented in a separate paper.
+The key feature that dovetails with this paper is non-local exceptions allowing exceptions to be raised across stacks, with synchronous exceptions raised among coroutines and asynchronous exceptions raised among threads, similar to that in \uC~\cite[\S~5]{uC++}.
+} and coroutines) and concurrency.
+
+Most augmented traditional (Fortran 18~\cite{Fortran18}, Cobol 14~\cite{Cobol14}, Ada 12~\cite{Ada12}, Java 11~\cite{Java11}) and new languages (Go~\cite{Go}, Rust~\cite{Rust}, and D~\cite{D}), except \CC, diverge from C with different syntax and semantics, only interoperate indirectly with C, and are not systems languages, for those with managed memory.
+As a result, there is a significant learning curve to move to these languages, and C legacy-code must be rewritten.
+While \CC, like \CFA, takes an evolutionary approach to extend C, \CC's constantly growing, complex, and interdependent feature-set (e.g., objects, inheritance, templates, etc.) means idiomatic \CC code is difficult to use from C, and C programmers must expend significant effort learning \CC.
+Hence, rewriting and retraining costs for these languages, even \CC, are prohibitive for companies with a large C software-base.
+\CFA with its orthogonal feature-set, its high-performance runtime, and direct access to all existing C libraries circumvents these problems.
+
+We present comparative examples so the reader can judge if the \CFA control-flow extensions are equivalent to or better than those in or proposed for \Celeven, \CC and other concurrent, imperative programming languages, and perform experiments to show the \CFA runtime is competitive with other similar mechanisms.
+The detailed contributions of this work are:
 \begin{itemize}
 \item
@@ -615,7 +628,7 @@
 
 
-\section{Coroutines: A Stepping Stone}\label{coroutine}
-
-Advanced controlWhile the focus of this discussion is concurrency and parallelism, it is important to address coroutines, which are a significant building block of a concurrency system (but not concurrent among themselves).
+\section{Coroutines: Stepping Stone}
+\label{coroutine}
+
 Coroutines are generalized routines allowing execution to be temporarily suspended and later resumed.
 Hence, unlike a normal routine, a coroutine may not terminate when it returns to its caller, allowing it to be restarted with the values and execution location present at the point of suspension.
@@ -641,20 +654,43 @@
 \centering
 \newbox\myboxA
+% \begin{lrbox}{\myboxA}
+% \begin{cfa}[aboveskip=0pt,belowskip=0pt]
+% `int fn1, fn2, state = 1;`   // single global variables
+% int fib() {
+% 	int fn;
+% 	`switch ( state )` {  // explicit execution state
+% 	  case 1: fn = 0;  fn1 = fn;  state = 2;  break;
+% 	  case 2: fn = 1;  fn2 = fn1;  fn1 = fn;  state = 3;  break;
+% 	  case 3: fn = fn1 + fn2;  fn2 = fn1;  fn1 = fn;  break;
+% 	}
+% 	return fn;
+% }
+% int main() {
+% 
+% 	for ( int i = 0; i < 10; i += 1 ) {
+% 		printf( "%d\n", fib() );
+% 	}
+% }
+% \end{cfa}
+% \end{lrbox}
 \begin{lrbox}{\myboxA}
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-`int f1, f2, state = 1;`   // single global variables
-int fib() {
-	int fn;
-	`switch ( state )` {  // explicit execution state
-	  case 1: fn = 0;  f1 = fn;  state = 2;  break;
-	  case 2: fn = 1;  f2 = f1;  f1 = fn;  state = 3;  break;
-	  case 3: fn = f1 + f2;  f2 = f1;  f1 = fn;  break;
-	}
-	return fn;
-}
+#define FIB_INIT { 0, 1 }
+typedef struct { int fn1, fn; } Fib;
+int fib( Fib * f ) {
+
+	int ret = f->fn1;
+	f->fn1 = f->fn;
+	f->fn = ret + f->fn;
+	return ret;
+}
+
+
+
 int main() {
-
+	Fib f1 = FIB_INIT, f2 = FIB_INIT;
 	for ( int i = 0; i < 10; i += 1 ) {
-		printf( "%d\n", fib() );
+		printf( "%d %d\n",
+				fib( &f1 ), fib( &f2 ) );
 	}
 }
@@ -665,84 +701,106 @@
 \begin{lrbox}{\myboxB}
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-#define FIB_INIT `{ 0, 1 }`
-typedef struct { int f2, f1; } Fib;
-int fib( Fib * f ) {
-
-	int ret = f->f2;
-	int fn = f->f1 + f->f2;
-	f->f2 = f->f1; f->f1 = fn;
-
-	return ret;
-}
-int main() {
-	Fib f1 = FIB_INIT, f2 = FIB_INIT;
-	for ( int i = 0; i < 10; i += 1 ) {
-		printf( "%d %d\n", fib( &f1 ), fib( &f2 ) );
+`coroutine` Fib { int fn1; };
+void main( Fib & fib ) with( fib ) {
+	int fn;
+	[fn1, fn] = [0, 1];
+	for () {
+		`suspend();`
+		[fn1, fn] = [fn, fn1 + fn];
 	}
 }
-\end{cfa}
-\end{lrbox}
-
-\subfloat[3 States: global variables]{\label{f:GlobalVariables}\usebox\myboxA}
-\qquad
-\subfloat[1 State: external variables]{\label{f:ExternalState}\usebox\myboxB}
-\caption{C Fibonacci Implementations}
-\label{f:C-fibonacci}
-
-\bigskip
-
-\newbox\myboxA
-\begin{lrbox}{\myboxA}
-\begin{cfa}[aboveskip=0pt,belowskip=0pt]
-`coroutine` Fib { int fn; };
-void main( Fib & fib ) with( fib ) {
-	int f1, f2;
-	fn = 0;  f1 = fn;  `suspend()`;
-	fn = 1;  f2 = f1;  f1 = fn;  `suspend()`;
-	for ( ;; ) {
-		fn = f1 + f2;  f2 = f1;  f1 = fn;  `suspend()`;
-	}
-}
-int next( Fib & fib ) with( fib ) {
-	`resume( fib );`
-	return fn;
+int ?()( Fib & fib ) with( fib ) {
+	`resume( fib );`  return fn1;
 }
 int main() {
 	Fib f1, f2;
-	for ( int i = 1; i <= 10; i += 1 ) {
-		sout | next( f1 ) | next( f2 );
-	}
-}
+	for ( 10 )
+		sout | f1() | f2();
+}
+
+
 \end{cfa}
 \end{lrbox}
-\newbox\myboxB
-\begin{lrbox}{\myboxB}
-\begin{cfa}[aboveskip=0pt,belowskip=0pt]
-`coroutine` Fib { int ret; };
-void main( Fib & f ) with( fib ) {
-	int fn, f1 = 1, f2 = 0;
-	for ( ;; ) {
-		ret = f2;
-
-		fn = f1 + f2;  f2 = f1;  f1 = fn; `suspend();`
-	}
-}
-int next( Fib & fib ) with( fib ) {
-	`resume( fib );`
-	return ret;
-}
-
-
-
-
-
-
-\end{cfa}
+
+\newbox\myboxC
+\begin{lrbox}{\myboxC}
+\begin{python}[aboveskip=0pt,belowskip=0pt]
+
+def Fib():
+
+    fn1, fn = 0, 1
+    while True:
+        `yield fn1`
+        fn1, fn = fn, fn1 + fn
+
+
+# next prewritten
+
+
+f1 = Fib()
+f2 = Fib()
+for i in range( 10 ):
+	print( next( f1 ), next( f2 ) )
+
+
+
+\end{python}
 \end{lrbox}
-\subfloat[3 States, internal variables]{\label{f:Coroutine3States}\usebox\myboxA}
-\qquad\qquad
-\subfloat[1 State, internal variables]{\label{f:Coroutine1State}\usebox\myboxB}
-\caption{\CFA Coroutine Fibonacci Implementations}
-\label{f:cfa-fibonacci}
+
+\subfloat[C]{\label{f:GlobalVariables}\usebox\myboxA}
+\hspace{3pt}
+\vrule
+\hspace{3pt}
+\subfloat[\CFA]{\label{f:ExternalState}\usebox\myboxB}
+\hspace{3pt}
+\vrule
+\hspace{3pt}
+\subfloat[Python]{\label{f:PythonGenerator}\usebox\myboxC}
+\caption{Fibonacci Generator}
+\label{f:C-fibonacci}
+
+% \bigskip
+% 
+% \newbox\myboxA
+% \begin{lrbox}{\myboxA}
+% \begin{cfa}[aboveskip=0pt,belowskip=0pt]
+% `coroutine` Fib { int fn; };
+% void main( Fib & fib ) with( fib ) {
+% 	fn = 0;  int fn1 = fn; `suspend()`;
+% 	fn = 1;  int fn2 = fn1;  fn1 = fn; `suspend()`;
+% 	for () {
+% 		fn = fn1 + fn2; fn2 = fn1; fn1 = fn; `suspend()`; }
+% }
+% int next( Fib & fib ) with( fib ) { `resume( fib );` return fn; }
+% int main() {
+% 	Fib f1, f2;
+% 	for ( 10 )
+% 		sout | next( f1 ) | next( f2 );
+% }
+% \end{cfa}
+% \end{lrbox}
+% \newbox\myboxB
+% \begin{lrbox}{\myboxB}
+% \begin{python}[aboveskip=0pt,belowskip=0pt]
+% 
+% def Fibonacci():
+% 	fn = 0;	fn1 = fn; `yield fn`  # suspend
+% 	fn = 1; fn2 = fn1; fn1 = fn; `yield fn`
+% 	while True:
+% 		fn = fn1 + fn2; fn2 = fn1; fn1 = fn; `yield fn`
+% 
+% 
+% f1 = Fibonacci()
+% f2 = Fibonacci()
+% for i in range( 10 ):
+% 	print( `next( f1 )`, `next( f2 )` ) # resume
+% 
+% \end{python}
+% \end{lrbox}
+% \subfloat[\CFA]{\label{f:Coroutine3States}\usebox\myboxA}
+% \qquad
+% \subfloat[Python]{\label{f:Coroutine1State}\usebox\myboxB}
+% \caption{Fibonacci input coroutine, 3 states, internal variables}
+% \label{f:cfa-fibonacci}
 \end{figure}
 
@@ -784,34 +842,27 @@
 \begin{lrbox}{\myboxA}
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-`coroutine` Format {
-	char ch;   // used for communication
-	int g, b;  // global because used in destructor
+`coroutine` Fmt {
+	char ch;   // communication variables
+	int g, b;   // needed in destructor
 };
-void main( Format & fmt ) with( fmt ) {
-	for ( ;; ) {
-		for ( g = 0; g < 5; g += 1 ) {      // group
-			for ( b = 0; b < 4; b += 1 ) { // block
+void main( Fmt & fmt ) with( fmt ) {
+	for () {
+		for ( g = 0; g < 5; g += 1 ) { // groups
+			for ( b = 0; b < 4; b += 1 ) { // blocks
 				`suspend();`
-				sout | ch;              // separator
-			}
-			sout | "  ";               // separator
-		}
-		sout | nl;
-	}
-}
-void ?{}( Format & fmt ) { `resume( fmt );` }
-void ^?{}( Format & fmt ) with( fmt ) {
-	if ( g != 0 || b != 0 ) sout | nl;
-}
-void format( Format & fmt ) {
-	`resume( fmt );`
-}
+				sout | ch; } // print character
+			sout | "  "; } // block separator
+		sout | nl; }  // group separator
+}
+void ?{}( Fmt & fmt ) { `resume( fmt );` } // prime
+void ^?{}( Fmt & fmt ) with( fmt ) { // destructor
+	if ( g != 0 || b != 0 )	// special case
+		sout | nl; }
+void send( Fmt & fmt, char c ) { fmt.ch = c; `resume( fmt )`; }
 int main() {
-	Format fmt;
-	eof: for ( ;; ) {
-		sin | fmt.ch;
-	  if ( eof( sin ) ) break eof;
-		format( fmt );
-	}
+	Fmt fmt;
+ 	sout | nlOff;	// turn off auto newline
+	for ( 41 )
+		send( fmt, 'a' );
 }
 \end{cfa}
@@ -820,42 +871,35 @@
 \newbox\myboxB
 \begin{lrbox}{\myboxB}
-\begin{cfa}[aboveskip=0pt,belowskip=0pt]
-struct Format {
-	char ch;
-	int g, b;
-};
-void format( struct Format * fmt ) {
-	if ( fmt->ch != -1 ) {      // not EOF ?
-		printf( "%c", fmt->ch );
-		fmt->b += 1;
-		if ( fmt->b == 4 ) {  // block
-			printf( "  " );      // separator
-			fmt->b = 0;
-			fmt->g += 1;
-		}
-		if ( fmt->g == 5 ) {  // group
-			printf( "\n" );     // separator
-			fmt->g = 0;
-		}
-	} else {
-		if ( fmt->g != 0 || fmt->b != 0 ) printf( "\n" );
-	}
-}
-int main() {
-	struct Format fmt = { 0, 0, 0 };
-	for ( ;; ) {
-		scanf( "%c", &fmt.ch );
-	  if ( feof( stdin ) ) break;
-		format( &fmt );
-	}
-	fmt.ch = -1;
-	format( &fmt );
-}
-\end{cfa}
+\begin{python}[aboveskip=0pt,belowskip=0pt]
+
+
+
+def Fmt():
+	try:
+		while True:
+			for g in range( 5 ):
+				for b in range( 4 ):
+
+					print( `(yield)`, end='' )
+				print( '  ', end='' )
+			print()
+
+
+	except GeneratorExit:
+		if g != 0 or b != 0:
+			print()
+
+
+fmt = Fmt()
+`next( fmt )`			 # prime
+for i in range( 41 ):
+	`fmt.send( 'a' );`	# send to yield
+
+\end{python}
 \end{lrbox}
-\subfloat[\CFA Coroutine]{\label{f:CFAFmt}\usebox\myboxA}
+\subfloat[\CFA]{\label{f:CFAFmt}\usebox\myboxA}
 \qquad
-\subfloat[C Linearized]{\label{f:CFmt}\usebox\myboxB}
-\caption{Formatting text into lines of 5 blocks of 4 characters.}
+\subfloat[Python]{\label{f:CFmt}\usebox\myboxB}
+\caption{Output formatting text}
 \label{f:fmt-line}
 \end{figure}
@@ -878,5 +922,5 @@
 void main( Prod & prod ) with( prod ) {
 	// 1st resume starts here
-	for ( int i = 0; i < N; i += 1 ) {
+	for ( i; N ) {
 		int p1 = random( 100 ), p2 = random( 100 );
 		sout | p1 | " " | p2;
@@ -894,5 +938,5 @@
 }
 void start( Prod & prod, int N, Cons &c ) {
-	&prod.c = &c;
+	&prod.c = &c; // reassignable reference
 	prod.[N, receipt] = [N, 0];
 	`resume( prod );`
@@ -909,8 +953,8 @@
 	Prod & p;
 	int p1, p2, status;
-	_Bool done;
+	bool done;
 };
 void ?{}( Cons & cons, Prod & p ) {
-	&cons.p = &p;
+	&cons.p = &p; // reassignable reference
 	cons.[status, done ] = [0, false];
 }
@@ -969,4 +1013,59 @@
 The program main restarts after the resume in @start@.
 @start@ returns and the program main terminates.
+
+One \emph{killer} application for a coroutine is device drivers, which at one time caused 70\%--85\% of failures in Windows/Linux~\cite{Swift05}.
+Many device drivers are a finite state-machine parsing a protocol, e.g.:
+\begin{tabbing}
+\ldots STX \= \ldots message \ldots \= ESC \= ETX \= \ldots message \ldots  \= ETX \= 2-byte crc \= \ldots	\kill
+\ldots STX \> \ldots message \ldots \> ESC \> ETX \> \ldots message \ldots  \> ETX \> 2-byte crc \> \ldots
+\end{tabbing}
+where a network message begins with the control character STX and ends with an ETX, followed by a 2-byte cyclic-redundancy check.
+Control characters may appear in a message if preceded by an ESC.
+Because FSMs can be complex and occur frequently in important domains, direct support of the coroutine is crucial in a systems programming language.
+
+\begin{figure}
+\begin{cfa}
+enum Status { CONT, MSG, ESTX, ELNTH, ECRC };
+`coroutine` Driver {
+	Status status;
+	char * msg, byte;
+};
+void ?{}( Driver & d, char * m ) { d.msg = m; }		$\C[3.0in]{// constructor}$
+Status next( Driver & d, char b ) with( d ) {		$\C{// 'with' opens scope}$
+	byte = b; `resume( d );` return status;
+}
+void main( Driver & d ) with( d ) {
+	enum { STX = '\002', ESC = '\033', ETX = '\003', MaxMsg = 64 };
+	unsigned short int crc;							$\C{// error checking}$
+  msg: for () {										$\C{// parse message}$
+		status = CONT;
+		unsigned int lnth = 0, sum = 0;
+		while ( byte != STX ) `suspend();`
+	  emsg: for () {
+			`suspend();`							$\C{// process byte}$
+			choose ( byte ) {						$\C{// switch with default break}$
+			  case STX:
+				status = ESTX; `suspend();` continue msg;
+			  case ETX:
+				break emsg;
+			  case ESC:
+				`suspend();`
+			} // choose
+			if ( lnth >= MaxMsg ) {					$\C{// buffer full ?}$
+				status = ELNTH; `suspend();` continue msg; }
+			msg[lnth++] = byte;
+			sum += byte;
+		} // for
+		msg[lnth] = '\0';							$\C{// terminate string}\CRT$
+		`suspend();`
+		crc = (unsigned char)byte << 8;	// prevent sign extension for signed char
+		`suspend();`
+		status = (crc | (unsigned char)byte) == sum ? MSG : ECRC;
+		`suspend();`
+	} // for
+}
+\end{cfa}
+\caption{Device driver for simple communication protocol}
+\end{figure}
 
 
Index: doc/papers/concurrency/examples/C++Cor-ts.cpp
===================================================================
--- doc/papers/concurrency/examples/C++Cor-ts.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/C++Cor-ts.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,42 @@
+
+
+auto result = co_await expression;
+
+//   |  |
+//   |  |
+//  \    /
+//   \  /
+//    \/
+
+auto&& __a = expression;
+if (!__a.await_ready()) {
+	__a.await_suspend(coroutine-handle)
+	// ...suspend/resume point...
+}
+auto result = __a.await_resume();
+
+//==================================================
+
+co_yield i;
+
+//   |  |
+//   |  |
+//  \    /
+//   \  /
+//    \/
+
+co_await __promise.yield_value(i);
+
+//==================================================
+
+... coroutine() {
+	__coroutine_context* __context = new __coroutine_context{};
+	__return = __context->_promise.get_return_object();
+	co_await   __context->_promise.initial_suspend();
+
+	...
+
+__final_suspend_label:
+	co_await __context->_promise.final_suspend();
+	delete __context;
+}
Index: doc/papers/concurrency/examples/Fib.c
===================================================================
--- doc/papers/concurrency/examples/Fib.c	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Fib.c	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,23 @@
+#include <stdio.h>
+
+#define FIB_INIT { 0, 1 }
+typedef struct { int fn1, fn; } Fib;
+int fib( Fib * f ) {
+
+	int ret = f->fn1;
+	f->fn1 = f->fn;
+	f->fn = ret + f->fn;
+
+	return ret;
+}
+int main() {
+	Fib f1 = FIB_INIT, f2 = FIB_INIT;
+	for ( int i = 0; i < 10; i += 1 ) {
+		printf( "%d %d\n", fib( &f1 ), fib( &f2 ) );
+	}
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "gcc Fib.c" //
+// End: //
Index: doc/papers/concurrency/examples/Fib.cfa
===================================================================
--- doc/papers/concurrency/examples/Fib.cfa	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Fib.cfa	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,72 @@
+#include <thread.hfa>
+#include <fstream.hfa>
+
+int fn1, fn2, state = 1;
+int fib_gvar() {
+	int fn;
+	choose ( state ) {
+	  case 1: fn = 0;  fn1 = fn;  state = 2;
+	  case 2: fn = 1;  fn2 = fn1;  fn1 = fn;  state = 3;
+	  case 3: fn = fn1 + fn2;  fn2 = fn1;  fn1 = fn;
+	}
+	return fn;
+}
+
+#define FIB_INIT { 0, 1 }						// initial state: fn2 = 0 (older term), fn1 = 1 (newer term)
+typedef struct { int fn2, fn1; } Fib;			// field order must match FIB_INIT so the series starts 0, 1, 1, 2, ...
+int fib_state( Fib & f ) with( f ) {			// return the next Fibonacci number, advancing the state held in f
+	int ret = fn2;								// the older term is the value handed back to the caller
+	int fn = fn1 + fn2;							// compute the following term
+	fn2 = fn1; fn1 = fn;						// slide the two-term window forward
+	return ret;
+}
+
+coroutine Fib1 { int fn; };						// used for communication
+void main( Fib1 & fib ) with( fib ) {			// called on first resume
+	fn = 0;  int fn1 = fn; suspend();
+	fn = 1;  int fn2 = fn1;  fn1 = fn; suspend();
+	for () {
+		fn = fn1 + fn2;  fn2 = fn1;  fn1 = fn; suspend();
+	}
+}
+int next( Fib1 & fib ) with( fib ) { resume( fib ); return fn; }
+
+coroutine Fib2 { int ret; };					// used for communication
+void main( Fib2 & fib ) with( fib ) {			// called on first resume
+	int fn, fn1 = 1, fn2 = 0;					// precompute first two states
+	for () {
+		ret = fn2;
+		fn = fn1 + fn2;  fn2 = fn1;  fn1 = fn;	// general case
+		suspend();								// restart last resume
+	}
+}
+int next( Fib2 & fib ) with( fib ) {
+	resume( fib );								// restart last suspend
+	return ret;
+}
+
+int main() {
+	for ( 10 )
+		sout | fib_gvar();
+	sout | nl;
+
+	Fib f1 = FIB_INIT, f2 = FIB_INIT;
+	for ( 10 )
+		sout | fib_state( f1 ) | fib_state( f2 );
+	sout | nl;
+
+	Fib1 f1, f2;
+	for ( 10 )
+		sout | next( f1 ) | next( f2 );
+	sout | nl;
+
+	Fib2 f1, f2;
+	for ( 10 )
+		sout | next( (Fib2 &)f1 ) | next( (Fib2 &)f2 );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// fill-column: 120 //
+// compile-command: "cfa Fib.cfa" //
+// End: //
Index: doc/papers/concurrency/examples/Fib.cpp
===================================================================
--- doc/papers/concurrency/examples/Fib.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Fib.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,120 @@
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <vector>
+
+#include <experimental/coroutine>
+
+template<typename T>
+struct cor_range {	// move-only owner of a coroutine frame that hands out values of type T
+	struct promise_type {
+		T _value;	// last value given to co_yield / co_return
+
+		cor_range get_return_object() {
+			return cor_range(std::experimental::coroutine_handle<promise_type>::from_promise(*this));
+		}
+
+		auto initial_suspend() { return std::experimental::suspend_always(); }	// body does not start until the first resume
+		auto final_suspend()   { return std::experimental::suspend_always(); }	// frame stays alive until the owner destroys it
+
+		void return_value(T value) {
+			_value = value;
+		}
+
+		auto yield_value(T value) {
+			_value = value;
+			return std::experimental::suspend_always();
+		}
+
+		void unhandled_exception() {}	// an exception escaping the body is ignored
+	};
+
+	std::experimental::coroutine_handle<promise_type> _coroutine = nullptr;	// owned frame, or null
+
+	explicit cor_range(std::experimental::coroutine_handle<promise_type> coroutine)
+		: _coroutine(coroutine)
+	{}
+
+	~cor_range() {
+		if(_coroutine) { _coroutine.destroy(); }
+	}
+
+	cor_range() = default;
+	cor_range(cor_range const &) = delete;
+	cor_range& operator=(cor_range const &) = delete;
+
+	cor_range(cor_range&& other) {
+		std::swap(_coroutine, other._coroutine);	// _coroutine starts as nullptr, so other ends up empty
+	}
+
+	cor_range& operator=(cor_range&& other) {
+		// Swap instead of overwriting: the frame this object owned moves to `other`,
+		// whose destructor destroys it.  Overwriting the handle leaked that frame.
+		// Self-move is a harmless self-swap, so no identity test is needed.
+		std::swap(_coroutine, other._coroutine);
+		return *this;
+	}
+
+	T next() {	// resume once and return the value the coroutine left in its promise
+		_coroutine.resume();
+		return _coroutine.promise()._value;
+	}
+
+	struct iterator : std::iterator<std::input_iterator_tag, T> {	// non-owning view; defines only ++ and *
+		std::experimental::coroutine_handle<promise_type> _coroutine = nullptr;
+
+		iterator() = default;
+		explicit iterator(std::experimental::coroutine_handle<promise_type> coroutine)
+			: _coroutine(coroutine)
+		{}
+
+		iterator& operator++() {
+			_coroutine.resume();
+			return *this;
+		}
+
+		T const & operator*() const {
+			return _coroutine.promise()._value;
+		}
+	};
+
+	iterator begin() {	// performs the first resume so that *begin() is the first value
+		if(_coroutine) {
+			_coroutine.resume();
+			if(_coroutine.done()) { return end(); }
+		}
+
+		return iterator{ _coroutine };
+	}
+
+	iterator end() { return iterator{}; }
+};
+
+cor_range<int> fib() {	// generator: yields 0, 1, then each sum of the previous two, without end
+	int fn;
+	fn = 0; int fn1 = fn; co_yield fn;	// first value
+	fn = 1; int fn2 = fn1; fn1 = fn; co_yield fn;	// second value
+	for(;;) {
+		fn = fn1 + fn2; fn2 = fn1; fn1 = fn; co_yield fn;	// general case
+	}
+}
+
+int main() {
+	{	// two independent generators driven in lock step through next()
+		auto f1 = fib();
+		auto f2 = fib();
+		for(int i = 0; i < 10; i++) {
+			std::cout << f1.next() << " " << f2.next() << std::endl;
+		}
+	}
+
+	{	// one generator driven through the iterator interface
+		auto f1 = fib();
+		std::vector<int> fibs;
+		std::copy_n(f1.begin(), 10, std::back_inserter(fibs));	// begin() does the first resume
+
+		for(auto i : fibs) {
+			std::cout << i << std::endl;
+		}
+	}
+}
Index: doc/papers/concurrency/examples/Fib.py
===================================================================
--- doc/papers/concurrency/examples/Fib.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Fib.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,17 @@
+def Fib():  # generator: 0, 1, then each sum of the previous two, without end
+	fn = 0;	fn1 = fn; yield fn  # suspend
+	fn = 1; fn2 = fn1; fn1 = fn; yield fn  # second value
+	while True:
+		fn = fn1 + fn2; fn2 = fn1; fn1 = fn; yield fn  # general case
+
+
+
+f1 = Fib()  # two independent generator objects
+f2 = Fib()
+for i in range( 10 ):
+	print( next( f1 ), next( f2 ) )  # resume
+
+# Local Variables: #
+# tab-width: 4 #
+# compile-command: "python3.5 Fib.py" #
+# End: #
Index: doc/papers/concurrency/examples/Fib.sim
===================================================================
--- doc/papers/concurrency/examples/Fib.sim	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Fib.sim	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,47 @@
+BEGIN
+	CLASS Fibonacci;
+		HIDDEN fn, main;					! private members;
+	BEGIN
+		INTEGER fn;							! communication;
+
+		PROCEDURE main;						! mimic uC++ coroutine main;
+		BEGIN
+			INTEGER fn1, fn2;				! previous two values;
+
+			fn := 0; fn1 := fn;
+			Detach;							! suspend();
+			fn := 1; fn2 := fn1; fn1 := fn;
+			Detach;							! suspend();
+			WHILE TRUE DO BEGIN
+				fn := fn1 + fn2; fn2 := fn1; fn1 := fn;
+				Detach;						! suspend();
+			END;
+		END;
+
+		INTEGER PROCEDURE next;				! resume the object and return the value left in fn;
+		BEGIN
+			Call( THIS Fibonacci );			! resume();
+			next := fn;
+		END;
+		! Fibonacci constructor code;
+		Detach;								! return to declaration;
+		main;								! call main as last line of constructor;
+	END Fibonacci;
+	! program main equivalent;
+	REF(Fibonacci) f1, f2;					! objects are references;
+	INTEGER i;
+
+	f1 :- NEW Fibonacci;
+	f2 :- NEW Fibonacci;
+	FOR i := 1 STEP 1 UNTIL 10 DO BEGIN
+		OutInt( f1.next, 3 );				! field width 3;
+		OutText( " " );
+		OutInt( f2.next, 3 );
+		OutImage;
+	END
+END;
+
+! Local Variables: ;
+! tab-width: 4 ;
+! compile-command: "cim Fib.sim" ;
+! End: ;
Index: doc/papers/concurrency/examples/Fib2.cfa
===================================================================
--- doc/papers/concurrency/examples/Fib2.cfa	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Fib2.cfa	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,45 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2018 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// Fib2.cfa -- 1-state finite-state machine: precomputed first two states returning f(n - 1)
+//
+// Author           : Peter A. Buhr
+// Created On       : Thu Apr 26 23:20:08 2018
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Fri Mar 22 17:26:41 2019
+// Update Count     : 28
+// 
+
+#include <fstream.hfa>
+#include <coroutine.hfa>
+
+coroutine Fibonacci { int fn1; };						// used for communication
+
+void main( Fibonacci & fib ) with( fib ) {				// called on first resume
+	int fn;
+	[fn1, fn] = [0, 1];									// precompute first two states
+	for () {
+		suspend();										// restart last resume
+		[fn1, fn] = [fn, fn1 + fn];						// general case
+	} // for
+}
+
+int ?()( Fibonacci & fib ) with( fib ) {				// function call operator
+	resume( fib );										// restart last suspend
+	return fn1;											// value main left in fn1 before suspending
+}
+
+int main() {
+	Fibonacci f1, f2;									// two independent coroutines
+	for ( 10 ) {										// print N Fibonacci values
+		sout | f1() | f2();
+	} // for
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa Fib2.cfa" //
+// End: //
Index: doc/papers/concurrency/examples/Fib2.cpp
===================================================================
--- doc/papers/concurrency/examples/Fib2.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Fib2.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,121 @@
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <vector>
+
+#include <experimental/coroutine>
+
+template<typename T>
+struct cor_range {	// move-only owner of a coroutine frame that hands out values of type T
+	struct promise_type {
+		T _value;	// last value given to co_yield / co_return
+
+		cor_range get_return_object() {
+			return cor_range(std::experimental::coroutine_handle<promise_type>::from_promise(*this));
+		}
+
+		auto initial_suspend() { return std::experimental::suspend_always(); }	// body does not start until the first resume
+		auto final_suspend()   { return std::experimental::suspend_always(); }	// frame stays alive until the owner destroys it
+
+		void return_value(T value) {
+			_value = value;
+		}
+
+		auto yield_value(T value) {
+			_value = value;
+			return std::experimental::suspend_always();
+		}
+
+		void unhandled_exception() {}	// an exception escaping the body is ignored
+	};
+
+	std::experimental::coroutine_handle<promise_type> _coroutine = nullptr;	// owned frame, or null
+
+	explicit cor_range(std::experimental::coroutine_handle<promise_type> coroutine)
+		: _coroutine(coroutine)
+	{}
+
+	~cor_range() {
+		if(_coroutine) { _coroutine.destroy(); }
+	}
+
+	cor_range() = default;
+	cor_range(cor_range const &) = delete;
+	cor_range& operator=(cor_range const &) = delete;
+
+	cor_range(cor_range&& other) {
+		std::swap(_coroutine, other._coroutine);	// _coroutine starts as nullptr, so other ends up empty
+	}
+
+	cor_range& operator=(cor_range&& other) {
+		// Swap instead of overwriting: the frame this object owned moves to `other`,
+		// whose destructor destroys it.  Overwriting the handle leaked that frame.
+		// Self-move is a harmless self-swap, so no identity test is needed.
+		std::swap(_coroutine, other._coroutine);
+		return *this;
+	}
+
+	T next() {	// resume once and return the value the coroutine left in its promise
+		_coroutine.resume();
+		return _coroutine.promise()._value;
+	}
+
+	struct iterator : std::iterator<std::input_iterator_tag, T> {	// non-owning view; defines only ++ and *
+		std::experimental::coroutine_handle<promise_type> _coroutine = nullptr;
+
+		iterator() = default;
+		explicit iterator(std::experimental::coroutine_handle<promise_type> coroutine)
+			: _coroutine(coroutine)
+		{}
+
+		iterator& operator++() {
+			_coroutine.resume();
+			return *this;
+		}
+
+		T const & operator*() const {
+			return _coroutine.promise()._value;
+		}
+	};
+
+	iterator begin() {	// performs the first resume so that *begin() is the first value
+		if(_coroutine) {
+			_coroutine.resume();
+			if(_coroutine.done()) { return end(); }
+		}
+
+		return iterator{ _coroutine };
+	}
+
+	iterator end() { return iterator{}; }
+};
+
+cor_range<int> fib() {	// generator: yields fn1, then steps the pair (fn1, fn) forward, without end
+	int fn1 = 0, fn = 1;	// precomputed first two values
+	for(;;) {
+		co_yield fn1;
+		int temp = fn1;	// old fn1 is still needed after fn1 is overwritten
+		fn1 = fn;
+		fn = temp + fn;
+	}
+}
+
+int main() {
+	{	// two independent generators driven in lock step through next()
+		auto f1 = fib();
+		auto f2 = fib();
+		for(int i = 0; i < 10; i++) {
+			std::cout << f1.next() << " " << f2.next() << std::endl;
+		}
+	}
+
+	{	// one generator driven through the iterator interface
+		auto f1 = fib();
+		std::vector<int> fibs;
+		std::copy_n(f1.begin(), 10, std::back_inserter(fibs));	// begin() does the first resume
+
+		for(auto i : fibs) {
+			std::cout << i << std::endl;
+		}
+	}
+}
Index: doc/papers/concurrency/examples/Fib2.py
===================================================================
--- doc/papers/concurrency/examples/Fib2.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Fib2.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,15 @@
+def Fib():  # generator: yields fn1, then advances the pair (fn1, fn), without end
+    fn1, fn = 0, 1  # precomputed first two values
+    while True:
+        yield fn1
+        fn1, fn = fn, fn1 + fn  # general case
+
+f1 = Fib()  # two independent generator objects
+f2 = Fib()
+for i in range( 10 ):
+	print( next( f1 ), next( f2 ) )  # resume
+
+# Local Variables: #
+# tab-width: 4 #
+# compile-command: "python3.5 Fib2.py" #
+# End: #
Index: doc/papers/concurrency/examples/FibRefactor.py
===================================================================
--- doc/papers/concurrency/examples/FibRefactor.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/FibRefactor.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,25 @@
+def Fib():								# same sequence as a flat generator; first two steps moved into nested generators
+	def Refactor():						# first step: yields 0
+		nonlocal fn, fn1				# write Fib's variables, not new locals
+		fn = 0;	fn1 = fn
+		yield fn						# suspend
+
+	def Refactor2():					# second step: yields 1
+		nonlocal fn, fn1, fn2
+		fn = 1; fn2 = fn1; fn1 = fn
+		yield fn						# suspend
+
+	yield from Refactor()				# values yielded by the nested generator go to Fib's caller
+	yield from Refactor2()
+	while True:
+		fn = fn1 + fn2; fn2 = fn1; fn1 = fn; yield fn
+
+f1 = Fib()
+f2 = Fib()
+for i in range( 10 ):
+	print( next( f1 ), next( f2 ) )		# resume
+
+# Local Variables: #
+# tab-width: 4 #
+# compile-command: "python3.5 FibRefactor.py" #
+# End: #
Index: doc/papers/concurrency/examples/Fmt.sim
===================================================================
--- doc/papers/concurrency/examples/Fmt.sim	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Fmt.sim	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,46 @@
+BEGIN
+	CLASS FmtLines;
+		HIDDEN ch, main;					! private members;
+	BEGIN
+		CHARACTER ch;						! communication;
+
+		PROCEDURE main;						! mimic uC++ coroutine main;
+		BEGIN
+			INTEGER g, b;					! group and block counters;
+
+			WHILE TRUE DO BEGIN				! for as many characters;
+				FOR g := 1 STEP 1 UNTIL 5 DO BEGIN		! groups of 5;
+					FOR b := 1 STEP 1 UNTIL 4 DO BEGIN	! blocks of 4;
+						OutChar( ch );
+						Detach;				! suspend();
+					END;
+					OutText( "  " );
+				END;
+				OutImage;					! start newline;
+			END;
+		END;
+
+		PROCEDURE prt( chp );				! pass one character to the formatter;
+			CHARACTER chp;
+		BEGIN
+			ch := chp;						! communication;
+			Call( THIS FmtLines );			! resume();
+		END;
+		! FmtLines constructor code;
+		Detach;								! return to declaration;
+		main;								! call main as last line of constructor;
+	END FmtLines;
+	! program main equivalent;
+	REF(FmtLines) fmt;						! objects are references;
+	INTEGER i;
+
+	fmt :- NEW FmtLines;
+	FOR i := Rank( ' ' ) STEP 1 UNTIL Rank( 'z' ) DO BEGIN
+		fmt.prt( Char( i ) );
+	END
+END;
+
+! Local Variables: ;
+! tab-width: 4 ;
+! compile-command: "cim Fmt.sim" ;
+! End: ;
Index: doc/papers/concurrency/examples/Format.cc
===================================================================
--- doc/papers/concurrency/examples/Format.cc	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Format.cc	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,34 @@
+_Coroutine Format {
+	char ch; // used for communication
+	int g, b; // global because used in destructor
+	void main() { // all output statements below are commented out, so each resume only advances the counters
+		for ( ;; ) { // for as many characters
+			for ( g = 0; g < 5; g += 1 ) { // groups of 5 blocks
+				for ( b = 0; b < 4; b += 1 ) { // blocks of 4 characters
+//					for ( ;; ) { // for newline characters
+						suspend();
+//						if ( ch != '\n' ) break; // ignore newline
+//					}
+//					cout << ch; // print character
+				}
+//				cout << " "; // print block separator
+			}
+//			cout << endl; // print group separator
+		}
+	}
+  public:
+	Format() { resume(); } // start coroutine
+//	~Format() { if ( g != 0 | | b != 0 ) cout << endl; }
+	void prt( char ch ) { Format::ch = ch; resume(); } // store character in the member, then resume main
+};
+
+int main() {
+	Format fmt;
+	for ( long int i = 0; i < 1000000000; i += 1 ) // 1,000,000,000 resumes of the formatter
+		fmt.prt( 'a' );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "u++-work -O2 -nodebug Format.cc" //
+// End: //
Index: doc/papers/concurrency/examples/Format.cfa
===================================================================
--- doc/papers/concurrency/examples/Format.cfa	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Format.cfa	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,84 @@
+#if 1
+#include <fstream.hfa>
+#include <coroutine.hfa>
+
+coroutine Fmt {
+	char ch;								// communication variables
+	int g, b;								// needed in destructor
+};
+void main( Fmt & fmt ) with( fmt ) {		// prints characters as lines of 5 blocks of 4
+	for () {								// for as many characters
+		for ( g = 0; g < 5; g += 1 ) {		// groups of 5 blocks
+			for ( b = 0; b < 4; b += 1 ) {	// blocks of 4 characters
+//				do {
+					suspend();				// wait until a resumer has stored the next character in ch
+//				} while ( ch == '\n' || ch == '\t' );
+				sout | ch;					// print character
+			}
+			sout | "  ";					// block separator
+		}
+		sout | nl;							// group separator
+	}
+}
+void ?{}( Fmt & fmt ) { resume( fmt ); } // constructor: prime (start) coroutine so it runs to its first suspend
+void ^?{}( Fmt & fmt ) with( fmt ) { // destructor
+	if ( g != 0 || b != 0 )	// special case: a partial line is pending
+		sout | nl; }
+void send( Fmt & fmt, char c ) { fmt.ch = c; resume( fmt ); }	// store character, then resume formatter to print it
+int main() {
+	Fmt fmt;
+ 	sout | nlOff;							// turn off auto newline
+	for ( 41 )								// 41 characters: two full lines of 20 plus one more
+		send( fmt, 'a' );
+  // 	sout | nlOff;							// turn off auto newline
+  // eof: for () {								// read until end of file
+  // 		sin | fmt.ch;						// read one character
+  // 	  if ( eof( sin ) ) break eof;			// eof ?
+  // 		format( fmt );						// push character for formatting
+  // 	}
+}
+#else
+
+#include <stdio.h>
+
+struct Format {
+	char ch;								// used for communication; main stores -1 to mark end of input
+	int g, b;								// global because used in destructor
+};
+
+void format( struct Format * fmt ) {	// print fmt->ch and update block/group counters; ch == -1 flushes a partial line
+	if ( fmt->ch != -1 ) { // not EOF ? NOTE(review): never false where plain char is unsigned -- confirm target
+//		if ( fmt->ch == '\n' || fmt->ch == '\t' ) return;
+//		printf( "%c %d %d", fmt->ch, fmt->g, fmt->b );		// character
+		printf( "%c", fmt->ch );		// character
+		fmt->b += 1;
+		if ( fmt->b == 4 ) {	// block ?
+			printf( "  " ); // separator
+			fmt->b = 0;
+			fmt->g += 1;
+		}
+		if ( fmt->g == 5 ) {	// group ?
+			printf( "\n" ); // separator
+			fmt->g = 0;
+		}
+	} else {
+		if ( fmt->g != 0 || fmt->b != 0 ) printf( "\n" ); // partial line pending: terminate it
+	}
+}
+int main() {
+	struct Format fmt = { 0, 0, 0 };
+	for ( ;; ) {	// read stdin one character at a time
+		scanf( "%c", &fmt.ch );
+	  if ( feof( stdin ) ) break;
+		format( &fmt );
+	}
+	fmt.ch = -1;	// end-of-input marker tested by format()
+	format( &fmt );
+}
+#endif
+
+// Local Variables: //
+// tab-width: 4 //
+// fill-column: 120 //
+// compile-command: "cfa Format.cfa" //
+// End: //
Index: doc/papers/concurrency/examples/Format.cpp
===================================================================
--- doc/papers/concurrency/examples/Format.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Format.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,97 @@
+#include <cassert>
+#include <iostream>
+#include <experimental/coroutine>
+
+struct fmt_cor {	// move-only owner of a coroutine frame that receives one char per send()
+	struct promise_type {
+		char _value;	// character most recently stored by send()
+
+		fmt_cor get_return_object() {
+			return fmt_cor(std::experimental::coroutine_handle<promise_type>::from_promise(*this));
+		}
+
+		auto initial_suspend() { return std::experimental::suspend_never(); }	// body starts running as soon as it is called
+		auto final_suspend()   { return std::experimental::suspend_always(); }	// frame stays alive until the owner destroys it
+
+		void return_void() {}
+		void unhandled_exception() {}	// an exception escaping the body is ignored
+	};
+
+	struct get {	// awaitable: always suspends, yields the promise's _value when resumed
+		promise_type * _promise = nullptr;
+
+		bool await_ready() noexcept {
+			return false;
+		}
+
+		void await_suspend(std::experimental::coroutine_handle<promise_type> _coroutine) noexcept {
+			_promise = &_coroutine.promise();	// remember where send() stores the character
+		}
+		char await_resume() noexcept {
+			assert(_promise);
+			return _promise->_value;
+		}
+	};
+
+	std::experimental::coroutine_handle<promise_type> _coroutine = nullptr;	// owned frame, or null
+
+	explicit fmt_cor(std::experimental::coroutine_handle<promise_type> coroutine)
+		: _coroutine(coroutine)
+	{}
+
+	~fmt_cor() {
+		if(_coroutine) {
+			_coroutine.destroy();
+
+		}
+	}
+
+	fmt_cor() = default;
+	fmt_cor(fmt_cor const &) = delete;
+	fmt_cor& operator=(fmt_cor const &) = delete;
+
+	fmt_cor(fmt_cor&& other) {
+		std::swap(_coroutine, other._coroutine);	// _coroutine starts as nullptr, so other ends up empty
+	}
+
+	fmt_cor& operator=(fmt_cor&& other) {
+		// Swap instead of overwriting: the frame this object owned moves to `other`,
+		// whose destructor destroys it.  Overwriting the handle leaked that frame.
+		// Self-move is a harmless self-swap, so no identity test is needed.
+		std::swap(_coroutine, other._coroutine);
+		return *this;
+	}
+
+	void send(char a) {	// store the character, then resume the coroutine to consume it
+		_coroutine.promise()._value = a;
+		_coroutine.resume();
+	}
+};
+
+fmt_cor Fmt() {	// coroutine: prints received characters as lines of 5 blocks of 4
+	struct locals {	// counters live in a struct so its destructor runs when the frame is destroyed
+		int g, b;
+
+		~locals() {
+			if (g != 0 || b != 0) {	// partial line pending: terminate it (logical ||, was bitwise |)
+				std::cout << "\n";
+			}
+		}
+	} l;
+	for(;;) {
+		for(l.g = 0; l.g < 5; l.g++) {
+			for(l.b = 0; l.b < 4; l.b++) {
+				std::cout << co_await fmt_cor::get();	// suspend until send() supplies a character
+			}
+			std::cout << "  ";
+		}
+		std::cout << std::endl;
+	}
+}
+
+int main() {
+	auto fmt = Fmt();	// runs the coroutine up to its first co_await
+	for(int i = 0; i < 41; i++) {	// 41 characters: two full lines of 20 plus one more
+		fmt.send('a');
+	}
+}
Index: doc/papers/concurrency/examples/Format.py
===================================================================
--- doc/papers/concurrency/examples/Format.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Format.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,21 @@
+def Format():						# generator: prints sent characters as lines of 5 blocks of 4
+	try:
+		while True:
+			for g in range( 5 ): 	# groups of 5 blocks
+				for b in range( 4 ): # blocks of 4 characters
+					print( (yield), end='' ) # receive from send
+				print( '  ', end='' ) # block separator
+			print()					# group separator
+	except GeneratorExit:			# destructor
+		if g != 0 or b != 0:		# special case: partial line pending; `|` parsed as g != (0 | b) != 0
+			print()
+
+fmt = Format()
+next( fmt )							# prime generator
+for i in range( 41 ):				# 41 characters: two full lines of 20 plus one more
+	fmt.send( 'a' );				# send to yield
+
+# Local Variables: #
+# tab-width: 4 #
+# compile-command: "python3.5 Format.py" #
+# End: #
Index: doc/papers/concurrency/examples/Pingpong.cc
===================================================================
--- doc/papers/concurrency/examples/Pingpong.cc	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Pingpong.cc	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,31 @@
+#include <iostream>
+using namespace std;
+
+_Coroutine PingPong {
+	const char * name;	// text printed on each turn
+	const unsigned int N;	// number of turns
+	PingPong * part;	// partner to resume after each turn
+	void main() { // ping’s starter ::main, pong’s starter ping
+		for ( unsigned int i = 0; i < N; i += 1 ) {
+			cout << name << endl;
+			part->cycle();	// hand control to the partner
+		}
+	}
+  public:
+	PingPong( const char * name, unsigned int N, PingPong & part )
+		: name( name ), N( N ), part( & part ) {}
+	PingPong( const char * name, unsigned int N ) : name( name ), N( N ) {}	// part is left unset until partner() is called
+	void partner( PingPong & part ) { PingPong::part = &part; }
+	void cycle() { resume(); }
+};
+int main() {
+	enum { N = 10 };
+	PingPong ping( "ping", N ), pong( "pong", N, ping );	// ping has no partner yet
+	ping.partner( pong );	// close the cycle
+	ping.cycle();	// start ping
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "u++-work -O2 -nodebug Pingpong.cc" //
+// End: //
Index: doc/papers/concurrency/examples/Pingpong.cfa
===================================================================
--- doc/papers/concurrency/examples/Pingpong.cfa	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Pingpong.cfa	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,38 @@
+#include <coroutine.hfa>
+#include <fstream.hfa>
+
+coroutine PingPong {
+	const char * name;									// text printed on each turn
+	unsigned int N;										// number of turns
+	PingPong & part;									// partner resumed after each turn
+};
+
+void ?{}( PingPong & this, const char * name, unsigned int N, PingPong & part ) {
+	this.[name, N] = [name, N];  &this.part = &part;	// &this.part = ... rebinds the reference member
+}
+void ?{}( PingPong & this, const char * name, unsigned int N ) {
+	this{ name, N, *0p };								// call first constructor
+}
+void cycle( PingPong & pingpong ) {
+	resume( pingpong );
+}
+void partner( PingPong & this, PingPong & part ) {
+	&this.part = &part;									// set partner
+	resume( this );										// and start this coroutine
+}
+void main( PingPong & pingpong ) with(pingpong) {		// ping's starter ::main, pong's starter ping
+	for ( N ) {											// N ping-pongs
+		sout | name;
+		cycle( part );
+	} // for
+}
+int main() {
+	enum { N = 5 };
+	PingPong ping = { "ping", N }, pong = { "pong", N, ping };	// ping is built without a partner
+	partner( ping, pong );								// set ping's partner and resume ping
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa Pingpong.cfa" //
+// End: //
Index: doc/papers/concurrency/examples/Pingpong.py
===================================================================
--- doc/papers/concurrency/examples/Pingpong.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Pingpong.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,32 @@
+def PingPong( name, N ):		# generator: prints name N times, yielding the partner after each print
+	partner = (yield)           # get partner
+	yield                       # resume scheduler
+	for i in range( N ):
+		print( name )
+		yield partner           # execute next
+	print( "end", name )		# reached only if resumed once more after the N-th yield
+
+def Scheduler():				# trampoline: keep running whichever coroutine the previous one yielded
+	n = (yield)                 # starting coroutine
+	while n is not None:        # a finished coroutine ends scheduling
+		n = next( n, None )     # schedule coroutine; default stops StopIteration escaping as RuntimeError (PEP 479)
+
+pi = PingPong( "ping", 5 )
+po = PingPong( "pong", 5 )
+next( pi )                      # prime
+pi.send( po )                   # send partner
+next( po )                      # prime
+po.send( pi )                   # send partner
+
+s = Scheduler();
+next( s )                       # prime
+try:
+	s.send( pi )				# start cycle
+except StopIteration:			# raised by send() when the scheduler generator finishes
+	print( "scheduler stop" )
+print( "stop" )
+
+# Local Variables: #
+# tab-width: 4 #
+# compile-command: "python3.5 Pingpong.py" #
+# End: #
Index: doc/papers/concurrency/examples/ProdCons.cfa
===================================================================
--- doc/papers/concurrency/examples/ProdCons.cfa	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/ProdCons.cfa	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,76 @@
+#include <fstream.hfa>
+#include <coroutine.hfa>
+#include <stdlib.hfa>									// random
+#include <unistd.h>										// getpid
+coroutine Cons;											// forward
+int delivery( Cons & cons, int p1, int p2 );
+void stop( Cons & cons );
+
+coroutine Prod {
+	Cons & c;											// consumer to deliver to
+	int N, money, receipt;								// N pairs to produce; money set by payment(); receipt counter
+};
+void main( Prod & prod ) with( prod ) {					// starter ::main
+	// 1st resume starts here
+	for ( i; N ) {										// N pairs of values
+		int p1 = random( 100 ), p2 = random( 100 );
+		sout | p1 | " " | p2;
+		int status = delivery( c, p1, p2 );
+		sout | " $" | money | nl | status;
+		receipt += 1;
+	}
+	stop( c );											// tell consumer to finish
+	sout | "prod stops";
+}
+int payment( Prod & prod, int m ) with(prod) {
+	money = m;
+	resume( prod );										// main 1st time, then
+	return receipt;										// prod in delivery
+}
+void start( Prod & prod, int N, Cons &c ) {
+	&prod.c = &c;										// bind the consumer reference
+	prod.[N, receipt] = [N, 0];
+	resume( prod );										// activate main
+}
+coroutine Cons {
+	Prod & p;											// producer to pay
+	int p1, p2, status;									// delivered pair; status counter
+	bool done;											// set by stop()
+};
+void ?{}( Cons & cons, Prod & p ) {
+	&cons.p = &p;										// bind the producer reference
+	cons.[status, done ] = [0, false];
+}
+void ^?{}( Cons & cons ) {}
+void main( Cons & cons ) with( cons ) {					// starter prod
+	// 1st resume starts here
+	int money = 1, receipt;
+	for ( ; ! done; ) {									// until stop() sets done
+		sout | p1 | " " | p2 | nl | " $" | money;
+		status += 1;
+		receipt = payment( p, money );
+		sout | " #" | receipt;
+		money += 1;
+	}
+	sout | "cons stops";
+}
+int delivery( Cons & cons, int p1, int p2 ) {
+	cons.[p1, p2] = [p1, p2];
+	resume( cons );										// main 1st time, then
+	return cons.status;									// cons in payment
+}
+void stop( Cons & cons ) {
+	cons.done = true;
+	resume( cons );										// activate payment
+}
+int main() {
+	Prod prod;
+	Cons cons = { prod };
+	srandom( getpid() );								// seed with the process id
+	start( prod, 5, cons );								// 5 pairs; resumes prod
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa ProdCons.cfa" //
+// End: //
Index: doc/papers/concurrency/examples/ProdCons.cpp
===================================================================
--- doc/papers/concurrency/examples/ProdCons.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/ProdCons.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,244 @@
+#include <cassert>
+#include <cstdlib>
+#include <iostream>
+#include <experimental/coroutine>
+#include <unistd.h>
+
+int random(int max) {	// std::rand() reduced modulo max
+	return std::rand() % max;
+}
+
+struct Prod;
+struct Cons;
+
+struct resumable {	// interface used by dispatch()
+	virtual resumable * resume() = 0;	// run one step; returns the next object to resume (null stops dispatch)
+};
+
+struct Prod : public resumable {	// move-only owner of the producer coroutine frame
+	struct local {	// producer state, stored in the promise
+		Cons * c;
+		int N, money, receipt;
+	};
+
+	struct promise_type {
+		local _l{};	// value-initialised so no field is read with an indeterminate value
+		resumable * next = nullptr;	// who dispatch() resumes after this coroutine suspends
+
+		Prod get_return_object() {
+			return Prod(std::experimental::coroutine_handle<promise_type>::from_promise(*this));
+		}
+
+		auto initial_suspend() { return std::experimental::suspend_never(); }	// body starts running as soon as it is called
+		auto final_suspend()   { return std::experimental::suspend_always(); }	// frame stays alive until the owner destroys it
+
+		void return_void() {}
+		void unhandled_exception() {}	// an exception escaping the body is ignored
+	};
+
+	struct data {	// awaitable: always suspends, returns the promise's local state when resumed
+		promise_type * _promise = nullptr;
+		bool await_ready() noexcept { return false; }
+		void await_suspend(std::experimental::coroutine_handle<promise_type> _coroutine) noexcept {
+			_promise = &_coroutine.promise();
+		}
+		local & await_resume() noexcept { assert(_promise); return _promise->_l; }
+	};
+
+	std::experimental::coroutine_handle<promise_type> _coroutine = nullptr;	// owned frame, or null
+
+	explicit Prod(std::experimental::coroutine_handle<promise_type> coroutine)
+		: _coroutine(coroutine)
+	{}
+
+	~Prod() {
+		if(_coroutine) { _coroutine.destroy(); }
+	}
+
+	Prod() = default;
+	Prod(Prod const &) = delete;
+	Prod& operator=(Prod const &) = delete;
+
+	Prod(Prod&& other) {
+		std::swap(_coroutine, other._coroutine);	// _coroutine starts as nullptr, so other ends up empty
+	}
+
+	Prod& operator=(Prod&& other) {
+		// Swap instead of overwriting: the frame this object owned moves to `other`,
+		// whose destructor destroys it.  Overwriting the handle leaked that frame.
+		// Self-move is a harmless self-swap, so no identity test is needed.
+		std::swap(_coroutine, other._coroutine);
+		return *this;
+	}
+
+	static Prod main();
+
+	struct payment_return;
+
+	payment_return payment(int money);
+
+	auto start(int N, Cons & c) {	// fill in the producer state; does not resume
+		_coroutine.promise()._l.c = &c;
+		_coroutine.promise()._l.N = N;
+		_coroutine.promise()._l.receipt = 0;
+	}
+
+	virtual resumable * resume() override final {
+		_coroutine.resume();
+		return _coroutine.promise().next;
+	}
+};
+
+struct Cons : public resumable {	// move-only owner of the consumer coroutine frame
+	struct local {	// consumer state, stored in the promise
+		Prod * p;
+		int p1, p2, status;
+		bool done;
+	};
+
+	struct promise_type {
+		local _l{};	// value-initialised: status starts at 0 and done at false before Cons::main reads them
+		resumable * next = nullptr;	// who dispatch() resumes after this coroutine suspends
+
+		Cons get_return_object() {
+			return Cons(std::experimental::coroutine_handle<promise_type>::from_promise(*this));
+		}
+
+		auto initial_suspend() { return std::experimental::suspend_never(); }	// body starts running as soon as it is called
+		auto final_suspend()   { return std::experimental::suspend_always(); }	// frame stays alive until the owner destroys it
+
+		void return_void() {}
+		void unhandled_exception() {}	// an exception escaping the body is ignored
+	};
+
+	struct data {	// awaitable: always suspends; on resume stores the producer pointer and returns the local state
+		Prod * _p;
+		data(Prod & prod) : _p(&prod) {}
+		promise_type * _promise = nullptr;
+		bool await_ready() noexcept { return false; }
+		void await_suspend(std::experimental::coroutine_handle<promise_type> _coroutine) noexcept {
+			_promise = &_coroutine.promise();
+		}
+		local & await_resume() noexcept { assert(_promise); _promise->_l.p = _p; return _promise->_l; }
+	};
+
+	std::experimental::coroutine_handle<promise_type> _coroutine = nullptr;	// owned frame, or null
+
+	explicit Cons(std::experimental::coroutine_handle<promise_type> coroutine)
+		: _coroutine(coroutine)
+	{}
+
+	~Cons() {
+		if(_coroutine) { _coroutine.destroy(); }
+	}
+
+	Cons() = default;
+	Cons(Cons const &) = delete;
+	Cons& operator=(Cons const &) = delete;
+
+	Cons(Cons&& other) {
+		std::swap(_coroutine, other._coroutine);	// _coroutine starts as nullptr, so other ends up empty
+	}
+
+	Cons& operator=(Cons&& other) {
+		// Swap instead of overwriting: the frame this object owned moves to `other`,
+		// whose destructor destroys it.  Overwriting the handle leaked that frame.
+		// Self-move is a harmless self-swap, so no identity test is needed.
+		std::swap(_coroutine, other._coroutine);
+		return *this;
+	}
+
+	static Cons main( Prod & prod );
+
+	auto deliver(int p1, int p2) {	// store the pair; the returned awaitable suspends the producer in favour of this consumer
+		_coroutine.promise()._l.p1 = p1;
+		_coroutine.promise()._l.p2 = p2;
+
+		struct ret {
+			int _status;
+			Cons * c;
+			bool await_ready() { return false; }
+			void await_suspend(std::experimental::coroutine_handle<Prod::promise_type> _coroutine) {
+				_coroutine.promise().next = c;	// dispatch() resumes the consumer next
+			}
+			int await_resume() { return _status; }
+		};
+		return ret{ _coroutine.promise()._l.status, this };	// status is read here, before the consumer runs
+	}
+
+	auto stop() {	// mark the consumer done; the returned awaitable hands it control one last time
+		_coroutine.promise()._l.done = true;
+		struct ret {
+			Cons * c;
+			Prod::promise_type * _promise;
+			bool await_ready() { return false; }
+			void await_suspend(std::experimental::coroutine_handle<Prod::promise_type> _coroutine) {
+				_promise = &_coroutine.promise();
+				_promise->next = c;
+			}
+			void await_resume() {
+				_promise->next = nullptr;	// nothing to resume after the producer finishes
+			}
+		};
+		return ret{this, nullptr};
+	}
+
+	virtual resumable * resume() override final {
+		_coroutine.resume();
+		return _coroutine.promise().next;
+	}
+};
+
+struct Prod::payment_return {	// awaitable returned by Prod::payment(); suspends the consumer in favour of the producer
+	int _receipt;	// value handed back by await_resume()
+	Prod * p;
+	bool await_ready() { return false; }
+	void await_suspend(std::experimental::coroutine_handle<Cons::promise_type> _coroutine) {
+		_coroutine.promise().next = p;	// dispatch() resumes the producer next
+	}
+	int await_resume() { return _receipt; }
+};
+
+Prod::payment_return Prod::payment(int money)  {	// record the payment; caller co_awaits the result
+	_coroutine.promise()._l.money = money;
+	return payment_return{ _coroutine.promise()._l.receipt, this };	// receipt is read here, before the producer runs
+}
+
+Prod Prod::main() {	// producer coroutine body
+	auto & p = co_await Prod::data();	// suspends at once; on resume p refers to the promise's state
+	for(int i = 0; i < p.N; i++) {
+		int p1 = random(100), p2 = random(100);
+		std::cout << p1 << " " << p2 << std::endl;
+		int status = co_await p.c->deliver(p1, p2);	// hand the pair to the consumer
+		std::cout << " $" << p.money << std::endl << status << std::endl;
+		p.receipt += 1;
+	}
+	co_await p.c->stop();	// let the consumer finish
+	std::cout << "prod stops" << std::endl;
+}
+
+Cons Cons::main( Prod & prod ) {	// consumer coroutine body
+	auto & c = co_await Cons::data( prod );	// suspends at once; on resume c refers to the promise's state
+	int money = 1, receipt;
+	for(;!c.done ;) {	// until stop() sets done
+		std::cout << c.p1 << " " << c.p2 << std::endl;
+		std::cout << " $ " << money << std::endl;
+		c.status += 1;
+		receipt = co_await c.p->payment( money );	// pay the producer
+		std::cout << " # " << receipt << std::endl;
+		money += 1;
+	}
+	std::cout << "cons stops" << std::endl;
+}
+
+void dispatch(resumable * r) {	// trampoline: resume r, then whatever it returns, until a resume returns null
+	while((r = r->resume()));
+}
+
+int main() {
+	auto prod = Prod::main();	// runs up to the first co_await
+	auto cons = Cons::main( prod );
+	std::srand( getpid() );	// seed std::rand(), which random(int) uses; srandom() seeds a different generator
+	prod.start(5, cons);
+	dispatch(&prod);	// drive both coroutines until the producer finishes
+}
Index: doc/papers/concurrency/examples/ProdCons.py
===================================================================
--- doc/papers/concurrency/examples/ProdCons.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/ProdCons.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,40 @@
+def Prod( N ):                  # generator: prints "prod" N times, yielding cons after each
+	cons = (yield)              # get cons
+	yield                       # resume scheduler
+	for i in range( N ):        # N rounds
+		print( "prod" )
+		yield cons              # execute next
+	print( "end", "prod" )      # reached on the resume after the last yield; the generator then finishes
+
+def Cons( N ):                  # generator: prints "cons" N times, yielding prod after each
+	prod = (yield)              # get prod
+	yield                       # resume scheduler
+	for i in range( N ):        # N rounds
+		print( "cons" )
+		yield prod              # execute next
+	print( "end", "cons" )      # reached only if resumed again after the last yield
+
+def Scheduler():
+	n = (yield)                 # starting coroutine
+	while True:
+		n = next( n )           # schedule coroutine
+
+prod = Prod( 5 )                # create the generators; nothing runs until primed
+cons = Cons( 5 )
+next( prod )                    # prime
+prod.send( cons )               # send cons
+next( cons )                    # prime
+cons.send( prod )               # send prod
+
+s = Scheduler();
+next( s )                       # prime
+try:
+	s.send( prod )				# start cycle
+except StopIteration:           # expected end of the run: raised out of s.send() when the scheduler generator terminates
+	print( "scheduler stop" )
+print( "stop" )
+
+# Local Variables: #
+# tab-width: 4 #
+# compile-command: "python3.5 ProdCons.py" #
+# End: #
Index: doc/papers/concurrency/examples/ProdCons.sim
===================================================================
--- doc/papers/concurrency/examples/ProdCons.sim	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/ProdCons.sim	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,104 @@
+BEGIN
+	CLASS Consumer( prod ); ! consumer coroutine paired with the producer passed in;
+   		REF(Producer) prod; ! constructor parameter;
+		HIDDEN p1, p2, status, done, Main;
+	BEGIN
+		INTEGER p1, p2, status; ! last delivered pair and the status handed back by delivery;
+		BOOLEAN done; ! set by stop to end the loop in main;
+		PROCEDURE main; ! consumer body, called from the constructor code after Detach;
+		BEGIN
+			INTEGER money, receipt;
+
+			money := 1;
+			WHILE NOT done DO BEGIN
+				OutText( "cons receives: " );
+				OutInt( p1, 3 );
+				OutText( ", " );
+				OutInt( p2, 3 );
+				status := status + 1;
+				OutText( " and pays $" );
+				OutInt( money, 3 ); OutImage;
+				receipt := prod.payment( money ); ! payment resumes the producer before returning the receipt;
+				OutText( "cons receipt #" );
+				OutInt( receipt, 3 ); OutImage;
+				money := money + 1;
+			END;
+			OutText( "cons stops" ); OutImage;
+		END;
+		INTEGER PROCEDURE delivery( p1p, p2p ); ! called by the producer to hand over a pair;
+			INTEGER p1p, p2p;
+		BEGIN
+			p1 := p1p;
+			p2 := p2p;
+			Resume( THIS Consumer ); ! transfer control to this consumer;
+			delivery := status; ! result is the status as it stands once control comes back here;
+		END;
+		PROCEDURE stop; ! called by the producer after its last delivery;
+		BEGIN
+			done := TRUE;
+			Call( THIS Consumer ); ! uses Call rather than Resume, unlike delivery;
+		END;
+		! Consumer constructor code;
+		status := 0;
+		done := FALSE;
+		Detach; ! suspend the new object and return to its creator, main runs on first resumption;
+		main;
+	END Consumer;
+
+	CLASS Producer; ! producer coroutine, connected to its consumer by start;
+		HIDDEN cons, N, money, receipt, Main;
+	BEGIN
+		REF(Consumer) cons; ! partner, set by start;
+		INTEGER N, money, receipt; ! N deliveries to make, last payment received, last receipt number;
+		PROCEDURE main; ! producer body, called from the constructor code after Detach;
+		BEGIN
+			INTEGER i, p1, p2, status;
+
+			FOR i := 1 STEP 1 UNTIL N DO BEGIN  
+				p1 := RandInt( 1, 100, p1 ); ! p1 is both the third argument and the result variable;
+				p2 := RandInt( 1, 100, p2 );
+				OutText( "prod delivers: " );
+				OutInt( p1, 3 ); OutText( ", " );
+				OutInt( p2, 3 ); OutImage;
+				status := cons.delivery( p1, p2 ); ! delivery resumes the consumer before returning its status;
+				OutText( "prod status: " );
+				OutInt( status, 3 ); OutImage;
+			END;
+			cons.stop; ! sets the done flag in the consumer and runs it;
+			OutText( "prod stops" ); OutImage;
+		END;
+		INTEGER PROCEDURE payment( moneyp ); ! called by the consumer to pay for a delivery;
+			INTEGER moneyp;
+		BEGIN
+			money := moneyp;
+			OutText( "prod payment of $" );
+			OutInt( money, 3 ); OutImage;
+			Resume( THIS Producer ); ! transfer control to this producer;
+			receipt := receipt + 1; ! executed once control comes back to this call;
+			payment := receipt;
+		END;
+		PROCEDURE start( Np, consp ); ! record the parameters and start the producer running;
+			INTEGER Np;
+			REF(Consumer) consp;
+		BEGIN
+			N := Np;
+			cons :- consp;
+			Resume( THIS Producer ); ! first resumption continues after Detach and enters main;
+		END;
+		! Producer constructor code;
+		receipt := 0;
+		Detach; ! suspend the new object and return to its creator, main runs on first resumption;
+		main;
+	END Producer;
+	! program main equivalent;
+	REF(Producer) prod;
+	REF(Consumer) cons;
+	prod :- NEW Producer; ! the constructor code runs up to Detach;
+	cons :- NEW Consumer( prod ); ! the consumer is given its producer at creation;
+	prod.start( 5, cons ); ! five deliveries, the producer runs first;
+END;
+
+! Local Variables: ;
+! tab-width: 4 ;
+! compile-command: "cim ProdCons.sim" ;
+! End: ;
Index: doc/papers/concurrency/examples/Refactor.py
===================================================================
--- doc/papers/concurrency/examples/Refactor.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/Refactor.py	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,29 @@
+def Recursion():                # generator whose body drives a chain of nested Refactor generators
+	def Refactor():             # each instance receives N, then creates and drives an instance for N - 1
+		N = (yield)             # value sent by the creator
+		print( N );
+		if N == 0:
+			yield 0             # base case: hand 0 back; this instance is never resumed afterwards
+		r = Refactor()
+		next( r )               # prime
+		x = r.send( N - 1 )     # value yielded by the nested instance
+		print( x );
+		yield x                 # pass the nested result up to the creator
+
+	N = (yield)                 # value sent by the module-level driver
+	print( N );
+	r = Refactor()
+	next( r )                   # prime
+	print( r.send( N - 1 ) )    # falling off the end here terminates the generator
+
+c = Recursion()
+next( c )                       # prime
+try:
+	c.send( 5 )                 # Recursion never yields again, so this send raises StopIteration
+except StopIteration:
+	print( "stop" )
+
+# Local Variables: #
+# tab-width: 4 #
+# compile-command: "python3.5 Refactor.py" #
+# End: #
Index: doc/papers/concurrency/examples/counter.cpp
===================================================================
--- doc/papers/concurrency/examples/counter.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/papers/concurrency/examples/counter.cpp	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,62 @@
+#include <iostream>
+#include <experimental/coroutine>
+
+struct counter_cor {	// move-only owner of a coroutine frame; the return type of counter()
+	struct promise_type {	// promise type looked up through the coroutine's return type
+		counter_cor get_return_object() {
+			return counter_cor(std::experimental::coroutine_handle<promise_type>::from_promise(*this));
+		}
+
+		auto initial_suspend() { return std::experimental::suspend_never(); }	// body starts running inside the call
+		auto final_suspend()   { return std::experimental::suspend_never(); }	// NOTE(review): a coroutine that finishes frees its own frame, leaving _coroutine dangling
+
+		void return_void() {}
+
+		void unhandled_exception() {}	// exceptions escaping the coroutine body are ignored
+	};
+
+	std::experimental::coroutine_handle<promise_type> _coroutine = nullptr;	// owned frame, null when empty or moved-from
+
+	explicit counter_cor(std::experimental::coroutine_handle<promise_type> coroutine)
+		: _coroutine(coroutine)
+	{}
+
+	~counter_cor() {
+		if(_coroutine) { _coroutine.destroy(); }	// release the owned frame, if any
+	}
+
+	counter_cor() = default;
+	counter_cor(counter_cor const &) = delete;	// copying is disabled: exactly one owner per frame
+	counter_cor& operator=(counter_cor const &) = delete;
+
+	counter_cor(counter_cor&& other) {	// _coroutine starts as nullptr, so the swap leaves other empty
+		std::swap(_coroutine, other._coroutine);
+	}
+
+	counter_cor& operator=(counter_cor&& other) {
+		if(&other != this) {
+			// swap instead of overwrite: other's destructor destroys the frame *this owned (it was leaked before)
+			std::swap(_coroutine, other._coroutine);
+		}
+		return *this;
+	}
+
+	void resume() { _coroutine.resume(); }	// precondition: _coroutine is non-null and suspended
+};
+
+counter_cor counter() {	// coroutine: prints once when called, then once per resume
+	std::cout << "Counter: called\n";	// runs during the initial call because initial_suspend() never suspends
+	for(unsigned i = 1;; i++) {	// no exit condition: the coroutine never returns
+		co_await std::experimental::suspend_always{};	// suspend until the owner calls resume()
+		std::cout << "Counter: Resumed " << i << " time(s)\n";
+	}
+}
+
+int main() {	// drive the counter coroutine through two resumptions
+	std::cout << "Main: calling counter\n";
+	auto c = counter();	// runs counter() up to its first co_await
+	std::cout << "Main: resumes\n";
+	c.resume();	// prints "Counter: Resumed 1 time(s)"
+	c.resume();	// prints "Counter: Resumed 2 time(s)"
+	std::cout << "Main: done\n";
+}	// c's destructor destroys the still-suspended coroutine frame
Index: doc/proposals/interned_string.cc
===================================================================
--- doc/proposals/interned_string.cc	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/proposals/interned_string.cc	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,8 @@
+// Copyright (c) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in 
+// the file "LICENCE" distributed with this repository.
+
+#include "interned_string.h"
+
+std::unordered_set< std::string > interned_string::canonical;	// out-of-class definition of the static set declared in interned_string.h
Index: doc/proposals/interned_string.h
===================================================================
--- doc/proposals/interned_string.h	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/proposals/interned_string.h	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,57 @@
+#pragma once
+
+// Copyright (c) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in 
+// the file "LICENCE" distributed with this repository.
+
+#include <functional>
+#include <string>
+#include <unordered_set>
+#include <utility>
+
+/// Keeps canonical copies of a std::string for quicker comparisons
+class interned_string {
+	/// Shared map of canonical string representations
+	static std::unordered_set< std::string > canonical;	// one element per distinct string value
+
+	/// Canonical representation of empty string
+	static const std::string* empty_string() {
+		static const std::string* mt = [](){	// computed once, on first call
+			return &*canonical.emplace( "" ).first;
+		}();
+		return mt;
+	}
+
+	/// Canonicalize string
+	template<typename S>
+	static const std::string* intern( S&& s ) {
+		return &*canonical.emplace( std::forward<S>(s) ).first;	// emplace yields the existing element when the value is already present
+	}
+
+	/// Pointer to stored string
+	const std::string* s;	// always points at an element of canonical
+	
+public:
+	interned_string() : s{empty_string()} {}	// default value is the canonical empty string
+	interned_string(const char* cs) : s{intern(cs)} {}	// implicit conversion from a C string
+	interned_string(const std::string& ss) : s{intern(ss)} {}	// implicit conversion from std::string
+
+	operator const std::string& () const { return *s; }	// read-only view of the canonical copy
+
+	bool operator== (const interned_string& o) const { return s == o.s; }	// pointer comparison: equal values share one canonical element
+	bool operator!= (const interned_string& o) const { return s != o.s; }
+	bool operator< (const interned_string& o) const { return *s < *o.s; }	// orders by string contents, not by address
+};
+
+inline std::ostream& operator<< (std::ostream& out, const interned_string& s) {	// stream insertion for interned_string
+	return out << (const std::string&)s;	// print the underlying canonical std::string
+}
+
+namespace std {
+	template<> struct hash<interned_string> {	// lets interned_string be used as a key in unordered containers
+		std::size_t operator() (const interned_string& s) const {
+			return std::hash<const std::string*>{}( &(const std::string&)s );	// hash the canonical pointer, matching the pointer-based operator==
+		}
+	};
+}
Index: doc/proposals/specialized_casts.md
===================================================================
--- doc/proposals/specialized_casts.md	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
+++ doc/proposals/specialized_casts.md	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -0,0 +1,32 @@
+## Specialized Casts ##
+
+There is some use in Cforall for cast operators with semantics other than the standard C cast. To make these alternate casts look like the familiar C cast, this proposal follows the example of the virtual proposal's virtual cast `(virtual Foo)x` and uses an added (pseudo-)keyword inside the cast parens.
+
+### C (Conversion) Cast ###
+The standard C cast performs _conversions_, transformations between types which may make a new object with a different in-memory representation. Cforall maintains these semantics in a backward-compatible way while accounting for name overloading by choosing the lowest-cost interpretation of the argument expression which is convertible to the target type, breaking ties by conversion cost.
+
+The C cast must be maintained for backward-compatibility, and developing a second cast operator with identical semantics seems an undesirable multiplication of language features, but `(convert Foo)` or `(to Foo)` would be reasonable options for a keyword. An alternate semantics for a Cforall-specific conversion cast would be to choose the cast interpretation with the lowest sum of conversion cost and interpretation cost, which aligns better with the Cforall function-call resolution algorithm.
+
+### Ascription Cast ###
+Using casts in Cforall for type _ascription_ ("select the interpretation of this type") works by the conversion-cost tiebreaker behaviour of the cast operator. However, the ascription interpretation of casts is prioritized less than the conversion interpretation of casts, sometimes resulting in some surprising results, as in the following example:
+
+	int f(int);      // f1
+	int f(double);   // f2
+	int g(int);      // g1
+	double g(long);  // g2
+
+	f((double)42);   // selects f2 by cast on argument
+	(double)g(42);   // does NOT select g2, argument conversion cost results in g1
+
+An ascription cast which reversed the priorities of the C cast would be useful for selecting expressions based on their return type; a reversal of the priorities of the standard C cast would work for this (that is, select the lowest-cost conversion, breaking ties based on argument cost). A plausible stricter semantics would be to select the cheapest interpretation with a zero-cost conversion to the target type, reporting a compiler error otherwise (this semantics would make ascription a solely compile-time phenomenon, rather than relying on possible runtime conversions). A reasonable keyword would be `(as Foo)`, which is short, evocative, and echoes "ascription"; `(return Foo)` would not introduce new keywords, and speaks to its use in return-type selection, as in the following corrected version of the example above:
+
+	(as double)g(42);  // selects g2, as expected (under either presented ascription semantics)
+
+### Coercion Cast ###
+Some of the explicit conversions in C are defined to be _coercions_ (reinterpret the bits of this value as another type). Use of coercions often relies on non-standard implementation details of the provided environment, and as such is discouraged, but is sometimes necessary. Since all explicit pointer casts in C are coercions, any lvalue `x` in C/Cforall can be coerced with the pattern `*(Foo*)&x`, but this is complex and doesn't extend to rvalues. A dedicated coercion cast would solve these issues; `(reinterpret Foo)` (from C++), `(transmute Foo)` (from Rust), or `(coerce Foo)` would be reasonable keywords.
+
+### Qualifier Cast ###
+A more restricted (and thus safer) form of coercion is modifying the qualifiers of a type; C++ has `const_cast` for this purpose, and a similar feature would be useful for Cforall. With regard to syntax, `(requalify const Foo)`/`(requalify Foo)` to add/strip `const` would echo C++, but given that the vast majority of uses are stripping const-qualifiers, `(non const)` would be shorter, clearer, easily searchable, and not require the programmer to exactly match the argument type. In this syntax, coercion casts could be used to add qualifiers, or another cast type (say `(with const)`) could be introduced to add qualifiers.
+
+### Virtual Cast ###
+_see virtual.txt; semantics equivalent to C++ dynamic cast_
Index: doc/proposals/vtable.md
===================================================================
--- doc/proposals/vtable.md	(revision 69c37ccfbd57ffcac000650a919c9708faf8c701)
+++ doc/proposals/vtable.md	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -11,4 +11,9 @@
 should be able to store anything that goes into a trait.
 
+I also include notes on a sample implementation, which primarily exists to show
+there is a reasonable implementation. The code samples for that are in a slight
+pseudo-code to help avoid name mangling and keep some CFA features while they
+would actually be written in C.
+
 Trait Instances
 ---------------
@@ -42,12 +47,120 @@
 before.
 
-Internally a trait object is a pair of pointers. One to an underlying object
-and the other to the vtable. All calls on an trait are implemented by looking
-up the matching function pointer and passing the underlying object and the
-remaining arguments to it.
-
-Trait objects can be moved by moving the pointers. Almost all other operations
-require some functions to be implemented on the underlying type. Depending on
-what is in the virtual table a trait type could be a dtype or otype.
+For traits to be used this way they should meet two requirements. First they
+should only have a single polymorphic type and each assertion should use that
+type once as a parameter. Extensions may later loosen these requirements.
+
+If a trait object is used it should generate a series of implicit functions
+each of which implements one of the functions required by the trait. So for
+combiner there is an implicit:
+
+    void combine(trait combiner & this, int);
+
+This function is the one actually called at the end.
+
+The main use case for trait objects is that they can be stored. They can be
+passed into functions, but using the trait directly is preferred in this case.
+
+    trait drawable(otype T) {
+        void draw(Surface & to, T & draw);
+        Rect(int) drawArea(T & draw);
+    };
+
+    struct UpdatingSurface {
+        Surface * surface;
+        vector(trait drawable) drawables;
+    };
+
+    void updateSurface(UpdatingSurface & us) {
+        for (size_t i = 0 ; i < us.drawables.size ; ++i) {
+            draw(us.surface, us.drawables[i]);
+        }
+    }
+
+Currently these traits are limited to 1 trait parameter and functions should
+have exactly 1 parameter of that type. We cannot abstract away pairs of types
+and still pass them into normal functions, which take them separately.
+
+The second is required because we need to get the vtable from somewhere.
+If there are 0 trait objects then no vtable is available, if we have more than
+1 then the vtables give conflicting answers on what underlying function to
+call. And even then the underlying type assumes a concrete type.
+
+This loop can sort of be broken by using the trait object directly in the
+signature. This has well defined meaning, but might not be useful.
+
+    trait example(otype T) {
+        bool test(T & this, trait example & that);
+    }
+
+#### Sample Implementation
+A simple way to implement trait objects is by a pair of pointers. One to the
+underlying object and one to the vtable.
+
+    struct vtable_drawable {
+        void (*draw)(Surface &, void *);
+        Rect(int) (*drawArea)(void *);
+    };
+
+    struct drawable {
+        void * object;
+        vtable_drawable * vtable;
+    };
+
+The functions that run on the trait object would generally be generated using
+the following pattern:
+
+    void draw(Surface & surface, drawable & traitObj) {
+        return traitObj.vtable->draw(surface, traitObj.object);
+    }
+
+There may have to be special cases for things like copy construction, that
+might require a more significant wrapper. On the other hand moving could be
+implemented by moving the pointers without any need to refer to the base
+object.
+
+### Extension: Multiple Trait Parameters
+Currently, this gives traits two independent uses. They use the same syntax,
+except for limits boxable traits have, and yet don't really mix. The most
+natural way to do this is to allow trait instances to pick one parameter
+that they are generic over, the others they choose types to implement.
+
+Of the two ways to do the selection, the first is to do it at the trait definition.
+Each trait picks out a single parameter which it can box (here the `virtual`
+qualifier). When you create an instance of a trait object you provide
+arguments like for a generic structure, but skip over the marked parameter.
+
+    trait combiner(virtual otype T, otype Combined) {
+        void combine(T &, Combined &);
+    }
+
+    trait combiner(int) int_combiner;
+
+The second is to do it at the instantiation point. A placeholder (here the
+keyword `virtual`) is used to explicitly skip over the parameter that will be
+abstracted away, with the same rules as above if it was the marked parameter.
+
+    trait combiner(otype T, otype Combined) {
+        void combine(T &, Combined &);
+    };
+
+    trait combiner(virtual, int) int_combiner;
+
+Using both (first to set the default, second as a local override) would also
+work, although might be excessively complicated.
+
+This is useful in cases where you want to use a generic type, but leave part
+of it open and store a partially generic result. As a simple example:
+
+    trait folder(otype T, otype In, otype Out) {
+        void fold(T & this, In);
+        Out fold_result(T & this);
+    }
+
+Which allows you to fold values without putting them in a container. If they
+are already in a container this is excessive, but if they are generated over
+time this gives you a simple interface. This could for instance be used in
+a profile, where T changes for each profiling statistic and you can plug in
+multiple profilers for any run by adding them to an array.
 
 Hierarchy
@@ -90,19 +203,130 @@
 the pointer to it.
 
+Exception Example:
+(Also I'm not sure where I got these casing rules.)
+
+    trait exception(otype T) virtual() {
+        char const * what(T & this);
+    }
+
+    trait io_error(otype T) virtual(exception) {
+        FILE * which_file(T & this);
+    }
+
+    struct eof_error(otype T) virtual(io_error) {
+        FILE * file;
+    }
+
+    char const * what(eof_error &) {
+        return "Tried to read from an empty file.";
+    }
+
+    FILE * which_file(eof_error & this) {
+        return this.file;
+    }
+
+Ast Example:
+
+    trait ast_node(otype T) virtual() {
+        void print(T & this, ostream & out);
+        void visit(T & this, Visitor & visitor);
+        CodeLocation const & get_code_location(T & this);
+    }
+
+    trait expression_node(otype T) virtual(ast_node) {
+        Type eval_type(T const & this);
+    }
+
+    struct operator_expression virtual(expression_node) {
+        enum operator_kind kind;
+        trait expression_node rands[2];
+    }
+
+    trait statement_node(otype T) virtual(ast_node) {
+        vector(Label) & get_labels(T & this);
+    }
+
+    struct goto_statement virtual(statement_node) {
+        vector(Label) labels;
+        Label target;
+    }
+
+    trait declaration_node(otype T) virtual(ast_node) {
+        string name_of(T const & this);
+        Type type_of(T const & this);
+    }
+
+    struct using_declaration virtual(declaration_node) {
+        string new_type;
+        Type old_type;
+    }
+
+    struct variable_declaration virtual(declaration_node) {
+        string name;
+        Type type;
+    }
+
+#### Sample Implementation
+The type id may be as little as:
+
+    struct typeid {
+        struct typeid const * const parent;
+    };
+
+Some linker magic would have to be used to ensure exactly one copy of each
+structure for each type exists in memory. There seem to be special once
+sections that support this and it should be easier than generating unique
+ids across compilation units.
+
+The structure could be extended to contain any additional type information.
+
+There are two general designs for vtables with type ids. The first is to put
+the type id at the top of the vtable, this is the most compact and efficient
+solution but only works if we have exactly 1 vtable for each type. The second
+is to put a pointer to the type id in each vtable. This has more overhead but
+allows multiple vtables.
+
+    struct <trait>_vtable {
+        struct typeid const id;
+
+        // Trait dependent list of vtable members.
+    };
+
+    struct <trait>_vtable {
+        struct typeid const * const id;
+
+        // Trait dependent list of vtable members.
+    };
+
+### Virtual Casts
+To convert from a pointer to a type higher on the hierarchy to one lower on
+the hierarchy a check is used to make sure that the underlying type is also
+of that lower type.
+
+The proposed syntax for this is:
+
+    trait SubType * new_value = (virtual trait SubType *)super_type;
+
+It will return the same pointer if it does point to the subtype and null if
+it does not, doing the check and conversion in one operation.
+
 ### Inline vtables
 Since the structures here are usually made to be turned into trait objects
 it might be worth it to have fields on them to store the virtual table
-pointer. This would have to be declared on the trait as an assertion, but if
-it is the trait object could be a single pointer.
-
-It is trivial to do if the field with the virtual table pointer is fixed.
-Otherwise some trickery with pointing to the field and storing the offset in
-the virtual table to recover the main object would have to be used.
+pointer. This would have to be declared on the trait as an assertion (example:
+`vtable;` or `T.vtable;`), but if it is the trait object could be a single
+pointer.
+
+There are also three options for where the pointer to the vtable is stored. It
+could be anywhere, a fixed location for each trait or always at the front. For
+the per-trait solution an extension is needed to specify where it is (example
+`vtable[0];`), which could also be used to combine it with others. So these
+options can be combined to allow access to all three options.
 
 ### Virtual Tables as Types
-Here we consider encoding plus the implementation of functions on it. Which
-is to say in the type hierarchy structures aren't concrete types anymore,
-instead they are parent types to vtables, which combine the encoding and
-implementation.
+Here we consider encoding plus the implementation of functions on it to be a
+type. Which is to say in the type hierarchy structures aren't concrete types
+anymore, instead they are parent types to vtables, which combine the encoding
+and implementation.
 
 Resolution Scope
@@ -123,6 +347,18 @@
 other.
 
-Some syntax would have to be added. All resolutions can be found at compile
-time and a single vtable created for each type at compilation time.
+Some syntax would have to be added to specify the resolution point. To ensure
+a single instance there may have to be two variants, one forward declaration
+and one to create the instance. With some compiler magic the forward
+declaration may be enough.
+
+    extern trait combiner(struct summation) vtable;
+    trait combiner(struct summation) vtable;
+
+Or (with the same variants):
+
+    vtable combiner(struct summation);
+
+The extern variant promises that the vtable will exist while the normal one
+is where the resolution actually happens.
 
 ### Explicit Resolution Points:
@@ -141,4 +377,26 @@
 vtable.
 
+    extern trait combiner(struct summation) vtable sum;
+    trait combiner(struct summation) vtable sum;
+
+    extern trait combiner(struct summation) vtable sum default;
+    trait combiner(struct summation) vtable sum default;
+
+The extern difference is the same as before. The name (sum in the samples) is
+used at the binding site to say which one is picked. The default keyword can
+be used in only some of the declarations.
+
+    trait combiner fee = (summation_instance, sum);
+    trait combiner foe = summation_instance;
+
+(I am not really happy about this syntax, but it kind of works.)
+The object being bound is required. The name of the vtable is optional if
+there is exactly one vtable name marked with default.
+
+These could also be placed inside functions. In which case both the name and
+the default keyword might be optional. If the name is omitted in an assignment
+the closest vtable is chosen (returning to the global default rule if no
+appropriate local vtable is in scope).
+
 ### Site Based Resolution:
 Every place in code where the binding of a vtable to an object occurs has
Index: doc/user/user.tex
===================================================================
--- doc/user/user.tex	(revision 69c37ccfbd57ffcac000650a919c9708faf8c701)
+++ doc/user/user.tex	(revision f343c6b9c8b5f408f8025426215c2bc3b3ba5ede)
@@ -11,6 +11,6 @@
 %% Created On       : Wed Apr  6 14:53:29 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Tue Dec 11 23:19:26 2018
-%% Update Count     : 3400
+%% Last Modified On : Sun Apr 14 11:02:34 2019
+%% Update Count     : 3443
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -508,16 +508,22 @@
 
 As for \Index{division}, there are exponentiation operators for integral and floating types, including the builtin \Index{complex} types.
-Unsigned integral exponentiation\index{exponentiation!unsigned integral} is performed with repeated multiplication\footnote{The multiplication computation is $O(\log y)$.} (or shifting if the base is 2).
-Signed integral exponentiation\index{exponentiation!signed integral} is performed with repeated multiplication (or shifting if the base is 2), but yields a floating result because $x^{-y}=1/x^y$.
-Hence, it is important to designate exponent integral-constants as unsigned or signed: ©3 \ 3u© return an integral result, while ©3 \ 3© returns a floating result.
-Floating exponentiation\index{exponentiation!floating} is performed using \Index{logarithm}s\index{exponentiation!logarithm}, so the base cannot be negative.
-\begin{cfa}
-sout | 2 ®\® 8u | 4 ®\® 3u | -4 ®\® 3u | 4 ®\® -3 | -4 ®\® -3 | 4.0 ®\® 2.1 | (1.0f+2.0fi) ®\® (3.0f+2.0fi);
-256 64 -64 0.015625 -0.015625 18.3791736799526 0.264715-1.1922i
-\end{cfa}
+Integral exponentiation\index{exponentiation!unsigned integral} is performed with repeated multiplication\footnote{The multiplication computation is $O(\log y)$.} (or shifting if the base is 2).
+Overflow from large exponents or negative exponents returns zero.
+Floating exponentiation\index{exponentiation!floating} is performed using \Index{logarithm}s\index{exponentiation!logarithm}, so the exponent cannot be negative.
+\begin{cfa}
+sout | 1 ®\® 0 | 1 ®\® 1 | 2 ®\® 8 | -4 ®\® 3 | 5 ®\® 3 | 5 ®\® 32 | 5L ®\® 32 | 5L ®\® 64 | -4 ®\® -3 | -4.0 ®\® -3 | 4.0 ®\® 2.1
+	   | (1.0f+2.0fi) ®\® (3.0f+2.0fi);
+1 1 256 -64 125 ®0® 3273344365508751233 ®0® ®0® -0.015625 18.3791736799526 0.264715-1.1922i
+\end{cfa}
+Note, ©5 ®\® 32© and ©5L ®\® 64© overflow, and ©-4 ®\® -3© is a fraction but stored in an integer so all three computations generate an integral zero.
 Parenthesis are necessary for complex constants or the expression is parsed as ©1.0f+®(®2.0fi \ 3.0f®)®+2.0fi©.
-The exponentiation operator is available for all the basic types, but for user-defined types, only the integral-computation versions are available.
-For returning an integral value, the user type ©T© must define multiplication, ©*©, and one, ©1©;
-for returning a floating value, an additional divide of type ©T© into a ©double© returning a ©double© (©double ?/?( double, T )©) is necessary for negative exponents.
+The exponentiation operator is available for all the basic types, but for user-defined types, only the integral-computation version is available.
+\begin{cfa}
+forall( otype OT | { void ?{}( OT & this, one_t ); OT ?*?( OT, OT ); } )
+OT ?®\®?( OT ep, unsigned int y );
+forall( otype OT | { void ?{}( OT & this, one_t ); OT ?*?( OT, OT ); } )
+OT ?®\®?( OT ep, unsigned long int y );
+\end{cfa}
+The user type ©OT© must define multiplication, ©*©, and one, ©1©.
 
 
@@ -549,72 +555,90 @@
 \subsection{Loop Control}
 
-The ©for©/©while©/©do-while© loop-control allows empty or simplified ranges.
+The ©for©/©while©/©do-while© loop-control allows empty or simplified ranges (see Figure~\ref{f:LoopControlExamples}).
+\begin{itemize}
+\item
 An empty conditional implies ©1©.
-The up-to range ©~©\index{~@©~©} means exclusive range [M,N);
-the up-to range ©~=©\index{~=@©~=©} means inclusive range [M,N].
-The down-to range ©-~©\index{-~@©-~©} means exclusive range [N,M);
-the down-to range ©-~=©\index{-~=@©-~=©} means inclusive range [N,M].
+\item
+The up-to range ©~©\index{~@©~©} means exclusive range [M,N).
+\item
+The up-to range ©~=©\index{~=@©~=©} means inclusive range [M,N].
+\item
+The down-to range ©-~©\index{-~@©-~©} means exclusive range [N,M).
+\item
+The down-to range ©-~=©\index{-~=@©-~=©} means inclusive range [N,M].
+\item
+©@© means put nothing in this field.
+\item
 ©0© is the implicit start value;
+\item
 ©1© is the implicit increment value.
+\item
 The up-to range uses ©+=© for increment;
-the down-to range uses ©-=© for decrement.
+\item
+The down-to range uses ©-=© for decrement.
+\item
 The loop index is polymorphic in the type of the start value or comparison value when start is implicitly ©0©.
+\end{itemize}
+
+\begin{figure}
 \begin{cquote}
-\begin{tabular}{@{}ll|l@{}}
-\multicolumn{2}{c|}{loop control} & \multicolumn{1}{c}{output} \\
+\begin{tabular}{@{}l|l@{}}
+\multicolumn{1}{c|}{loop control} & \multicolumn{1}{c}{output} \\
 \hline
 \begin{cfa}
-while ®()® { sout | "empty"; break; }
-do { sout | "empty"; break; } while ®()®;
-for ®()® { sout | "empty"; break; }
-for ( ®0® ) { sout | "A"; }
-for ( ®1® ) { sout | "A"; }
-for ( ®10® ) { sout | "A"; }
-for ( ®1 ~= 10 ~ 2® ) { sout | "B"; }
-for ( ®10 -~= 1 ~ 2® ) { sout | "C"; }
-for ( ®0.5 ~ 5.5® ) { sout | "D"; }
-for ( ®5.5 -~ 0.5® ) { sout | "E"; }
-for ( ®i; 10® ) { sout | i; }
-for ( ®i; 1 ~= 10 ~ 2® ) { sout | i; }
-for ( ®i; 10 -~= 1 ~ 2® ) { sout | i; }
-for ( ®i; 0.5 ~ 5.5® ) { sout | i; }
-for ( ®i; 5.5 -~ 0.5® ) { sout | i; }
-for ( ®ui; 2u ~= 10u ~ 2u® ) { sout | ui; }
-for ( ®ui; 10u -~= 2u ~ 2u® ) { sout | ui; }
+sout | nlOff;
+while ®()® { sout | "empty"; break; } sout | nl;
+do { sout | "empty"; break; } while ®()®; sout | nl;
+for ®()® { sout | "empty"; break; } sout | nl;
+for ( ®0® ) { sout | "A"; } sout | "zero" | nl;
+for ( ®1® ) { sout | "A"; } sout | nl;
+for ( ®10® ) { sout | "A"; } sout | nl;
+for ( ®1 ~= 10 ~ 2® ) { sout | "B"; } sout | nl;
+for ( ®10 -~= 1 ~ 2® ) { sout | "C"; } sout | nl;
+for ( ®0.5 ~ 5.5® ) { sout | "D"; } sout | nl;
+for ( ®5.5 -~ 0.5® ) { sout | "E"; } sout | nl;
+for ( ®i; 10® ) { sout | i; } sout | nl;
+for ( ®i; 1 ~= 10 ~ 2® ) { sout | i; } sout | nl;
+for ( ®i; 10 -~= 1 ~ 2® ) { sout | i; } sout | nl;
+for ( ®i; 0.5 ~ 5.5® ) { sout | i; } sout | nl;
+for ( ®i; 5.5 -~ 0.5® ) { sout | i; } sout | nl;
+for ( ®ui; 2u ~= 10u ~ 2u® ) { sout | ui; } sout | nl;
+for ( ®ui; 10u -~= 2u ~ 2u® ) { sout | ui; } sout | nl;
 enum { N = 10 };
-for ( ®N® ) { sout | "N"; }
-for ( ®i; N® ) { sout | i; }
-for ( ®i; N -~ 0® ) { sout | i; }
+for ( ®N® ) { sout | "N"; } sout | nl;
+for ( ®i; N® ) { sout | i; } sout | nl;
+for ( ®i; N -~ 0® ) { sout | i; } sout | nl;
 const int start = 3, comp = 10, inc = 2;
-for ( ®i; start ~ comp ~ inc + 1® ) { sout | i; }
+for ( ®i; start ~ comp ~ inc + 1® ) { sout | i; } sout | nl;
+for ( ®i; 1 ~ @® ) { if ( i > 10 ) break;
+	sout | i; } sout | nl;
+for ( ®i; 10 -~ @® ) { if ( i < 0 ) break;
+	sout | i; } sout | nl;
+for ( ®i; 2 ~ @ ~ 2® ) { if ( i > 10 ) break;
+	sout | i; } sout | nl;
+for ( ®i; 2.1 ~ @ ~ @® ) { if ( i > 10.5 ) break;
+	sout | i; i += 1.7; } sout | nl;
+for ( ®i; 10 -~ @ ~ 2® ) { if ( i < 0 ) break;
+	sout | i; } sout | nl;
+for ( ®i; 12.1 ~ @ ~ @® ) { if ( i < 2.5 ) break;
+	sout | i; i -= 1.7; } sout | nl;
+for ( ®i; 5 : j; -5 ~ @® ) { sout | i | j; } sout | nl;
+for ( ®i; 5 : j; -5 -~ @® ) { sout | i | j; } sout | nl;
+for ( ®i; 5 : j; -5 ~ @ ~ 2® ) { sout | i | j; } sout | nl;
+for ( ®i; 5 : j; -5 -~ @ ~ 2® ) { sout | i | j; } sout | nl;
+for ( ®j; -5 ~ @ : i; 5® ) { sout | i | j; } sout | nl;
+for ( ®j; -5 -~ @ : i; 5® ) { sout | i | j; } sout | nl;
+for ( ®j; -5 ~ @ ~ 2 : i; 5® ) { sout | i | j; } sout | nl;
+for ( ®j; -5 -~ @ ~ 2 : i; 5® ) { sout | i | j; } sout | nl;
+for ( ®j; -5 -~ @ ~ 2 : i; 5 : k; 1.5 ~ @® ) {
+	sout | i | j | k; } sout | nl;
+for ( ®j; -5 -~ @ ~ 2 : k; 1.5 ~ @ : i; 5® ) {
+	sout | i | j | k; } sout | nl;
+for ( ®k; 1.5 ~ @ : j; -5 -~ @ ~ 2 : i; 5® ) {
+	sout | i | j | k; } sout | nl;
 \end{cfa}
 &
 \begin{cfa}
-sout | nl;
-sout | nl;
-sout | nl;
-sout | "zero" | nl;
-sout | nl;
-sout | nl;
-sout | nl;
-sout | nl;
-sout | nl;
-sout | nl;
-sout | nl;
-sout | nl;
-sout | nl;
-sout | nl;
-sout | nl;
-sout | nl;
-sout | nl | nl;
-
-sout | nl;
-sout | nl;
-sout | nl | nl;
-
-sout | nl;
-\end{cfa}
-&
-\begin{cfa}
+
 empty
 empty
@@ -640,7 +664,36 @@
 
 3 6 9
+
+1 2 3 4 5 6 7 8 9 10
+
+10 9 8 7 6 5 4 3 2 1 0
+
+2 4 6 8 10
+
+2.1 3.8 5.5 7.2 8.9
+
+10 8 6 4 2 0
+
+12.1 10.4 8.7 7 5.3 3.6
+0 -5 1 -4 2 -3 3 -2 4 -1
+0 -5 1 -6 2 -7 3 -8 4 -9
+0 -5 1 -3 2 -1 3 1 4 3
+0 -5 1 -7 2 -9 3 -11 4 -13
+0 -5 1 -4 2 -3 3 -2 4 -1
+0 -5 1 -6 2 -7 3 -8 4 -9
+0 -5 1 -3 2 -1 3 1 4 3
+0 -5 1 -7 2 -9 3 -11 4 -13
+
+0 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
+
+0 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
+
+0 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
 \end{cfa}
 \end{tabular}
 \end{cquote}
+\caption{Loop Control Examples}
+\label{f:LoopControlExamples}
+\end{figure}
 
 
@@ -1320,5 +1373,8 @@
 \end{cfa}
 Essentially, the return type is wrapped around the routine name in successive layers (like an \Index{onion}).
-While attempting to make the two contexts consistent is a laudable goal, it has not worked out in practice.
+While attempting to make the two contexts consistent is a laudable goal, it has not worked out in practice, even though Dennis Ritchie believed otherwise:
+\begin{quote}
+In spite of its difficulties, I believe that the C's approach to declarations remains plausible, and am comfortable with it; it is a useful unifying principle.~\cite[p.~12]{Ritchie93}
+\end{quote}
 
 \CFA provides its own type, variable and routine declarations, using a different syntax.
