Index: benchmark/Makefile.am
===================================================================
--- benchmark/Makefile.am	(revision 35a408b7afe5994cf2251ff7078ad4636f315997)
+++ benchmark/Makefile.am	(revision b0ab7853291fa2ff5960f20cb326515dcc4c6655)
@@ -11,6 +11,6 @@
 ## Created On       : Sun May 31 09:08:15 2015
 ## Last Modified By : Peter A. Buhr
-## Last Modified On : Sun Jun 23 12:34:29 2019
-## Update Count     : 52
+## Last Modified On : Mon Jun 24 16:45:42 2019
+## Update Count     : 53
 ###############################################################################
 
@@ -31,5 +31,4 @@
 BENCH_V_JAVAC = $(__bench_v_JAVAC_$(__quiet))
 BENCH_V_UPP = $(__bench_v_UPP_$(__quiet))
-BENCH_V_QTHREAD = $(__bench_v_QTHREAD_$(__quiet))
 
 __quiet = verbose
@@ -46,5 +45,4 @@
 __bench_v_JAVAC_verbose = $(AM_V_JAVAC)
 __bench_v_UPP_verbose = $(AM_V_UPP)
-__bench_v_QTHREAD_verbose = $(AM_V_CC)
 
 
@@ -176,6 +174,5 @@
 	ctxswitch-upp_thread.run	\
 	ctxswitch-goroutine.run		\
-	ctxswitch-java_thread.run	\
-	ctxswitch-qthreads.run
+	ctxswitch-java_thread.run
 
 
@@ -224,7 +221,4 @@
 	@echo "java JavaThread" >> a.out
 	@chmod a+x a.out
-
-ctxswitch-qthreads$(EXEEXT):
-	$(BENCH_V_QTHREADS)$(COMPILE) -DBENCH_N=50000000 -I/u/pabuhr/software/qthreads/include -L/u/pabuhr/software/qthreads/lib -Xlinker -R/u/pabuhr/software/qthreads/lib $(srcdir)/ctxswitch/qthreads.c -lqthread
 
 ## =========================================================================================================
@@ -320,6 +314,5 @@
 	creation-upp_thread.run			\
 	creation-goroutine.run			\
-	creation-java_thread.run		\
-	creation-qthreads.run
+	creation-java_thread.run
 
 creation-cfa_coroutine$(EXEEXT):
@@ -349,7 +342,4 @@
 	@echo "java JavaThread" >> a.out
 	@chmod a+x a.out
-
-creation-qthreads$(EXEEXT):
-	$(BENCH_V_QTHREADS)$(COMPILE) -DBENCH_N=50000000 -I/u/pabuhr/software/qthreads/include -L/u/pabuhr/software/qthreads/lib -Xlinker -R/u/pabuhr/software/qthreads/lib $(srcdir)/ctxswitch/qthreads.c -lqthread
 
 ## =========================================================================================================
Index: benchmark/creation/qthreads.c
===================================================================
--- benchmark/creation/qthreads.c	(revision 35a408b7afe5994cf2251ff7078ad4636f315997)
+++ 	(revision )
@@ -1,34 +1,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <unistd.h>
-#include <qthread.h>
-
-#include "bench.h"
-
-static aligned_t greeter( void * arg ) {
-	return 0;
-}
-
-int main( int argc, char *argv[] ) {
-	aligned_t return_value = 0;
-	int status;
-
-	status = qthread_init( 1 );
-	assert(status == QTHREAD_SUCCESS);
-
-	BENCH(
-		for ( size_t i = 0; i < n; i += 1 ) {
-			qthread_fork( greeter, NULL, &return_value );
-			qthread_readFF( NULL, &return_value );
-		}, result
-	)
-	printf( "%g\n", result );
-
-	return EXIT_SUCCESS;
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "gcc -g -O2 -Wall -I.. -I/u/pabuhr/software/qthreads/include -L/u/pabuhr/software/qthreads/lib -Xlinker -R/u/pabuhr/software/qthreads/lib qthreads.c -lqthread" //
-// End: //
Index: benchmark/ctxswitch/qthreads.c
===================================================================
--- benchmark/ctxswitch/qthreads.c	(revision 35a408b7afe5994cf2251ff7078ad4636f315997)
+++ 	(revision )
@@ -1,45 +1,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <unistd.h>
-#include <qthread.h>
-
-#include "bench.h"
-
-int argc;
-char **argv;
-
-static aligned_t greeter( __attribute__((unused)) void * arg ) {
-	BENCH(
-		for ( size_t i = 0; i < n; i += 1 ) {
-			qthread_yield();
-		},
-		result
-	)
-	printf( "%g\n", result );
-	return 0;
-}
-
-int main( int margc, char *margv[] ) {
-	argc = margc;
-	argv = margv;
-
-	aligned_t return_value = 0;
-	int status;
-
-	status = qthread_init( 1 );
-	assert(status == QTHREAD_SUCCESS);
-
-	status = qthread_fork( greeter, NULL, &return_value );
-	assert(status == QTHREAD_SUCCESS);
-
-	int ret = qthread_readFF( NULL, &return_value );
-	assert(ret == QTHREAD_SUCCESS);
-
-	return EXIT_SUCCESS;
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "gcc -g -O2 -Wall -I.. -I/u/pabuhr/software/qthreads/include -L/u/pabuhr/software/qthreads/lib -Xlinker -R/u/pabuhr/software/qthreads/lib qthreads.c -lqthread" //
-// End: //
Index: doc/bibliography/pl.bib
===================================================================
--- doc/bibliography/pl.bib	(revision 35a408b7afe5994cf2251ff7078ad4636f315997)
+++ doc/bibliography/pl.bib	(revision b0ab7853291fa2ff5960f20cb326515dcc4c6655)
@@ -954,5 +954,5 @@
     key		= {Cforall Benchmarks},
     author	= {{\textsf{C}{$\mathbf{\forall}$} Benchmarks}},
-    howpublished= {\href{https://plg.uwaterloo.ca/~cforall/benchmarks}{https://\-plg.uwaterloo.ca/\-$\sim$cforall/\-benchmarks}},
+    howpublished= {\href{https://plg.uwaterloo.ca/~cforall/benchmark.tar}{https://\-plg.uwaterloo.ca/\-$\sim$cforall/\-benchmark.tar}},
 }
 
Index: doc/papers/concurrency/Paper.tex
===================================================================
--- doc/papers/concurrency/Paper.tex	(revision 35a408b7afe5994cf2251ff7078ad4636f315997)
+++ doc/papers/concurrency/Paper.tex	(revision b0ab7853291fa2ff5960f20cb326515dcc4c6655)
@@ -316,7 +316,7 @@
 Finally, performant user-threading implementations (both time and space) meet or exceed direct kernel-threading implementations, while achieving the programming advantages of high concurrency levels and safety.
 
-A further effort over the past two decades is the development of language memory models to deal with the conflict between language features and compiler/hardware optimizations, \ie, some language features are unsafe in the presence of aggressive sequential optimizations~\cite{Buhr95a,Boehm05}.
+A further effort over the past two decades is the development of language memory models to deal with the conflict between language features and compiler/hardware optimizations, \ie some language features are unsafe in the presence of aggressive sequential optimizations~\cite{Buhr95a,Boehm05}.
 The consequence is that a language must provide sufficient tools to program around safety issues, as inline and library code is all sequential to the compiler.
-One solution is low-level qualifiers and functions (\eg, @volatile@ and atomics) allowing \emph{programmers} to explicitly write safe (race-free~\cite{Boehm12}) programs.
+One solution is low-level qualifiers and functions (\eg @volatile@ and atomics) allowing \emph{programmers} to explicitly write safe (race-free~\cite{Boehm12}) programs.
 A safer solution is high-level language constructs so the \emph{compiler} knows the optimization boundaries, and hence, provides implicit safety.
 This problem is best known with respect to concurrency, but applies to other complex control-flow, like exceptions\footnote{
@@ -324,8 +324,8 @@
 The key feature that dovetails with this paper is nonlocal exceptions allowing exceptions to be raised across stacks, with synchronous exceptions raised among coroutines and asynchronous exceptions raised among threads, similar to that in \uC~\cite[\S~5]{uC++}
 } and coroutines.
-Finally, language solutions allow matching constructs with language paradigm, \ie, imperative and functional languages often have different presentations of the same concept to fit their programming model.
+Finally, language solutions allow matching constructs with language paradigm, \ie imperative and functional languages often have different presentations of the same concept to fit their programming model.
 
 Finally, it is important for a language to provide safety over performance \emph{as the default}, allowing careful reduction of safety for performance when necessary.
-Two concurrency violations of this philosophy are \emph{spurious wakeup} (random wakeup~\cite[\S~8]{Buhr05a}) and \emph{barging} (signals-as-hints~\cite[\S~8]{Buhr05a}), where one is a consequence of the other, \ie, once there is spurious wakeup, signals-as-hints follow.
+Two concurrency violations of this philosophy are \emph{spurious wakeup} (random wakeup~\cite[\S~8]{Buhr05a}) and \emph{barging} (signals-as-hints~\cite[\S~8]{Buhr05a}), where one is a consequence of the other, \ie once there is spurious wakeup, signals-as-hints follow.
 However, spurious wakeup is \emph{not} a foundational concurrency property~\cite[\S~8]{Buhr05a}, it is a performance design choice.
 Similarly, signals-as-hints are often a performance decision.
@@ -337,5 +337,5 @@
 Most augmented traditional (Fortran 18~\cite{Fortran18}, Cobol 14~\cite{Cobol14}, Ada 12~\cite{Ada12}, Java 11~\cite{Java11}) and new languages (Go~\cite{Go}, Rust~\cite{Rust}, and D~\cite{D}), except \CC, diverge from C with different syntax and semantics, only interoperate indirectly with C, and are not systems languages, for those with managed memory.
 As a result, there is a significant learning curve to move to these languages, and C legacy-code must be rewritten.
-While \CC, like \CFA, takes an evolutionary approach to extend C, \CC's constantly growing complex and interdependent features-set (\eg, objects, inheritance, templates, etc.) mean idiomatic \CC code is difficult to use from C, and C programmers must expend significant effort learning \CC.
+While \CC, like \CFA, takes an evolutionary approach to extend C, \CC's constantly growing complex and interdependent features-set (\eg objects, inheritance, templates, etc.) mean idiomatic \CC code is difficult to use from C, and C programmers must expend significant effort learning \CC.
 Hence, rewriting and retraining costs for these languages, even \CC, are prohibitive for companies with a large C software-base.
 \CFA with its orthogonal feature-set, its high-performance runtime, and direct access to all existing C libraries circumvents these problems.
@@ -367,14 +367,14 @@
 \section{Stateful Function}
 
-The stateful function is an old idea~\cite{Conway63,Marlin80} that is new again~\cite{C++20Coroutine19}, where execution is temporarily suspended and later resumed, \eg, plugin, device driver, finite-state machine.
+The stateful function is an old idea~\cite{Conway63,Marlin80} that is new again~\cite{C++20Coroutine19}, where execution is temporarily suspended and later resumed, \eg plugin, device driver, finite-state machine.
 Hence, a stateful function may not end when it returns to its caller, allowing it to be restarted with the data and execution location present at the point of suspension.
 This capability is accomplished by retaining a data/execution \emph{closure} between invocations.
-If the closure is fixed size, we call it a \emph{generator} (or \emph{stackless}), and its control flow is restricted, \eg, suspending outside the generator is prohibited.
-If the closure is variably sized, we call it a \emph{coroutine} (or \emph{stackful}), and as the names implies, often implemented with a separate stack with no programming restrictions.
+If the closure is fixed size, we call it a \emph{generator} (or \emph{stackless}), and its control flow is restricted, \eg suspending outside the generator is prohibited.
+If the closure is variable size, we call it a \emph{coroutine} (or \emph{stackful}), and as the name implies, often implemented with a separate stack with no programming restrictions.
 Hence, refactoring a stackless coroutine may require changing it to stackful.
-A foundational property of all \emph{stateful functions} is that resume/suspend \emph{do not} cause incremental stack growth, \ie, resume/suspend operations are remembered through the closure not the stack.
+A foundational property of all \emph{stateful functions} is that resume/suspend \emph{do not} cause incremental stack growth, \ie resume/suspend operations are remembered through the closure not the stack.
 As well, activating a stateful function is \emph{asymmetric} or \emph{symmetric}, identified by resume/suspend (no cycles) and resume/resume (cycles).
 A fixed closure activated by modified call/return is faster than a variable closure activated by context switching.
-Additionally, any storage management for the closure (especially in unmanaged languages, \ie, no garbage collection) must also be factored into design and performance.
+Additionally, any storage management for the closure (especially in unmanaged languages, \ie no garbage collection) must also be factored into design and performance.
 Therefore, selecting between stackless and stackful semantics is a tradeoff between programming requirements and performance, where stackless is faster and stackful is more general.
 Note, creation cost is amortized across usage, so activation cost is usually the dominant factor.
@@ -648,5 +648,5 @@
 \end{center}
 The example takes advantage of resuming a generator in the constructor to prime the loops so the first character sent for formatting appears inside the nested loops.
-The destructor provides a newline if formatted text ends with a full line.
+The destructor provides a newline, if formatted text ends with a full line.
 Figure~\ref{f:CFormatSim} shows the C implementation of the \CFA input generator with one additional field and the computed @goto@.
 For contrast, Figure~\ref{f:PythonFormatter} shows the equivalent Python format generator with the same properties as the Fibonacci generator.
@@ -2719,5 +2719,5 @@
 Each benchmark experiment is run 31 times.
 All omitted tests for other languages are functionally identical to the \CFA tests and available online~\cite{CforallBenchMarks}.
-
+% tar --exclude=.deps --exclude=Makefile --exclude=Makefile.in --exclude=c.c --exclude=cxx.cpp --exclude=fetch_add.c -cvhf benchmark.tar benchmark
 
 \paragraph{Object Creation}
@@ -2749,5 +2749,5 @@
 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
 \CFA Coroutine Lazy		& 14.3		& 14.3		& 0.32		\\
-\CFA Coroutine Eager	& 2203.7	& 2205.6	& 26.03		\\
+\CFA Coroutine Eager	& 522.8		& 525.3		& 5.81		\\
 \CFA Thread				& 1257.8	& 1291.2	& 86.19		\\
 \uC Coroutine			& 92.2		& 91.4		& 1.58		\\
Index: doc/user/user.tex
===================================================================
--- doc/user/user.tex	(revision 35a408b7afe5994cf2251ff7078ad4636f315997)
+++ doc/user/user.tex	(revision b0ab7853291fa2ff5960f20cb326515dcc4c6655)
@@ -11,6 +11,6 @@
 %% Created On       : Wed Apr  6 14:53:29 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Sat Jun 15 16:29:45 2019
-%% Update Count     : 3847
+%% Last Modified On : Tue Jun 25 08:51:33 2019
+%% Update Count     : 3871
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -3346,34 +3346,36 @@
 
 
-\section{I/O Stream Library}
-\label{s:IOStreamLibrary}
+\section{Stream I/O Library}
+\label{s:StreamIOLibrary}
 \index{input/output stream library}
 \index{stream library}
 
-The goal of \CFA input/output (I/O) is to simplify the common cases\index{I/O!common case}, while fully supporting polymorphism and user defined types in a consistent way.
-\CFA I/O combines ideas from C ©printf©, \CC, and Python.
-I/O can be unformatted or formatted.
-Unformatted means \CFA selects the output or input format for values that match with the type of a variable.
-Formatted means additional information is specified to augment how an output or input of value is interpreted.
-\CFA formatting is a cross between C ©printf© and \CC ©cout© manipulators.
+The goal of \CFA stream input/output (I/O) is to simplify the common cases\index{I/O!common case}, while fully supporting polymorphism and user defined types in a consistent way.
+Stream I/O can be implicitly or explicitly formatted.
+Implicit formatting means \CFA selects the output or input format for values that match with the type of a variable.
+Explicit formatting means additional information is specified to augment how an output or input value is interpreted.
+\CFA formatting is a cross between C ©printf© and \CC ©cout© manipulators, and Python implicit spacing and newline.
+Specifically:
 \begin{itemize}
 \item
-©printf© format codes are dense, making them difficult to read and remember.
+©printf©/Python format codes are dense, making them difficult to read and remember.
 \CFA/\CC format manipulators are named, making them easier to read and remember.
 \item
-©printf© separates format codes from associated variables, making it difficult to match codes with variables.
+©printf©/Python separates format codes from associated variables, making it difficult to match codes with variables.
 \CFA/\CC co-locate codes with associated variables, where \CFA has the tighter binding.
 \item
-Format manipulators in \CC have global rather than local effect, except ©setw©.
+Format manipulators in \CFA have local effect, whereas those in \CC have global effect, except ©setw©.
 Hence, it is common programming practice to toggle manipulators on and then back to the default to prevent downstream side-effects.
 Without this programming style, errors occur when moving prints, as manipulator effects incorrectly flow into the new location.
 (To guarantee no side-effects, manipulator values must be saved and restored across function calls.)
+\item
+\CFA has more sophisticated implicit spacing between values than Python, plus implicit newline at the end of a print.
 \end{itemize}
 The \CFA header file for the I/O library is \Indexc{fstream.hfa}.
 
-For unformatted output, the common case is printing a sequence of variables separated by whitespace.
+For implicitly formatted output, the common case is printing a series of variables separated by whitespace.
 \begin{cquote}
-\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
-\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}	& \multicolumn{1}{c}{\textbf{\CC}}	\\
+\begin{tabular}{@{}l@{\hspace{2em}}l@{\hspace{2em}}l@{}}
+\multicolumn{1}{c@{\hspace{2em}}}{\textbf{\CFA}}	& \multicolumn{1}{c@{\hspace{2em}}}{\textbf{\CC}}	& \multicolumn{1}{c}{\textbf{Python}}	\\
 \begin{cfa}
 int x = 1, y = 2, z = 3;
@@ -3385,5 +3387,14 @@
 cout << x ®<< " "® << y ®<< " "® << z << endl;
 \end{cfa}
+&
+\begin{cfa}
+x = 1;  y = 2;  z = 3
+print( x, y, z )
+\end{cfa}
 \\
+\begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
+1® ®2® ®3
+\end{cfa}
+&
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
 1® ®2® ®3
@@ -3429,5 +3440,5 @@
 There is a weak similarity between the \CFA logical-or operator and the \Index{Shell pipe-operator} for moving data, where data flows in the correct direction for input but the opposite direction for output.
 
-For unformatter input, the common case is reading a sequence of values separated by whitespace, where the type of an input constant must match with the type of the input variable.
+For implicitly formatted input, the common case is reading a sequence of values separated by whitespace, where the type of an input constant must match with the type of the input variable.
 \begin{cquote}
 \begin{lrbox}{\LstBox}
@@ -3436,7 +3447,7 @@
 \end{cfa}
 \end{lrbox}
-\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
+\begin{tabular}{@{}l@{\hspace{2em}}l@{\hspace{2em}}l@{}}
 \multicolumn{1}{@{}l@{}}{\usebox\LstBox} \\
-\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}	& \multicolumn{1}{c}{\textbf{\CC}}	\\
+\multicolumn{1}{c@{\hspace{2em}}}{\textbf{\CFA}}	& \multicolumn{1}{c@{\hspace{2em}}}{\textbf{\CC}}	& \multicolumn{1}{c}{\textbf{Python}}	\\
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
 sin | x | y | z;
@@ -3446,11 +3457,25 @@
 cin >> x >> y >> z;
 \end{cfa}
+&
+\begin{cfa}[aboveskip=0pt,belowskip=0pt]
+x = int(input());  y = float(input());  z = input();
+\end{cfa}
 \\
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
 ®1® ®2.5® ®A®
+
+
 \end{cfa}
 &
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
 ®1® ®2.5® ®A®
+
+
+\end{cfa}
+&
+\begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
+®1®
+®2.5®
+®A®
 \end{cfa}
 \end{tabular}
@@ -3705,5 +3730,5 @@
 0b0 0b11011 0b11011 0b11011 0b11011
 sout | bin( -27HH ) | bin( -27H ) | bin( -27 ) | bin( -27L );
-0b11100101 0b1111111111100101 0b11111111111111111111111111100101 0b(58 1s)100101
+0b11100101 0b1111111111100101 0b11111111111111111111111111100101 0b®(58 1s)®100101
 \end{cfa}
 
@@ -3782,5 +3807,4 @@
 ®  ®4.000000 ® ®4.000000 4.000000
 ®  ®ab ® ®ab ab
-    ab    ab ab
 \end{cfa}
 If the value is larger, it is printed without truncation, ignoring the ©minimum©.
