Index: doc/LaTeXmacros/common.tex
===================================================================
--- doc/LaTeXmacros/common.tex	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ doc/LaTeXmacros/common.tex	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -11,6 +11,6 @@
 %% Created On       : Sat Apr  9 10:06:17 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Sun Feb 14 15:52:46 2021
-%% Update Count     : 524
+%% Last Modified On : Tue Apr 27 12:03:17 2021
+%% Update Count     : 539
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -102,4 +102,5 @@
 \renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}{-2.5ex \@plus -1ex \@minus -.2ex}{1.0ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
 \renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}{-2.0ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries}}
+\renewcommand\subparagraph{\@startsection{subparagraph}{5}{\z@}{-1.5ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries\itshape}}
 
 % index macros
@@ -284,5 +285,5 @@
 showlines=true,							% show blank lines at end of code
 aboveskip=4pt,							% spacing above/below code block
-belowskip=0pt,
+belowskip=2pt,
 numberstyle=\footnotesize\sf,			% numbering style
 % replace/adjust listing characters that look bad in sanserif
@@ -297,6 +298,6 @@
 \lstset{
 language=CFA,
-moredelim=**[is][\color{red}]{@}{@},	% red highlighting @...@
-%moredelim=**[is][\color{red}]{®}{®},	% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
+%moredelim=**[is][\color{red}]{@}{@},	% red highlighting @...@
+moredelim=**[is][\color{red}]{®}{®},	% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
 %moredelim=**[is][\color{blue}]{ß}{ß},	% blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
 %moredelim=**[is][\color{OliveGreen}]{¢}{¢}, % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
Index: doc/user/user.tex
===================================================================
--- doc/user/user.tex	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ doc/user/user.tex	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -11,6 +11,6 @@
 %% Created On       : Wed Apr  6 14:53:29 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Sun Apr 25 19:03:03 2021
-%% Update Count     : 4951
+%% Last Modified On : Wed Apr 28 21:48:59 2021
+%% Update Count     : 5051
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -3312,4 +3312,5 @@
 
 \section{Tuples}
+\label{tuples}
 
 In C and \CFA, lists of elements appear in several contexts, such as the parameter list for a routine call.
@@ -3420,4 +3421,5 @@
 
 \subsection{Tuple Coercions}
+\index{tuple coercions}\index{coercions!tuple}
 
 There are four coercions that can be performed on tuples and tuple variables: closing, opening, flattening and structuring.
@@ -3464,4 +3466,5 @@
 
 \subsection{Mass Assignment}
+\index{mass assignment}\index{assignment!mass}
 
 \CFA permits assignment to several variables at once using mass assignment~\cite{CLU}.
@@ -3504,4 +3507,5 @@
 
 \subsection{Multiple Assignment}
+\index{multiple assignment}\index{assignment!multiple}
 
 \CFA also supports the assignment of several values at once, known as multiple assignment~\cite{CLU,Galletly96}.
@@ -3545,4 +3549,5 @@
 
 \subsection{Cascade Assignment}
+\index{cascade assignment}\index{assignment!cascade}
 
 As in C, \CFA mass and multiple assignments can be cascaded, producing cascade assignment.
@@ -3564,12 +3569,12 @@
 \section{Stream I/O Library}
 \label{s:StreamIOLibrary}
-\index{input/output stream library}
-\index{stream library}
+\index{input}\index{output}
+\index{stream library}\index{library!stream}
 
 The goal of \CFA stream input/output (I/O) is to simplify the common cases\index{I/O!common case}, while fully supporting polymorphism and user defined types in a consistent way.
 Stream I/O can be implicitly or explicitly formatted.
-Implicit formatting means \CFA selects the output or input format for values that match with the type of a variable.
+Implicit formatting means \CFA selects the output or input format for values that matches the variable's type.
 Explicit formatting means additional information is specified to augment how an output or input of value is interpreted.
-\CFA formatting is a cross between C ©printf© and \CC ©cout© manipulators, and Python implicit spacing and newline.
+\CFA formatting incorporates ideas from C ©printf©, \CC ©stream© manipulators, and Python implicit spacing and newline.
 Specifically:
 \begin{itemize}
@@ -3584,9 +3589,21 @@
 Hence, it is common programming practice to toggle manipulators on and then back to the default to prevent downstream side-effects.
 Without this programming style, errors occur when moving prints, as manipulator effects incorrectly flow into the new location.
-(To guarantee no side-effects, manipulator values must be saved and restored across function calls.)
-\item
-\CFA has more sophisticated implicit spacing between values than Python, plus implicit newline at the end of a print.
+Furthermore, to guarantee no side-effects, manipulator values must be saved and restored across function calls.
+\item
+\CFA has more sophisticated implicit value spacing than Python, plus implicit newline at the end of a print.
 \end{itemize}
+
+The standard polymorphic I/O streams are ©stdin©/©sin© (input), ©stdout©/©sout© and ©stderr©/©serr© (output) (like \CC ©cin©/©cout©/©cerr©).
+Polymorphic streams ©exit© and ©abort© provide implicit program termination without and with generating a stack trace and core file.
+Stream ©exit© implicitly returns ©EXIT_FAILURE© to the shell.
+\begin{cfa}
+®exit®   | "x (" | x | ") negative value."; // terminate and return EXIT_FAILURE to shell
+®abort® | "x (" | x | ") negative value."; // terminate and generate stack trace and core file
+\end{cfa}
+Note, \CFA stream variables ©stdin©, ©stdout©, ©stderr©, ©exit©, and ©abort© overload C variables ©stdin©, ©stdout©, ©stderr©, and functions ©exit© and ©abort©, respectively.
 The \CFA header file for the I/O library is \Indexc{fstream.hfa}.
+
+
+\subsection{Basic I/O}
 
 For implicit formatted output, the common case is printing a series of variables separated by whitespace.
@@ -3601,5 +3618,5 @@
 \begin{cfa}
 
-cout << x ®<< " "® << y ®<< " "® << z << endl;
+cout  <<  x  ®<< " "®  <<  y  ®<< " "®  <<  z  << endl;
 \end{cfa}
 &
@@ -3653,5 +3670,5 @@
 \end{tabular}
 \end{cquote}
-Input and output use a uniform operator, ©|©, rather than separate operators, as in ©>>© and ©<<© for \CC.
+Input and output use a uniform operator, ©|©, rather than \CC's ©>>© and ©<<© input/output operators.
 There is a weak similarity between the \CFA logical-or operator and the \Index{Shell pipe-operator} for moving data, where data flows in the correct direction for input but the opposite direction for output.
 
@@ -3698,4 +3715,113 @@
 \end{cquote}
 
+\VRef[Figure]{f:CFACommand-LineProcessing} shows idiomatic \CFA command-line processing and copying an input file to an output file.
+Note, a stream variable may be copied because it is a reference to an underlying stream data-structure.
+All I/O errors are handled as exceptions, but end-of-file is not an exception as C programmers are used to explicitly checking for it.
+
+\begin{figure}
+\begin{cfa}
+#include ®<fstream.hfa>®
+
+int main( int argc, char * argv[] ) {
+	®ifstream® in  = stdin;					$\C{// copy default files}$
+	®ofstream® out = stdout;
+
+	try {
+		choose ( argc ) {
+		  case 2, 3:
+			®open®( in, argv[1] );			$\C{// open input file first as output creates file}$
+			if ( argc == 3 ) ®open®( out, argv[2] ); $\C{// do not create output unless input opens}$
+		  case 1: ;							$\C{// use default files}$
+		  default:
+			®exit® | "Usage" | argv[0] | "[ input-file (default stdin) "
+				   "[ output-file (default stdout) ] ]";
+		} // choose
+	} catch( ®Open_Failure® * ex; ex->istream == &in ) {
+		®exit® | "Unable to open input file" | argv[1];
+	} catch( ®Open_Failure® * ex; ex->ostream == &out ) {
+		®close®( in );						$\C{// optional}$
+		®exit® | "Unable to open output file" | argv[2];
+	} // try
+
+	out | nlOff;							$\C{// turn off auto newline}$
+	in | nlOn;								$\C{// turn on reading newline}$
+	char ch;
+	for () {								$\C{// read/write characters}$
+		in | ch;
+	  if ( eof( in ) ) break;				$\C{// eof ?}$
+		out | ch;
+	} // for
+} // main
+\end{cfa}
+\caption{\CFA Command-Line Processing}
+\label{f:CFACommand-LineProcessing}
+\end{figure}
+
+\VRef[Figure]{f:StreamFunctions} shows the stream operations.
+\begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt]
+\item
+\Indexc{fail} tests the stream error-indicator, returning nonzero if it is set.
+\item
+\Indexc{clear} resets the stream error-indicator.
+\item
+\Indexc{flush} (©ofstream© only) causes any unwritten data for a stream to be written to the file.
+\item
+\Indexc{eof} (©ifstream© only) tests the end-of-file indicator for the stream.
+Returns true if the end-of-file indicator is set, otherwise false.
+\item
+\Indexc{open} binds the file with ©name© to a stream accessed with ©mode© (see ©fopen©).
+\item
+\Indexc{close} flushes the stream and closes the file.
+\item
+\Indexc{write} (©ofstream© only) writes ©size© bytes to the stream.
+The bytes are written lazily to file when internal buffers fill.
+Eager buffer writes are done with ©flush©.
+\item
+\Indexc{read} (©ifstream© only) reads ©size© bytes from the stream.
+\item
+\Indexc{ungetc} (©ifstream© only) pushes the character back to the input stream.
+Pushed-back characters are returned by subsequent reads in the reverse order of pushing.
+\end{itemize}
+The constructor functions:
+\begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt]
+\item
+create an unbound stream, which is subsequently bound to a file with ©open©.
+\item
+create a bound stream to the associated file with given ©mode©.
+\end{itemize}
+The destructor closes the stream.
+
+\begin{figure}
+\begin{cfa}
+// *********************************** ofstream ***********************************
+
+bool fail( ofstream & );$\indexc{fail}\index{ofstream@©ofstream©!©fail©}$
+void clear( ofstream & );$\indexc{clear}\index{ofstream@©ofstream©!©clear©}$
+int flush( ofstream & );$\indexc{flush}\index{ofstream@©ofstream©!©flush©}$
+void open( ofstream &, const char name[], const char mode[] = "w" );$\indexc{open}\index{ofstream@©ofstream©!©open©}$
+void close( ofstream & );$\indexc{close}\index{ofstream@©ofstream©!©close©}$
+ofstream & write( ofstream &, const char data[], size_t size );$\indexc{write}\index{ofstream@©ofstream©!©write©}$
+
+void ?{}( ofstream & );$\index{ofstream@©ofstream©!©?{}©}$
+void ?{}( ofstream &, const char name[], const char mode[] = "w" );
+void ^?{}( ofstream & );$\index{ofstream@©ofstream©!©^?{}©}$
+
+// *********************************** ifstream ***********************************
+
+bool fail( ifstream & is );$\indexc{fail}\index{ifstream@©ifstream©!©fail©}$
+void clear( ifstream & );$\indexc{clear}\index{ifstream@©ifstream©!©clear©}$
+bool eof( ifstream & is );$\indexc{eof}\index{ifstream@©ifstream©!©eof©}$
+void open( ifstream & is, const char name[], const char mode[] = "r" );$\indexc{open}\index{ifstream@©ifstream©!©open©}$
+void close( ifstream & is );$\indexc{close}\index{ifstream@©ifstream©!©close©}$
+ifstream & read( ifstream & is, char data[], size_t size );$\indexc{read}\index{ifstream@©ifstream©!©read©}$
+ifstream & ungetc( ifstream & is, char c );$\indexc{ungetc}\index{ifstream@©ifstream©!©ungetc©}$
+
+void ?{}( ifstream & is );$\index{ifstream@©ifstream©!©?{}©}$
+void ?{}( ifstream & is, const char name[], const char mode[] = "r" );
+void ^?{}( ifstream & is );$\index{ifstream@©ifstream©!©^?{}©}$
+\end{cfa}
+\caption{Stream Functions}
+\label{f:StreamFunctions}
+\end{figure}
 
 
@@ -4030,5 +4156,5 @@
 sout | wd( 4, "ab" ) | wd( 3, "ab" ) | wd( 2, "ab" );
 \end{cfa}
-\begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
+\begin{cfa}[showspaces=true,aboveskip=0pt]
 ®  ®34 ® ®34 34
 ®  ®4.000000 ® ®4.000000 4.000000
@@ -4378,11 +4504,62 @@
 \end{cfa}
 
-\Textbf{WARNING:} ©printf©\index{printf@©printf©}, ©scanf©\index{scanf@©scanf©} and their derivatives are unsafe when used with user-level threading, as in \CFA.
-These stream routines use kernel-thread locking (©futex©\index{futex@©futex©}), which block kernel threads, to prevent interleaving of I/O.
-However, the following simple example illustrates how a deadlock can occur (other complex scenarios are possible).
-Assume a single kernel thread and two user-level threads calling ©printf©.
-One user-level thread acquires the I/O lock and is time-sliced while performing ©printf©.
-The other user-level thread then starts execution, calls ©printf©, and blocks the only kernel thread because it cannot acquire the I/O lock.
-It does not help if the kernel lock is multiple acquisition, \ie, the lock owner can acquire it multiple times, because it then results in two user threads in the ©printf© critical section, corrupting the stream.
+
+\section{String Stream}
+
+All the stream formatting capabilities are available to format text to/from a C string rather than to a stream file.
+\VRef[Figure]{f:StringStreamProcessing} shows writing (output) and reading (input) from a C string.
+\begin{figure}
+\begin{cfa}
+#include <fstream.hfa>
+#include <strstream.hfa>
+
+int main() {
+	enum { size = 256 };
+	char buf[size]; $\C{// output buffer}$
+	®ostrstream osstr = { buf, size };® $\C{// bind output buffer/size}$
+	int i = 3, j = 5, k = 7;
+	double x = 12345678.9, y = 98765.4321e-11;
+
+	osstr | i | hex(j) | wd(10, k) | sci(x) | unit(eng(y)); $\C{// same lines of output}$
+	write( osstr );
+	printf( "%s", buf );
+	sout | i | hex(j) | wd(10, k) | sci(x) | unit(eng(y));
+
+	char buf2[] = "12 14 15 3.5 7e4"; $\C{// input buffer}$
+	®istrstream isstr = { buf2 };®
+	isstr | i | j | k | x | y;
+	sout | i | j | k | x | y;
+}
+\end{cfa}
+\caption{String Stream Processing}
+\label{f:StringStreamProcessing}
+\end{figure}
+
+\VRef[Figure]{f:StringStreamFunctions} shows the string stream operations.
+\begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt]
+\item
+\Indexc{write} (©ostrstream© only) writes all the buffered characters to the specified stream (©stdout© default).
+\end{itemize}
+The constructor functions:
+\begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt]
+\item
+create a bound stream to a write buffer (©ostrstream©) of ©size© or a read buffer (©istrstream©) containing a C string terminated with ©'\0'©.
+\end{itemize}
+
+\begin{figure}
+\begin{cfa}
+// *********************************** ostrstream ***********************************
+
+ostrstream & write( ostrstream & os, FILE * stream = stdout );
+
+void ?{}( ostrstream &, char buf[], size_t size );
+
+// *********************************** istrstream ***********************************
+
+void ?{}( istrstream & is, char buf[] );
+\end{cfa}
+\caption{String Stream Functions}
+\label{f:StringStreamFunctions}
+\end{figure}
 
 
@@ -8111,8 +8288,8 @@
 \begin{cquote}
 \begin{tabular}{@{}l@{\hspace{\parindentlnth}}|@{\hspace{\parindentlnth}}l@{}}
-\multicolumn{1}{@{}c|@{\hspace{\parindentlnth}}}{\textbf{\CFA}}	& \multicolumn{1}{@{\hspace{\parindentlnth}}c}{\textbf{C}@{}}	\\
+\multicolumn{1}{@{}c|@{\hspace{\parindentlnth}}}{\textbf{\CFA}}	& \multicolumn{1}{@{\hspace{\parindentlnth}}c@{}}{\textbf{C}}	\\
 \hline
 \begin{cfa}
-#include <gmp>$\indexc{gmp}$
+#include <gmp.hfa>$\indexc{gmp}$
 int main( void ) {
 	sout | "Factorial Numbers";
Index: libcfa/src/concurrency/clib/cfathread.cfa
===================================================================
--- libcfa/src/concurrency/clib/cfathread.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/concurrency/clib/cfathread.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -243,5 +243,5 @@
 	// Mutex
 	struct cfathread_mutex {
-		single_acquisition_lock impl;
+		fast_lock impl;
 	};
 	int cfathread_mutex_init(cfathread_mutex_t *restrict mut, const cfathread_mutexattr_t *restrict) __attribute__((nonnull (1))) { *mut = new(); return 0; }
@@ -258,5 +258,5 @@
 	// Condition
 	struct cfathread_condition {
-		condition_variable(single_acquisition_lock) impl;
+		condition_variable(fast_lock) impl;
 	};
 	int cfathread_cond_init(cfathread_cond_t *restrict cond, const cfathread_condattr_t *restrict) __attribute__((nonnull (1))) { *cond = new(); return 0; }
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/concurrency/invoke.h	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -148,4 +148,5 @@
 		struct $thread * prev;
 		volatile unsigned long long ts;
+		unsigned preferred;
 	};
 
@@ -199,4 +200,6 @@
 		} node;
 
+		struct processor * last_proc;
+
 		#if defined( __CFA_WITH_VERIFY__ )
 			void * canary;
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/concurrency/io.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -40,4 +40,5 @@
 	#include "kernel.hfa"
 	#include "kernel/fwd.hfa"
+	#include "kernel_private.hfa"
 	#include "io/types.hfa"
 
@@ -89,7 +90,9 @@
 	static inline unsigned __flush( struct $io_context & );
 	static inline __u32 __release_sqes( struct $io_context & );
+	extern void __kernel_unpark( $thread * thrd );
 
 	bool __cfa_io_drain( processor * proc ) {
 		/* paranoid */ verify( ! __preemption_enabled() );
+		/* paranoid */ verify( ready_schedule_islocked() );
 		/* paranoid */ verify( proc );
 		/* paranoid */ verify( proc->io.ctx );
@@ -115,5 +118,5 @@
 			__cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future );
 
-			fulfil( *future, cqe.res );
+			__kernel_unpark( fulfil( *future, cqe.res, false ) );
 		}
 
@@ -124,4 +127,5 @@
 		__atomic_store_n( ctx->cq.head, head + count, __ATOMIC_SEQ_CST );
 
+		/* paranoid */ verify( ready_schedule_islocked() );
 		/* paranoid */ verify( ! __preemption_enabled() );
 
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/concurrency/kernel.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -34,4 +34,9 @@
 #include "invoke.h"
 
+#if !defined(__CFA_NO_STATISTICS__)
+	#define __STATS( ...) __VA_ARGS__	// statistics enabled: emit the wrapped code unchanged
+#else
+	#define __STATS( ...)				// statistics disabled: the wrapped code expands to nothing
+#endif
 
 //-----------------------------------------------------------------------------
@@ -166,7 +171,5 @@
 		preemption_scope scope = { this };
 
-		#if !defined(__CFA_NO_STATISTICS__)
-			unsigned long long last_tally = rdtscl();
-		#endif
+		__STATS( unsigned long long last_tally = rdtscl(); )
 
 		// if we need to run some special setup, now is the time to do it.
@@ -266,4 +269,117 @@
 				__cfa_io_flush( this );
 			}
+
+		// 	SEARCH: {
+		// 		/* paranoid */ verify( ! __preemption_enabled() );
+		// 		/* paranoid */ verify( kernelTLS().this_proc_id );
+
+		// 		// First, lock the scheduler since we are searching for a thread
+
+		// 		// Try to get the next thread
+		// 		ready_schedule_lock();
+		// 		readyThread = pop_fast( this->cltr );
+		// 		ready_schedule_unlock();
+		// 		if(readyThread) {  break SEARCH; }
+
+		// 		// If we can't find a thread, might as well flush any outstanding I/O
+		// 		if(this->io.pending) { __cfa_io_flush( this ); }
+
+		// 		// Spin a little on I/O, just in case
+		// 		for(25) {
+		// 			__maybe_io_drain( this );
+		// 			ready_schedule_lock();
+		// 			readyThread = pop_fast( this->cltr );
+		// 			ready_schedule_unlock();
+		// 			if(readyThread) {  break SEARCH; }
+		// 		}
+
+		// 		// no luck, try stealing a few times
+		// 		for(25) {
+		// 			if( __maybe_io_drain( this ) ) {
+		// 				ready_schedule_lock();
+		// 				readyThread = pop_fast( this->cltr );
+		// 			} else {
+		// 				ready_schedule_lock();
+		// 				readyThread = pop_slow( this->cltr );
+		// 			}
+		// 			ready_schedule_unlock();
+		// 			if(readyThread) {  break SEARCH; }
+		// 		}
+
+		// 		// still no luck, search for a thread
+		// 		ready_schedule_lock();
+		// 		readyThread = pop_search( this->cltr );
+		// 		ready_schedule_unlock();
+		// 		if(readyThread) { break SEARCH; }
+
+		// 		// Don't block if we are done
+		// 		if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
+
+		// 		__STATS( __tls_stats()->ready.sleep.halts++; )
+
+		// 		// Push self to idle stack
+		// 		mark_idle(this->cltr->procs, * this);
+
+		// 		// Confirm the ready-queue is empty
+		// 		__maybe_io_drain( this );
+		// 		ready_schedule_lock();
+		// 		readyThread = pop_search( this->cltr );
+		// 		ready_schedule_unlock();
+
+		// 		if( readyThread ) {
+		// 			// A thread was found, cancel the halt
+		// 			mark_awake(this->cltr->procs, * this);
+
+		// 			__STATS( __tls_stats()->ready.sleep.cancels++; )
+
+		// 			// continue the main loop
+		// 			break SEARCH;
+		// 		}
+
+		// 		__STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl()); )
+		// 		__cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle);
+
+		// 		// __disable_interrupts_hard();
+		// 		eventfd_t val;
+		// 		eventfd_read( this->idle, &val );
+		// 		// __enable_interrupts_hard();
+
+		// 		__STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl()); )
+
+		// 		// We were woken up, remove self from idle
+		// 		mark_awake(this->cltr->procs, * this);
+
+		// 		// DON'T just proceed, start looking again
+		// 		continue MAIN_LOOP;
+		// 	}
+
+		// RUN_THREAD:
+		// 	/* paranoid */ verify( kernelTLS().this_proc_id );
+		// 	/* paranoid */ verify( ! __preemption_enabled() );
+		// 	/* paranoid */ verify( readyThread );
+
+		// 	// Reset io dirty bit
+		// 	this->io.dirty = false;
+
+		// 	// We found a thread run it
+		// 	__run_thread(this, readyThread);
+
+		// 	// Are we done?
+		// 	if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
+
+		// 	#if !defined(__CFA_NO_STATISTICS__)
+		// 		unsigned long long curr = rdtscl();
+		// 		if(curr > (last_tally + 500000000)) {
+		// 			__tally_stats(this->cltr->stats, __cfaabi_tls.this_stats);
+		// 			last_tally = curr;
+		// 		}
+		// 	#endif
+
+		// 	if(this->io.pending && !this->io.dirty) {
+		// 		__cfa_io_flush( this );
+		// 	}
+
+		// 	// Check if there is pending io
+		// 	__maybe_io_drain( this );
 		}
 
@@ -402,7 +518,5 @@
 	$thread * thrd_src = kernelTLS().this_thread;
 
-	#if !defined(__CFA_NO_STATISTICS__)
-		struct processor * last_proc = kernelTLS().this_processor;
-	#endif
+	__STATS( thrd_src->last_proc = kernelTLS().this_processor; )
 
 	// Run the thread on this processor
@@ -423,5 +537,6 @@
 
 	#if !defined(__CFA_NO_STATISTICS__)
-		if(last_proc != kernelTLS().this_processor) {
+		/* paranoid */ verify( thrd_src->last_proc != 0p );
+		if(thrd_src->last_proc != kernelTLS().this_processor) {
 			__tls_stats()->ready.threads.migration++;
 		}
@@ -436,5 +551,5 @@
 // Scheduler routines
 // KERNEL ONLY
-void __schedule_thread( $thread * thrd ) {
+static void __schedule_thread( $thread * thrd ) {
 	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verify( kernelTLS().this_proc_id );
@@ -457,4 +572,5 @@
 	// Dereference the thread now because once we push it, there is not guaranteed it's still valid.
 	struct cluster * cl = thrd->curr_cluster;
+	__STATS(bool outside = thrd->last_proc && thrd->last_proc != kernelTLS().this_processor; )
 
 	// push the thread to the cluster ready-queue
@@ -470,8 +586,12 @@
 		if( kernelTLS().this_stats ) {
 			__tls_stats()->ready.threads.threads++;
+			if(outside) {
+				__tls_stats()->ready.threads.extunpark++;
+			}
 			__push_stat( __tls_stats(), __tls_stats()->ready.threads.threads, false, "Processor", kernelTLS().this_processor );
 		}
 		else {
 			__atomic_fetch_add(&cl->stats->ready.threads.threads, 1, __ATOMIC_RELAXED);
+			__atomic_fetch_add(&cl->stats->ready.threads.extunpark, 1, __ATOMIC_RELAXED);
 			__push_stat( cl->stats, cl->stats->ready.threads.threads, true, "Cluster", cl );
 		}
@@ -508,5 +628,12 @@
 
 	ready_schedule_lock();
-		$thread * thrd = pop_slow( this );
+		$thread * thrd;
+		for(25) {
+			thrd = pop_slow( this );
+			if(thrd) goto RET;
+		}
+		thrd = pop_search( this );
+
+		RET:
 	ready_schedule_unlock();
 
@@ -532,4 +659,19 @@
 }
 
+void __kernel_unpark( $thread * thrd ) {	// schedules thrd if __must_unpark() requires it; caller must already hold the ready-schedule lock
+	/* paranoid */ verify( ! __preemption_enabled() );	// precondition: preemption is disabled
+	/* paranoid */ verify( ready_schedule_islocked());	// precondition: ready-schedule lock is held
+
+	if( !thrd ) return;								// null thread: nothing to do
+
+	if(__must_unpark(thrd)) {
+		// Wake lost the race, so this waker schedules the thread (presumably it had already parked -- confirm in __must_unpark)
+		__schedule_thread( thrd );
+	}
+
+	/* paranoid */ verify( ready_schedule_islocked());	// postcondition: lock is still held on return
+	/* paranoid */ verify( ! __preemption_enabled() );	// postcondition: preemption is still disabled
+}
+
 void unpark( $thread * thrd ) {
 	if( !thrd ) return;
@@ -744,4 +886,5 @@
 
 static inline bool __maybe_io_drain( processor * proc ) {
+	bool ret = false;
 	#if defined(CFA_HAVE_LINUX_IO_URING_H)
 		__cfadbg_print_safe(runtime_core, "Kernel : core %p checking io for ring %d\n", proc, proc->io.ctx->fd);
@@ -752,6 +895,9 @@
 		unsigned tail = *ctx->cq.tail;
 		if(head == tail) return false;
-		return __cfa_io_drain( proc );
+		ready_schedule_lock();
+		ret = __cfa_io_drain( proc );
+		ready_schedule_unlock();
 	#endif
+	return ret;
 }
 
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -447,4 +447,6 @@
 	link.next = 0p;
 	link.prev = 0p;
+	link.preferred = -1u;
+	last_proc = 0p;
 	#if defined( __CFA_WITH_VERIFY__ )
 		canary = 0x0D15EA5E0D15EA5Ep;
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -284,5 +284,5 @@
 
 //-----------------------------------------------------------------------
-// pop thread from the ready queue of a cluster
+// pop thread from the local queues of a cluster
 // returns 0p if empty
 // May return 0p spuriously
@@ -290,8 +290,14 @@
 
 //-----------------------------------------------------------------------
-// pop thread from the ready queue of a cluster
+// pop thread from any ready queue of a cluster
+// returns 0p if empty
+// May return 0p spuriously
+__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
+
+//-----------------------------------------------------------------------
+// search all ready queues of a cluster for any thread
 // returns 0p if empty
 // guaranteed to find any threads added before this call
-__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
+__attribute__((hot)) struct $thread * pop_search(struct cluster * cltr);
 
 //-----------------------------------------------------------------------
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -344,5 +344,6 @@
 	}
 
-	__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) {
+	__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) { return pop_fast(cltr); }
+	__attribute__((hot)) struct $thread * pop_search(struct cluster * cltr) {
 		return search(cltr);
 	}
@@ -436,10 +437,9 @@
 
 	__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
-		for(25) {
-			unsigned i = __tls_rand() % lanes.count;
-			$thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
-			if(t) return t;
-		}
-
+		unsigned i = __tls_rand() % lanes.count;
+		return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
+	}
+
+	__attribute__((hot)) struct $thread * pop_search(struct cluster * cltr) with (cltr->ready_queue) {
 		return search(cltr);
 	}
Index: libcfa/src/concurrency/stats.cfa
===================================================================
--- libcfa/src/concurrency/stats.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/concurrency/stats.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -38,4 +38,5 @@
 		stats->ready.pop.search.espec   = 0;
 		stats->ready.threads.migration = 0;
+		stats->ready.threads.extunpark = 0;
 		stats->ready.threads.threads   = 0;
 		stats->ready.sleep.halts   = 0;
@@ -95,4 +96,5 @@
 		__atomic_fetch_add( &cltr->ready.pop.search.espec  , proc->ready.pop.search.espec  , __ATOMIC_SEQ_CST ); proc->ready.pop.search.espec   = 0;
 		__atomic_fetch_add( &cltr->ready.threads.migration , proc->ready.threads.migration , __ATOMIC_SEQ_CST ); proc->ready.threads.migration  = 0;
+		__atomic_fetch_add( &cltr->ready.threads.extunpark , proc->ready.threads.extunpark , __ATOMIC_SEQ_CST ); proc->ready.threads.extunpark  = 0;
 		__atomic_fetch_add( &cltr->ready.threads.threads   , proc->ready.threads.threads   , __ATOMIC_SEQ_CST ); proc->ready.threads.threads    = 0;
 		__atomic_fetch_add( &cltr->ready.sleep.halts       , proc->ready.sleep.halts       , __ATOMIC_SEQ_CST ); proc->ready.sleep.halts        = 0;
@@ -124,5 +126,5 @@
 
 		char buf[1024];
-		strstream sstr = { buf, 1024 };
+		ostrstream sstr = { buf, 1024 };
 
 		if( flags & CFA_STATS_READY_Q ) {
@@ -132,5 +134,5 @@
 			uint64_t totalR = ready.pop.local.success + ready.pop.help.success + ready.pop.steal.success + ready.pop.search.success;
 			uint64_t totalS = ready.push.local.success + ready.push.share.success + ready.push.extrn.success;
-			sstr | "- totals   : " | eng3(totalR) | "run," | eng3(totalS) | "schd (" | eng3(ready.push.extrn.success) | "ext," | eng3(ready.threads.migration) | "mig)";
+			sstr | "- totals   : " | eng3(totalR) | "run," | eng3(totalS) | "schd (" | eng3(ready.push.extrn.success) | "ext," | eng3(ready.threads.migration) | "mig," | eng3(ready.threads.extunpark) | " eupk)";
 
 			double push_len = ((double)ready.push.local.attempt + ready.push.share.attempt + ready.push.extrn.attempt) / totalS;
Index: libcfa/src/concurrency/stats.hfa
===================================================================
--- libcfa/src/concurrency/stats.hfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/concurrency/stats.hfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -70,4 +70,5 @@
 		struct {
 			volatile uint64_t migration;
+			volatile uint64_t extunpark;
 			volatile  int64_t threads; // number of threads in the system, includes only local change
 		} threads;
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/concurrency/thread.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -39,4 +39,6 @@
 	link.next = 0p;
 	link.prev = 0p;
+	link.preferred = -1u;
+	last_proc = 0p;
 	#if defined( __CFA_WITH_VERIFY__ )
 		canary = 0x0D15EA5E0D15EA5Ep;
Index: libcfa/src/fstream.cfa
===================================================================
--- libcfa/src/fstream.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/fstream.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -10,9 +10,9 @@
 // Created On       : Wed May 27 17:56:53 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Apr 24 09:05:16 2021
-// Update Count     : 426
-//
-
-#include "fstream.hfa"
+// Last Modified On : Wed Apr 28 20:37:53 2021
+// Update Count     : 445
+//
+
+#include "fstream.hfa"									// also includes iostream.hfa
 
 #include <stdio.h>										// vfprintf, vfscanf
@@ -114,4 +114,8 @@
 } // fail
 
+void clear( ofstream & os ) {							// reset the stream's error and end-of-file indicators
+	clearerr( (FILE *)(os.file$) );						// file$ is used as the underlying C FILE handle
+} // clear
+
 int flush( ofstream & os ) {
 	return fflush( (FILE *)(os.file$) );
@@ -196,4 +200,15 @@
 ofstream & abort = abortFile;
 
+ofstream & nl( ofstream & os ) {						// ofstream newline: generic newline, then flush
+	nl$( os );											// call basic_ostream nl
+	flush( os );										// flush after the newline has been written
+	return os;
+	// Inline alternative to the two calls above, kept for reference:
+	//   (ofstream &)(os | '\n'); setPrt$( os, false ); setNL$( os, true );
+	//   flush( os );
+	//   return sepOff( os );								// prepare for next line
+	// NOTE(review): unreachable after the return even if uncommented -- confirm whether it can be deleted.
+} // nl
+
 
 // *********************************** ifstream ***********************************
@@ -230,9 +245,13 @@
 } // fail
 
+void clear( ifstream & is ) {							// reset the stream's error and end-of-file indicators
+	clearerr( (FILE *)(is.file$) );						// file$ is used as the underlying C FILE handle
+} // clear
+
 void ends( ifstream & is ) {
 	if ( is.acquired$ ) { is.acquired$ = false; release( is ); }
 } // ends
 
-int eof( ifstream & is ) {
+bool eof( ifstream & is ) {
 	return feof( (FILE *)(is.file$) );
 } // eof
@@ -263,5 +282,5 @@
 } // close
 
-ifstream & read( ifstream & is, char * data, size_t size ) {
+ifstream & read( ifstream & is, char data[], size_t size ) {
 	if ( fail( is ) ) {
 		abort | IO_MSG "attempt read I/O on failed stream";
Index: libcfa/src/fstream.hfa
===================================================================
--- libcfa/src/fstream.hfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/fstream.hfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -10,6 +10,6 @@
 // Created On       : Wed May 27 17:56:53 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Apr 24 09:04:03 2021
-// Update Count     : 219
+// Last Modified On : Wed Apr 28 20:37:57 2021
+// Update Count     : 230
 //
 
@@ -70,6 +70,7 @@
 
 bool fail( ofstream & );
+void clear( ofstream & );
 int flush( ofstream & );
-void open( ofstream &, const char name[], const char mode[] );
+void open( ofstream &, const char name[], const char mode[] ); // FIX ME: use default = "w"
 void open( ofstream &, const char name[] );
 void close( ofstream & );
@@ -86,7 +87,12 @@
 
 void ?{}( ofstream & );
-void ?{}( ofstream &, const char name[], const char mode[] );
+void ?{}( ofstream &, const char name[], const char mode[] ); // FIX ME: use default = "w"
 void ?{}( ofstream &, const char name[] );
 void ^?{}( ofstream & );
+
+// private -- NOTE(review): presumably must stay ahead of the ofstream nl declared below so that nl here is the generic one; confirm
+static inline ofstream & nl$( ofstream & os ) { return nl( os ); } // remember basic_ostream nl
+// public -- ofstream-specific nl; its definition calls nl$ and then flush
+ofstream & nl( ofstream & os );							// override basic_ostream nl
 
 extern ofstream & sout, & stdout, & serr, & stderr;		// aliases
@@ -111,12 +117,15 @@
 bool getANL( ifstream & );
 void ends( ifstream & );
+int fmt( ifstream &, const char format[], ... ) __attribute__(( format(scanf, 2, 3) ));
+
 bool fail( ifstream & is );
-int eof( ifstream & is );
-void open( ifstream & is, const char name[], const char mode[] );
+void clear( ifstream & );
+bool eof( ifstream & is );
+void open( ifstream & is, const char name[], const char mode[] ); // FIX ME: use default = "r"
 void open( ifstream & is, const char name[] );
 void close( ifstream & is );
-ifstream & read( ifstream & is, char * data, size_t size );
+ifstream & read( ifstream & is, char data[], size_t size );
 ifstream & ungetc( ifstream & is, char c );
-int fmt( ifstream &, const char format[], ... ) __attribute__(( format(scanf, 2, 3) ));
+
 void acquire( ifstream & is );
 void release( ifstream & is );
@@ -129,5 +138,5 @@
 
 void ?{}( ifstream & is );
-void ?{}( ifstream & is, const char name[], const char mode[] );
+void ?{}( ifstream & is, const char name[], const char mode[] ); // FIX ME: use default = "r"
 void ?{}( ifstream & is, const char name[] );
 void ^?{}( ifstream & is );
Index: libcfa/src/iostream.cfa
===================================================================
--- libcfa/src/iostream.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/iostream.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -10,6 +10,6 @@
 // Created On       : Wed May 27 17:56:53 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Apr 24 10:03:54 2021
-// Update Count     : 1329
+// Last Modified On : Tue Apr 27 18:01:03 2021
+// Update Count     : 1330
 //
 
@@ -145,14 +145,14 @@
 	} // ?|?
 
-#if defined( __SIZEOF_INT128__ )
+	#if defined( __SIZEOF_INT128__ )
 	//      UINT64_MAX 18_446_744_073_709_551_615_ULL
 	#define P10_UINT64 10_000_000_000_000_000_000_ULL	// 19 zeroes
 
 	static inline void base10_128( ostype & os, unsigned int128 val ) {
-#if defined(__GNUC__) && __GNUC_PREREQ(7,0)				// gcc version >= 7
+		#if defined(__GNUC__) && __GNUC_PREREQ(7,0)		// gcc version >= 7
 		if ( val > P10_UINT64 ) {
-#else
+		#else
 		if ( (uint64_t)(val >> 64) != 0 || (uint64_t)val > P10_UINT64 ) { // patch gcc 5 & 6 -O3 bug
-#endif // __GNUC_PREREQ(7,0)
+		#endif // __GNUC_PREREQ(7,0)
 			base10_128( os, val / P10_UINT64 );			// recursive
 			fmt( os, "%.19lu", (uint64_t)(val % P10_UINT64) );
@@ -187,5 +187,5 @@
 		(ostype &)(os | ullli); ends( os );
 	} // ?|?
-#endif // __SIZEOF_INT128__
+	#endif // __SIZEOF_INT128__
 
 	#define PrintWithDP( os, format, val, ... ) \
@@ -361,5 +361,4 @@
 		setPrt$( os, false );							// turn off
 		setNL$( os, true );
-		flush( os );
 		return sepOff( os );							// prepare for next line
 	} // nl
@@ -808,5 +807,5 @@
 
 
-forall( istype & | istream( istype ) ) {
+forall( istype & | basic_istream( istype ) ) {
 	istype & ?|?( istype & is, bool & b ) {
 		char val[6];
@@ -918,5 +917,5 @@
 	} // ?|?
 
-#if defined( __SIZEOF_INT128__ )
+	#if defined( __SIZEOF_INT128__ )
 	istype & ?|?( istype & is, int128 & llli ) {
 		return (istype &)(is | (unsigned int128 &)llli);
@@ -944,5 +943,5 @@
 		(istype &)(is | ullli); ends( is );
 	} // ?|?
-#endif // __SIZEOF_INT128__
+	#endif // __SIZEOF_INT128__
 
 	istype & ?|?( istype & is, float & f ) {
@@ -1035,5 +1034,7 @@
 		return is;
 	} // nlOff
-
+} // distribution
+
+forall( istype & | istream( istype ) ) {
 	istype & acquire( istype & is ) {
 		acquire( is );									// call void returning
@@ -1044,5 +1045,5 @@
 // *********************************** manipulators ***********************************
 
-forall( istype & | istream( istype ) ) {
+forall( istype & | basic_istream( istype ) ) {
 	istype & ?|?( istype & is, _Istream_Cstr f ) {
 		// skip xxx
@@ -1092,5 +1093,5 @@
 
 #define InputFMTImpl( T, CODE ) \
-forall( istype & | istream( istype ) ) { \
+forall( istype & | basic_istream( istype ) ) { \
 	istype & ?|?( istype & is, _Istream_Manip(T) f ) { \
 		enum { size = 16 }; \
@@ -1125,5 +1126,5 @@
 InputFMTImpl( long double, "Lf" )
 
-forall( istype & | istream( istype ) ) {
+forall( istype & | basic_istream( istype ) ) {
 	istype & ?|?( istype & is, _Istream_Manip(float _Complex) fc ) {
 		float re, im;
Index: libcfa/src/iostream.hfa
===================================================================
--- libcfa/src/iostream.hfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/iostream.hfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -10,6 +10,6 @@
 // Created On       : Wed May 27 17:56:53 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Apr 25 11:22:03 2021
-// Update Count     : 397
+// Last Modified On : Wed Apr 28 20:37:56 2021
+// Update Count     : 401
 //
 
@@ -49,9 +49,10 @@
 	void ends( ostype & );								// end of output statement
 	int fmt( ostype &, const char format[], ... ) __attribute__(( format(printf, 2, 3) ));
-	int flush( ostype & );
 }; // basic_ostream
 	
 trait ostream( ostype & | basic_ostream( ostype ) ) {
 	bool fail( ostype & );								// operation failed?
+	void clear( ostype & );
+	int flush( ostype & );
 	void open( ostype &, const char name[], const char mode[] );
 	void close( ostype & );
@@ -97,10 +98,10 @@
 	ostype & ?|?( ostype &, unsigned long long int );
 	void ?|?( ostype &, unsigned long long int );
-#if defined( __SIZEOF_INT128__ )
+	#if defined( __SIZEOF_INT128__ )
 	ostype & ?|?( ostype &, int128 );
 	void ?|?( ostype &, int128 );
 	ostype & ?|?( ostype &, unsigned int128 );
 	void ?|?( ostype &, unsigned int128 );
-#endif // __SIZEOF_INT128__
+	#endif // __SIZEOF_INT128__
 
 	ostype & ?|?( ostype &, float );
@@ -121,7 +122,7 @@
 	void ?|?( ostype &, const char [] );
 	// ostype & ?|?( ostype &, const char16_t * );
-#if ! ( __ARM_ARCH_ISA_ARM == 1 && __ARM_32BIT_STATE == 1 ) // char32_t == wchar_t => ambiguous
+	#if ! ( __ARM_ARCH_ISA_ARM == 1 && __ARM_32BIT_STATE == 1 ) // char32_t == wchar_t => ambiguous
 	// ostype & ?|?( ostype &, const char32_t * );
-#endif // ! ( __ARM_ARCH_ISA_ARM == 1 && __ARM_32BIT_STATE == 1 )
+	#endif // ! ( __ARM_ARCH_ISA_ARM == 1 && __ARM_32BIT_STATE == 1 )
 	// ostype & ?|?( ostype &, const wchar_t * );
 	ostype & ?|?( ostype &, const void * );
@@ -294,18 +295,22 @@
 
 
-trait istream( istype & ) {
+trait basic_istream( istype & ) {
+	bool getANL( istype & );							// get scan newline (on/off)
 	void nlOn( istype & );								// read newline
 	void nlOff( istype & );								// scan newline
-	bool getANL( istype & );							// get scan newline (on/off)
 
 	void ends( istype & os );							// end of output statement
+	int fmt( istype &, const char format[], ... ) __attribute__(( format(scanf, 2, 3) ));
+	istype & ungetc( istype &, char );
+	bool eof( istype & );
+}; // basic_istream
+
+trait istream( istype & | basic_istream( istype ) ) {
 	bool fail( istype & );
-	int eof( istype & );
+	void clear( istype & );
 	void open( istype & is, const char name[] );
 	void close( istype & is );
-	istype & read( istype &, char *, size_t );
-	istype & ungetc( istype &, char );
-	int fmt( istype &, const char format[], ... ) __attribute__(( format(scanf, 2, 3) ));
-	void acquire( istype & );
+	istype & read( istype &, char [], size_t );
+	void acquire( istype & );							// concurrent access
 }; // istream
 
@@ -314,5 +319,5 @@
 }; // readable
 
-forall( istype & | istream( istype ) ) {
+forall( istype & | basic_istream( istype ) ) {
 	istype & ?|?( istype &, bool & );
 	void ?|?( istype &, bool & );
@@ -341,10 +346,10 @@
 	istype & ?|?( istype &, unsigned long long int & );
 	void ?|?( istype &, unsigned long long int & );
-#if defined( __SIZEOF_INT128__ )
+	#if defined( __SIZEOF_INT128__ )
 	istype & ?|?( istype &, int128 & );
 	void ?|?( istype &, int128 & );
 	istype & ?|?( istype &, unsigned int128 & );
 	void ?|?( istype &, unsigned int128 & );
-#endif // __SIZEOF_INT128__
+	#endif // __SIZEOF_INT128__
 
 	istype & ?|?( istype &, float & );
@@ -372,4 +377,7 @@
 	istype & nlOn( istype & );
 	istype & nlOff( istype & );
+} // distribution
+
+forall( istype & | istream( istype ) ) {
 	istype & acquire( istype & );
 } // distribution
@@ -402,5 +410,5 @@
 	_Istream_Cstr & wdi( unsigned int w, _Istream_Cstr & fmt ) { fmt.wd = w; return fmt; }
 } // distribution
-forall( istype & | istream( istype ) ) {
+forall( istype & | basic_istream( istype ) ) {
 	istype & ?|?( istype & is, _Istream_Cstr f );
 	void ?|?( istype & is, _Istream_Cstr f );
@@ -415,5 +423,5 @@
 	_Istream_Char & ignore( _Istream_Char & fmt ) { fmt.ignore = true; return fmt; }
 } // distribution
-forall( istype & | istream( istype ) ) {
+forall( istype & | basic_istream( istype ) ) {
 	istype & ?|?( istype & is, _Istream_Char f );
 	void ?|?( istype & is, _Istream_Char f );
@@ -434,5 +442,5 @@
 	_Istream_Manip(T) & wdi( unsigned int w, _Istream_Manip(T) & fmt ) { fmt.wd = w; return fmt; } \
 } /* distribution */ \
-forall( istype & | istream( istype ) ) { \
+forall( istype & | basic_istream( istype ) ) { \
 	istype & ?|?( istype & is, _Istream_Manip(T) f ); \
 	void ?|?( istype & is, _Istream_Manip(T) f ); \
Index: libcfa/src/strstream.cfa
===================================================================
--- libcfa/src/strstream.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/strstream.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -10,6 +10,6 @@
 // Created On       : Thu Apr 22 22:24:35 2021
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Apr 24 11:15:47 2021
-// Update Count     : 73
+// Last Modified On : Tue Apr 27 20:59:53 2021
+// Update Count     : 78
 // 
 
@@ -23,4 +23,5 @@
 #include <unistd.h>										// sbrk, sysconf
 
+
 // *********************************** strstream ***********************************
 
@@ -29,17 +30,17 @@
 
 // private
-bool sepPrt$( strstream & os ) { setNL$( os, false ); return os.sepOnOff$; }
-void sepReset$( strstream & os ) { os.sepOnOff$ = os.sepDefault$; }
-void sepReset$( strstream & os, bool reset ) { os.sepDefault$ = reset; os.sepOnOff$ = os.sepDefault$; }
-const char * sepGetCur$( strstream & os ) { return os.sepCur$; }
-void sepSetCur$( strstream & os, const char sepCur[] ) { os.sepCur$ = sepCur; }
-bool getNL$( strstream & os ) { return os.sawNL$; }
-void setNL$( strstream & os, bool state ) { os.sawNL$ = state; }
-bool getANL$( strstream & os ) { return os.nlOnOff$; }
-bool getPrt$( strstream & os ) { return os.prt$; }
-void setPrt$( strstream & os, bool state ) { os.prt$ = state; }
+bool sepPrt$( ostrstream & os ) { setNL$( os, false ); return os.sepOnOff$; }
+void sepReset$( ostrstream & os ) { os.sepOnOff$ = os.sepDefault$; }
+void sepReset$( ostrstream & os, bool reset ) { os.sepDefault$ = reset; os.sepOnOff$ = os.sepDefault$; }
+const char * sepGetCur$( ostrstream & os ) { return os.sepCur$; }
+void sepSetCur$( ostrstream & os, const char sepCur[] ) { os.sepCur$ = sepCur; }
+bool getNL$( ostrstream & os ) { return os.sawNL$; }
+void setNL$( ostrstream & os, bool state ) { os.sawNL$ = state; }
+bool getANL$( ostrstream & os ) { return os.nlOnOff$; }
+bool getPrt$( ostrstream & os ) { return os.prt$; }
+void setPrt$( ostrstream & os, bool state ) { os.prt$ = state; }
 
 // public
-void ?{}( strstream & os, char buf[], size_t size ) {
+void ?{}( ostrstream & os, char buf[], size_t size ) {
 	os.buf$ = buf;
 	os.size$ = size;
@@ -55,8 +56,8 @@
 } // ?{}
 
-void sepOn( strstream & os ) { os.sepOnOff$ = ! getNL$( os ); }
-void sepOff( strstream & os ) { os.sepOnOff$ = false; }
+void sepOn( ostrstream & os ) { os.sepOnOff$ = ! getNL$( os ); }
+void sepOff( ostrstream & os ) { os.sepOnOff$ = false; }
 
-bool sepDisable( strstream & os ) {
+bool sepDisable( ostrstream & os ) {
 	bool temp = os.sepDefault$;
 	os.sepDefault$ = false;
@@ -65,5 +66,5 @@
 } // sepDisable
 
-bool sepEnable( strstream & os ) {
+bool sepEnable( ostrstream & os ) {
 	bool temp = os.sepDefault$;
 	os.sepDefault$ = true;
@@ -72,37 +73,37 @@
 } // sepEnable
 
-void nlOn( strstream & os ) { os.nlOnOff$ = true; }
-void nlOff( strstream & os ) { os.nlOnOff$ = false; }
+void nlOn( ostrstream & os ) { os.nlOnOff$ = true; }
+void nlOff( ostrstream & os ) { os.nlOnOff$ = false; }
 
-const char * sepGet( strstream & os ) { return os.separator$; }
-void sepSet( strstream & os, const char s[] ) {
+const char * sepGet( ostrstream & os ) { return os.separator$; }
+void sepSet( ostrstream & os, const char s[] ) {
 	assert( s );
-	strncpy( os.separator$, s, strstream_sepSize - 1 );
-	os.separator$[strstream_sepSize - 1] = '\0';
+	strncpy( os.separator$, s, ostrstream_sepSize - 1 );
+	os.separator$[ostrstream_sepSize - 1] = '\0';
 } // sepSet
 
-const char * sepGetTuple( strstream & os ) { return os.tupleSeparator$; }
-void sepSetTuple( strstream & os, const char s[] ) {
+const char * sepGetTuple( ostrstream & os ) { return os.tupleSeparator$; }
+void sepSetTuple( ostrstream & os, const char s[] ) {
 	assert( s );
-	strncpy( os.tupleSeparator$, s, strstream_sepSize - 1 );
-	os.tupleSeparator$[strstream_sepSize - 1] = '\0';
+	strncpy( os.tupleSeparator$, s, ostrstream_sepSize - 1 );
+	os.tupleSeparator$[ostrstream_sepSize - 1] = '\0';
 } // sepSet
 
-void ends( strstream & os ) {
+void ends( ostrstream & os ) {
 	if ( getANL$( os ) ) nl( os );
 	else setPrt$( os, false );							// turn off
 } // ends
 
-int fmt( strstream & os, const char format[], ... ) {
+int fmt( ostrstream & os, const char format[], ... ) {
 	va_list args;
 	va_start( args, format );
 	int len = vsnprintf( os.buf$ + os.cursor$, os.size$ - os.cursor$, format, args );
+	va_end( args );
 	os.cursor$ += len;
 	if ( os.cursor$ >= os.size$ ) {						// cursor exceeded buffer size?
-		#define fmtmsg IO_MSG "strstream truncated write, buffer too small.\n"
+		#define fmtmsg IO_MSG "ostrstream truncated write, buffer too small.\n"
 		write( STDERR_FILENO, fmtmsg, sizeof(fmtmsg) - 1 );
 		abort();
 	} // if
-	va_end( args );
 
 	setPrt$( os, true );								// called in output cascade
@@ -111,15 +112,8 @@
 } // fmt
 
-int flush( strstream & ) {								// match trait, not used
-	return 0;
-} // flush
-
-strstream & write( strstream & os ) {
-	return write( os, stdout );
-} // write
-strstream & write( strstream & os, FILE * stream ) {
+ostrstream & write( ostrstream & os, FILE * stream ) {
 	if ( fwrite( os.buf$, 1, os.cursor$, stream ) != os.cursor$ ) {
-		#define writemsg IO_MSG "strstream write error.\n"
-		write( STDERR_FILENO, writemsg, sizeof(writemsg) - 1 );
+		#define ostrwritemsg IO_MSG "ostrstream write error.\n"
+		write( STDERR_FILENO, ostrwritemsg, sizeof(ostrwritemsg) - 1 );
 		abort();
 	} // if
@@ -127,5 +121,50 @@
 } // write
 
-strstream & sstr;
+ostrstream & write( ostrstream & os ) {					// convenience overload: forwards with stream = stdout
+	return write( os, stdout );
+} // write
+
+
+// *********************************** istrstream ***********************************
+
+
+// public
+void ?{}( istrstream & is, char buf[] ) {				// constructor: bind the stream to a caller-supplied buffer
+	is.buf$ = buf;										// the pointer is stored; the contents are not copied
+	is.cursor$ = 0;										// fmt scans from buf$ + cursor$, so start at the beginning
+	is.nlOnOff$ = false;								// value reported by getANL until nlOn is called
+} // ?{}
+
+bool getANL( istrstream & is ) { return is.nlOnOff$; }	// report the current newline flag
+void nlOn( istrstream & is ) { is.nlOnOff$ = true; }	// set the newline flag
+void nlOff( istrstream & is ) { is.nlOnOff$ = false; }	// clear the newline flag
+
+void ends( istrstream & is ) {							// end-of-statement hook; intentionally empty for istrstream
+} // ends
+
+int eof( istrstream & is ) {							// placeholder: never reports end of input
+	return 0;											// NOTE(review): basic_istream declares bool eof( istype & ) -- confirm an int return satisfies it
+} // eof
+
+istrstream &ungetc( istrstream & is, char c ) {			// placeholder: c is ignored and the stream is left unchanged
+	// if ( ungetc( c, (FILE *)(is.file$) ) == EOF ) {	// FILE-based version kept for reference only;
+	// 	abort | IO_MSG "ungetc" | nl | strerror( errno );	// istrstream has no file$ member
+	// } // if
+	return is;
+} // ungetc
+
+// Scan values from the unread part of the buffer; returns vsscanf's result (items assigned, or EOF).
+int fmt( istrstream & is, const char format[], ... ) {
+	va_list args;
+	va_start( args, format );
+	// FIX ME: vsscanf returns the number of items assigned, not the number of characters scanned,
+	// so the cursor adjustment below does not track the true buffer position.
+	int len = vsscanf( is.buf$ + is.cursor$, format, args );
+	va_end( args );
+	// EOF is negative: adding it to the unsigned cursor would wrap it far past the end of the buffer,
+	// so only a successful scan moves the cursor.
+	if ( len > 0 ) is.cursor$ += len;
+	return len;
+} // fmt
 
 // Local Variables: //
Index: libcfa/src/strstream.hfa
===================================================================
--- libcfa/src/strstream.hfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ libcfa/src/strstream.hfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -10,6 +10,6 @@
 // Created On       : Thu Apr 22 22:20:59 2021
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Apr 24 11:17:33 2021
-// Update Count     : 37
+// Last Modified On : Tue Apr 27 20:58:50 2021
+// Update Count     : 41
 // 
 
@@ -20,9 +20,9 @@
 
 
-// *********************************** strstream ***********************************
+// *********************************** ostrstream ***********************************
 
 
-enum { strstream_sepSize = 16 };
-struct strstream {										// satisfied basic_ostream
+enum { ostrstream_sepSize = 16 };
+struct ostrstream {										// satisfied basic_ostream
 	char * buf$;
 	size_t size$;
@@ -34,45 +34,65 @@
 	bool sawNL$;
 	const char * sepCur$;
-	char separator$[strstream_sepSize];
-	char tupleSeparator$[strstream_sepSize];
-}; // strstream
+	char separator$[ostrstream_sepSize];
+	char tupleSeparator$[ostrstream_sepSize];
+}; // ostrstream
 
 // Satisfies basic_ostream
 
 // private
-bool sepPrt$( strstream & );
-void sepReset$( strstream & );
-void sepReset$( strstream &, bool );
-const char * sepGetCur$( strstream & );
-void sepSetCur$( strstream &, const char [] );
-bool getNL$( strstream & );
-void setNL$( strstream &, bool );
-bool getANL$( strstream & );
-bool getPrt$( strstream & );
-void setPrt$( strstream &, bool );
+bool sepPrt$( ostrstream & );
+void sepReset$( ostrstream & );
+void sepReset$( ostrstream &, bool );
+const char * sepGetCur$( ostrstream & );
+void sepSetCur$( ostrstream &, const char [] );
+bool getNL$( ostrstream & );
+void setNL$( ostrstream &, bool );
+bool getANL$( ostrstream & );
+bool getPrt$( ostrstream & );
+void setPrt$( ostrstream &, bool );
 
 // public
-void sepOn( strstream & );
-void sepOff( strstream & );
-bool sepDisable( strstream & );
-bool sepEnable( strstream & );
-void nlOn( strstream & );
-void nlOff( strstream & );
+void sepOn( ostrstream & );
+void sepOff( ostrstream & );
+bool sepDisable( ostrstream & );
+bool sepEnable( ostrstream & );
+void nlOn( ostrstream & );
+void nlOff( ostrstream & );
 
-const char * sepGet( strstream & );
-void sepSet( strstream &, const char [] );
-const char * sepGetTuple( strstream & );
-void sepSetTuple( strstream &, const char [] );
+const char * sepGet( ostrstream & );
+void sepSet( ostrstream &, const char [] );
+const char * sepGetTuple( ostrstream & );
+void sepSetTuple( ostrstream &, const char [] );
 
-void ends( strstream & );
-int fmt( strstream &, const char format[], ... ) __attribute__(( format(printf, 2, 3) ));
-int flush( strstream & );
+void ends( ostrstream & );
+int fmt( ostrstream &, const char format[], ... ) __attribute__(( format(printf, 2, 3) ));
 
-strstream & write( strstream & os );					// use stdout, default value not working
-strstream & write( strstream & os, FILE * stream = stdout );
+ostrstream & write( ostrstream & os, FILE * stream ); // FIX ME: use default = stdout
+ostrstream & write( ostrstream & os );
 
-void ?{}( strstream &, char buf[], size_t size );
+void ?{}( ostrstream &, char buf[], size_t size );
 
-extern strstream & sstr;
+
+// *********************************** istrstream ***********************************
+
+
+struct istrstream {										// input stream that scans an in-memory character buffer
+	char * buf$;										// text to scan; set by the constructor, not copied
+	size_t cursor$;										// offset into buf$ where the next fmt scan begins
+	bool nlOnOff$;										// flag set/cleared by nlOn/nlOff and read by getANL
+}; // istrstream
+
+// Intended to satisfy basic_istream (NOTE: the trait declares "bool eof"; eof below returns int -- confirm)
+
+// public
+bool getANL( istrstream & );
+void nlOn( istrstream & );
+void nlOff( istrstream & );
+void ends( istrstream & );
+int fmt( istrstream &, const char format[], ... ) __attribute__(( format(scanf, 2, 3) ));
+istrstream & ungetc( istrstream & is, char c );
+int eof( istrstream & is );
+
+void ?{}( istrstream & is, char buf[] );
 
 // Local Variables: //
Index: src/Parser/parser.yy
===================================================================
--- src/Parser/parser.yy	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ src/Parser/parser.yy	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -10,6 +10,6 @@
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Apr 14 18:13:44 2021
-// Update Count     : 4983
+// Last Modified On : Mon Apr 26 18:41:54 2021
+// Update Count     : 4990
 //
 
@@ -211,5 +211,5 @@
 } // forCtrl
 
-bool forall = false, yyy = false;						// aggregate have one or more forall qualifiers ?
+bool forall = false;									// aggregate have one or more forall qualifiers ?
 
 // https://www.gnu.org/software/bison/manual/bison.html#Location-Type
@@ -812,4 +812,6 @@
 		{ $$ = new ExpressionNode( build_cast( $2, $4 ) ); }
 	| '(' aggregate_control '&' ')' cast_expression		// CFA
+		{ $$ = new ExpressionNode( build_keyword_cast( $2, $5 ) ); }
+	| '(' aggregate_control '*' ')' cast_expression		// CFA
 		{ $$ = new ExpressionNode( build_keyword_cast( $2, $5 ) ); }
 	| '(' VIRTUAL ')' cast_expression					// CFA
@@ -2128,9 +2130,9 @@
 aggregate_data:
 	STRUCT vtable_opt
-		{ yyy = true; $$ = AggregateDecl::Struct; }
+		{ $$ = AggregateDecl::Struct; }
 	| UNION
-		{ yyy = true; $$ = AggregateDecl::Union; }
+		{ $$ = AggregateDecl::Union; }
 	| EXCEPTION											// CFA
-		{ yyy = true; $$ = AggregateDecl::Exception; }
+		{ $$ = AggregateDecl::Exception; }
 	  //		{ SemanticError( yylloc, "exception aggregate is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
 	;
@@ -2138,17 +2140,17 @@
 aggregate_control:										// CFA
 	MONITOR
-		{ yyy = true; $$ = AggregateDecl::Monitor; }
+		{ $$ = AggregateDecl::Monitor; }
 	| MUTEX STRUCT
-		{ yyy = true; $$ = AggregateDecl::Monitor; }
+		{ $$ = AggregateDecl::Monitor; }
 	| GENERATOR
-		{ yyy = true; $$ = AggregateDecl::Generator; }
+		{ $$ = AggregateDecl::Generator; }
 	| MUTEX GENERATOR
 		{ SemanticError( yylloc, "monitor generator is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
 	| COROUTINE
-		{ yyy = true; $$ = AggregateDecl::Coroutine; }
+		{ $$ = AggregateDecl::Coroutine; }
 	| MUTEX COROUTINE
 		{ SemanticError( yylloc, "monitor coroutine is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
 	| THREAD
-		{ yyy = true; $$ = AggregateDecl::Thread; }
+		{ $$ = AggregateDecl::Thread; }
 	| MUTEX THREAD
 		{ SemanticError( yylloc, "monitor thread is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
Index: tests/io/io-acquire.cfa
===================================================================
--- tests/io/io-acquire.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ tests/io/io-acquire.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -10,6 +10,6 @@
 // Created On       : Mon Mar  1 18:40:09 2021
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Mar  2 12:06:35 2021
-// Update Count     : 17
+// Last Modified On : Tue Apr 27 11:49:34 2021
+// Update Count     : 18
 // 
 
@@ -43,5 +43,5 @@
 	int a, b, c, d, e, f, g, h, i;
 	for ( 100 ) {										// local protection
-		sin  | acquire | a | b | c | d | e | f | g | h | i;
+		sin | acquire | a | b | c | d | e | f | g | h | i;
 	}
 	{													// global protection (RAII)
Index: tests/strstream.cfa
===================================================================
--- tests/strstream.cfa	(revision a6c45c6127052add9b2043d7186d1f3f7c51f00c)
+++ tests/strstream.cfa	(revision 2d8a770eb55adae00a48d7d9da1b194d16f41862)
@@ -1,15 +1,37 @@
+// 
+// Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
+// 
+// strstream.cfa -- 
+// 
+// Author           : Peter A. Buhr
+// Created On       : Wed Apr 28 21:47:35 2021
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Wed Apr 28 21:50:02 2021
+// Update Count     : 3
+// 
+
 #include <fstream.hfa>
 #include <strstream.hfa>
 
 int main() {
-    enum { size = 256 };
-    char buf[size];
-    strstream sstr = { buf, size };
-    int i = 3, j = 5, k = 7;
-    double x = 12345678.9, y = 98765.4321e-11;
+	enum { size = 256 };
+	char buf[size];										// output buffer
+	ostrstream osstr = { buf, size };					// bind output buffer/size
+	int i = 3, j = 5, k = 7;
+	double x = 12345678.9, y = 98765.4321e-11;
 
-    sstr | i | hex(j) | wd(10, k) | sci(x) | unit(eng(y));
-    write( sstr );
-    printf( "%s", buf );
-    sout | i | hex(j) | wd(10, k) | sci(x) | unit(eng(y));
+	osstr | i | hex(j) | wd(10, k) | sci(x) | unit(eng(y)); // same lines of output
+	write( osstr );
+	printf( "%s", buf );
+	sout | i | hex(j) | wd(10, k) | sci(x) | unit(eng(y));
+
+	// char buf2[] = "12 14 15 3.5 7e4";					// input buffer
+	// istrstream isstr = { buf2 };
+	// isstr | i | j | k | x | y;
+	// sout | i | j | k | x | y;
 }
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa strstream.cfa" //
+// End: //
