Index: benchmark/readyQ/locality.cc
===================================================================
--- benchmark/readyQ/locality.cc	(revision a67279a7d35e6ea334963273724115ade26fe292)
+++ benchmark/readyQ/locality.cc	(revision a67279a7d35e6ea334963273724115ade26fe292)
@@ -0,0 +1,328 @@
+#include "rq_bench.hpp"
+
+#include <pthread.h>
+#include <semaphore.h>
+#include <sched.h>
+#include <unistd.h>
+
+#include <iostream>
+
+struct Result {  // Per-thread counters; main sums them over all threads after joining
+	uint64_t count = 0;  // loop iterations completed (incremented once per pass in thread_main)
+	uint64_t dmigs = 0;  // times the thread's current MyData reported a cpu change (MyData::moved)
+	uint64_t gmigs = 0;  // times the thread's own context reported a cpu change (MyCtx::moved)
+};
+
+struct Pthread {  // Policy type handed to wait<>() in main as its template argument
+	static int usleep(useconds_t usec) {  // forwards to the global ::usleep and returns its result
+		return ::usleep(usec);
+	}
+};
+
+// ==================================================
+struct __attribute__((aligned(128))) MyData {
+	// Work array touched by the benchmark threads; owns a 128-byte aligned heap buffer of 'len' words
+	uint64_t _p1[16];  // padding
+	uint64_t * data;   // buffer of 'len' 64-bit words, allocated in the constructor, freed in the destructor
+	size_t len;        // number of words in 'data'
+	int ttid;          // cpu id from sched_getcpu() at construction, updated by moved()
+	size_t id;         // index given by the creator
+	uint64_t _p2[16];  // padding
+
+	// Allocate and zero 'size' words; the byte count is rounded up to a multiple of the alignment as aligned_alloc requires
+	MyData(size_t id, size_t size)
+		: data( (uint64_t *)aligned_alloc(128, (size * sizeof(uint64_t) + 127) / 128 * 128) )
+		, len( size )
+		, ttid( sched_getcpu() )
+		, id( id )
+	{
+		if(!this->data) std::abort();  // allocation failed: abort, like the other setup failures in this file
+		for(size_t i = 0; i < this->len; i++) this->data[i] = 0;
+	}
+
+	~MyData() { free( this->data ); }  // release the buffer obtained from aligned_alloc
+	MyData(const MyData &) = delete; MyData & operator=(const MyData &) = delete;  // single owner of 'data'
+	// Returns 1 and records 'ttid' when it differs from the stored cpu id, otherwise returns 0
+	uint64_t moved(int ttid) {
+		if(this->ttid == ttid) return 0;
+		this->ttid = ttid;
+		return 1;
+	}
+
+	// Increment the word at position idx modulo len; len must be non-zero
+	__attribute__((noinline)) void access(size_t idx) { this->data[idx % this->len] += 1; }
+};
+
+// ==================================================
+struct __attribute__((aligned(128))) MyCtx {  // Per-thread context: work data, the spot table, a private semaphore and counters
+	struct MyData * volatile data;  // data the thread works on; MySpot::put may overwrite it from another thread when sharing
+
+	struct {
+		struct MySpot ** ptr;  // array of spot pointers supplied by the creator
+		size_t len;            // number of entries in ptr
+	} spots;
+
+	sem_t sem;  // the owning thread waits on this at start-up and while seated on a spot
+
+	Result result;  // progress and migration counters, read by main after the thread is joined
+
+	bool share;  // passed to MySpot::put: hand our data to the thread we wake
+	size_t cnt;  // number of accesses per work() call
+	int ttid;    // cpu id from sched_getcpu() at construction, updated by moved()
+	size_t id;   // thread index; thread_main derives its random-number state from it
+
+	MyCtx(MyData * d, MySpot ** spots, size_t len, size_t cnt, bool share, size_t id)
+		: data( d )
+		, spots{ .ptr = spots, .len = len }
+		, share( share )
+		, cnt( cnt )
+		, ttid( sched_getcpu() )
+		, id( id )
+	{
+		int ret = sem_init( &sem, false, 0 );  // pshared = false, initial value 0: the first sem_wait blocks
+		if(ret != 0) std::abort();
+	}
+
+	~MyCtx() {
+		int ret = sem_destroy( &sem );
+		if(ret != 0) std::abort();
+	}
+
+	uint64_t moved(int ttid) {  // returns 1 and records 'ttid' when it differs from the stored cpu id, else 0
+		if(this->ttid == ttid) {
+			return 0;
+		}
+		this->ttid = ttid;
+		return 1;
+	}
+};
+
+// ==================================================
+// Atomic object where a single thread can wait
+// May exchange data
+struct __attribute__((aligned(128))) MySpot {
+	MyCtx * volatile ptr;  // nullptr: seat empty; one(): seat closed; otherwise the context last seated here
+	size_t id;
+	uint64_t _p1[16];  // padding
+
+	MySpot(size_t id) : ptr( nullptr ), id( id ) {}
+
+	// Sentinel pointer value (address 1) stored in ptr to mark the seat as closed; never dereferenced by put()
+	static inline MyCtx * one() {
+		return reinterpret_cast<MyCtx *>(1);
+	}
+
+	// Main handshake of the code
+	// Single seat, the first thread arriving waits
+	// The next thread unblocks the current one and blocks in its place
+	// if share == true, exchange data in the process
+	bool put( MyCtx & ctx, MyData * data, bool share) {  // returns true if the seat is closed, false after being woken
+		// Attempt to CAS our context into the seat
+		for(;;) {
+			MyCtx * expected = this->ptr;
+			if (expected == one()) { // Seat is closed, return
+				return true;
+			}
+
+			if (__atomic_compare_exchange_n(&this->ptr, &expected, &ctx, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
+				if(expected) {  // a previous occupant is recorded: hand over data if requested, then post its semaphore
+					if(share) {
+						expected->data = data;
+					}
+					sem_post(&expected->sem);
+				}
+				break; // We got the seat
+			}
+		}
+
+		// Block once on the seat
+		sem_wait(&ctx.sem);
+
+		// Someone woke us up, get the new data
+		return false;
+	}
+
+	// Shutdown the spot
+	// Wake current thread and mark seat as closed
+	void release() {
+		struct MyCtx * val = __atomic_exchange_n(&this->ptr, one(), __ATOMIC_SEQ_CST);
+		if (!val) {
+			return;
+		}
+
+		// Someone was there, release them
+		sem_post(&val->sem);  // NOTE(review): val would be one() if release() ran twice on a spot - confirm it is called at most once
+	}
+};
+
+// ==================================================
+// Random number generator, Go's native one is too slow and global
+uint64_t __xorshift64( uint64_t & state ) {  // advance 'state' by one xorshift step (shifts 13, 7, 17) and return the new value
+	uint64_t x = state;  // a state of 0 stays 0 through the xor/shift steps below, so callers need a non-zero seed
+	x ^= x << 13;
+	x ^= x >> 7;
+	x ^= x << 17;
+	return state = x;  // store the new state and return it
+}
+
+// ==================================================
+// Do some work by accessing 'cnt' cells in the array
+__attribute__((noinline)) void work(MyData & data, size_t cnt, uint64_t & state) {  // 'state' is advanced once per access
+	for (size_t i = 0; i < cnt; i++) {
+		data.access(__xorshift64(state));  // random index; access() reduces it modulo the array length
+	}
+}
+
+void thread_main( MyCtx & ctx ) {  // Worker body: touch the data, rendezvous on a random spot, record progress, repeat
+	// The xorshift state must be non-zero (0 maps to 0 forever); an id of 0 would always pick cell 0 and spot 0
+	uint64_t state = ctx.id + 1;
+	// Wait for start
+	sem_wait(&ctx.sem);
+
+	// Main loop
+	for(;;) {
+		// Touch our current data, write to invalidate remote cache lines
+		work( *ctx.data, ctx.cnt, state );
+
+		// Wait on a random spot
+		uint64_t idx = __xorshift64(state) % ctx.spots.len;
+		bool closed = ctx.spots.ptr[idx]->put(ctx, ctx.data, ctx.share);
+
+		// Check if the experiment is over
+		if (closed) break;
+		if ( clock_mode && stop) break;
+		if (!clock_mode && ctx.result.count >= stop_count) break;
+
+		// Check everything is consistent
+		assert( ctx.data );
+
+		// write down progress and check migrations
+		int ttid = sched_getcpu();
+		ctx.result.count += 1;
+		ctx.result.gmigs += ctx.moved(ttid);
+		ctx.result.dmigs += ctx.data->moved(ttid);
+	}
+
+	__atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);
+}
+
+// ==================================================
+int main(int argc, char * argv[]) {
+	// Benchmark driver: parse options, trim the cpu affinity to nprocs, run nthreads workers, print statistics
+	unsigned wsize = 2;
+	unsigned wcnt  = 2;
+	unsigned nspots = 0;
+	bool share = false;
+	option_t opt[] = {
+		BENCH_OPT,
+		{ 'n', "nspots", "Number of spots where threads sleep (nthreads - nspots are active at the same time)", nspots},
+		{ 'w', "worksize", "Size of the array for each threads, in words (64bit)", wsize},
+		{ 'c', "workcnt" , "Number of words to touch when working (random pick, cells can be picked more than once)", wcnt },
+		{ 's', "share"   , "Pass the work data to the next thread when blocking", share, parse_truefalse }
+	};
+	BENCH_OPT_PARSE("libfibre cycle benchmark");
+
+	std::cout.imbue(std::locale(""));
+	setlocale(LC_ALL, "");
+
+	unsigned long long global_count = 0;
+	unsigned long long global_gmigs = 0;
+	unsigned long long global_dmigs = 0;
+
+	// Default: one spot per thread beyond nprocs. Never let the subtraction wrap; 0 spots is a zero modulus in thread_main
+	if( nspots == 0 && nthreads > nprocs ) { nspots = nthreads - nprocs; }
+	if( nspots == 0 ) { std::cerr << "Number of spots must be positive (use -n or nthreads > nprocs)" << std::endl; return 1; }
+
+	uint64_t start, end;
+	{
+		cpu_set_t cpuset;
+		if(pthread_getaffinity_np( pthread_self(), sizeof(cpuset), &cpuset ) != 0) std::abort();
+		unsigned cnt = CPU_COUNT_S(sizeof(cpuset), &cpuset);
+		if(cnt > nprocs) {
+			unsigned extras = cnt - nprocs;
+			for(int i = 0; i < CPU_SETSIZE && extras > 0; i++) {
+				if(CPU_ISSET_S(i, sizeof(cpuset), &cpuset)) {
+					CPU_CLR_S(i, sizeof(cpuset), &cpuset);
+					extras--;
+				}
+			}
+
+			if(pthread_setaffinity_np( pthread_self(), sizeof(cpuset), &cpuset ) != 0) std::abort();
+		}
+	}
+
+	{
+		MyData * data_arrays[nthreads];
+		for(size_t i = 0; i < nthreads; i++) {
+			data_arrays[i] = new MyData( i, wsize );
+		}
+
+		MySpot * spots[nspots];
+		for(unsigned i = 0; i < nspots; i++) {
+			spots[i] = new MySpot{ i };
+		}
+
+		threads_left = nthreads - nspots;
+		pthread_t threads[nthreads];
+		MyCtx * thddata[nthreads];
+		{
+			for(size_t i = 0; i < nthreads; i++) {
+				thddata[i] = new MyCtx(
+					data_arrays[i],
+					spots,
+					nspots,
+					wcnt,
+					share,
+					i
+				);
+				int ret = pthread_create( &threads[i], nullptr, reinterpret_cast<void * (*)(void *)>(thread_main), thddata[i] );
+				if(ret != 0) std::abort();
+			}
+
+			bool is_tty = isatty(STDOUT_FILENO);
+			start = getTimeNsec();
+
+			for(size_t i = 0; i < nthreads; i++) {
+				sem_post(&thddata[i]->sem);
+			}
+			wait<Pthread>(start, is_tty);
+
+			stop = true;
+			end = getTimeNsec();
+			printf("\nDone\n");
+
+			for(size_t i = 0; i < nthreads; i++) {
+				sem_post(&thddata[i]->sem);
+				if(pthread_join( threads[i], nullptr ) != 0) std::abort();
+				global_count += thddata[i]->result.count;
+				global_gmigs += thddata[i]->result.gmigs;
+				global_dmigs += thddata[i]->result.dmigs;
+			}
+		}
+
+		// Free contexts only after every join: a spot may still hold a context pointer that a running thread posts to
+		for(size_t i = 0; i < nthreads; i++) {
+			delete( thddata[i] );
+			delete( data_arrays[i] );
+		}
+		for(size_t i = 0; i < nspots; i++) {
+			delete( spots[i] );
+		}
+	}
+
+	printf("Duration (ms)          : %'ld\n", to_miliseconds(end - start));
+	printf("Number of processors   : %'d\n", nprocs);
+	printf("Number of threads      : %'d\n", nthreads);
+	printf("Number of spots        : %'u\n", nspots);
+	printf("Work size (64bit words): %'15u\n", wsize);
+	printf("Total Operations(ops)  : %'15llu\n", global_count);
+	printf("Total G Migrations     : %'15llu\n", global_gmigs);
+	printf("Total D Migrations     : %'15llu\n", global_dmigs);
+	printf("Ops per second         : %'18.2lf\n", ((double)global_count) / to_fseconds(end - start));
+	printf("ns per ops             : %'18.2lf\n", ((double)(end - start)) / global_count);
+	printf("Ops per threads        : %'15llu\n", global_count / nthreads);
+	printf("Ops per procs          : %'15llu\n", global_count / nprocs);
+	printf("Ops/sec/procs          : %'18.2lf\n", (((double)global_count) / nprocs) / to_fseconds(end - start));
+	printf("ns per ops/procs       : %'18.2lf\n", ((double)(end - start)) / (global_count / nprocs));
+	fflush(stdout);
+}
Index: doc/LaTeXmacros/lstlang.sty
===================================================================
--- doc/LaTeXmacros/lstlang.sty	(revision a33dcd5cf6dd984b46de0674f101a57b65968bad)
+++ doc/LaTeXmacros/lstlang.sty	(revision a67279a7d35e6ea334963273724115ade26fe292)
@@ -8,6 +8,6 @@
 %% Created On       : Sat May 13 16:34:42 2017
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Wed Sep 23 22:40:04 2020
-%% Update Count     : 24
+%% Last Modified On : Wed Feb 17 09:21:15 2021
+%% Update Count     : 27
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -113,10 +113,10 @@
 	morekeywords={
 		_Alignas, _Alignof, __alignof, __alignof__, asm, __asm, __asm__, __attribute, __attribute__,
-		auto, _Bool, catch, catchResume, choose, _Complex, __complex, __complex__, __const, __const__,
-		coroutine, disable, dtype, enable, exception, __extension__, fallthrough, fallthru, finally,
+		auto, basetypeof, _Bool, catch, catchResume, choose, _Complex, __complex, __complex__, __const, __const__,
+		coroutine, disable, dtype, enable, exception, __extension__, fallthrough, fallthru, finally, fixup,
 		__float80, float80, __float128, float128, forall, ftype, generator, _Generic, _Imaginary, __imag, __imag__,
 		inline, __inline, __inline__, __int128, int128, __label__, monitor, mutex, _Noreturn, one_t, or,
-		otype, restrict, __restrict, __restrict__, __signed, __signed__, _Static_assert, suspend, thread,
-		_Thread_local, throw, throwResume, timeout, trait, try, ttype, typeof, __typeof, __typeof__,
+		otype, restrict, __restrict, __restrict__, recover, report, __signed, __signed__, _Static_assert, suspend,
+		thread, _Thread_local, throw, throwResume, timeout, trait, try, ttype, typeof, __typeof, __typeof__,
 		virtual, __volatile, __volatile__, waitfor, when, with, zero_t,
     },
Index: doc/theses/andrew_beach_MMath/existing.tex
===================================================================
--- doc/theses/andrew_beach_MMath/existing.tex	(revision a33dcd5cf6dd984b46de0674f101a57b65968bad)
+++ doc/theses/andrew_beach_MMath/existing.tex	(revision a67279a7d35e6ea334963273724115ade26fe292)
@@ -14,5 +14,5 @@
 \section{Overloading and \lstinline{extern}}
 \CFA has extensive overloading, allowing multiple definitions of the same name
-to be defined.~\cite{Moss18}
+to be defined~\cite{Moss18}.
 \begin{cfa}
 char i; int i; double i;			$\C[3.75in]{// variable overload}$
@@ -46,5 +46,5 @@
 pointers using the ampersand (@&@) instead of the pointer asterisk (@*@). \CFA
 references may also be mutable or non-mutable. If mutable, a reference variable
-may be assigned to using the address-of operator (@&@), which converts the
+may be assigned using the address-of operator (@&@), which converts the
 reference to a pointer.
 \begin{cfa}
@@ -58,5 +58,5 @@
 \section{Constructors and Destructors}
 
-Both constructors and destructors are operators, which means they are just
+Both constructors and destructors are operators, which means they are
 functions with special operator names rather than type names in \Cpp. The
 special operator names may be used to call the functions explicitly (not
@@ -64,5 +64,5 @@
 
 In general, operator names in \CFA are constructed by bracketing an operator
-token with @?@, which indicates where the arguments. For example, infixed
+token with @?@, which indicates the position of the arguments. For example, infixed
 multiplication is @?*?@ while prefix dereference is @*?@. This syntax make it
 easy to tell the difference between prefix operations (such as @++?@) and
@@ -89,5 +89,5 @@
 definition, \CFA creates a default and copy constructor, destructor and
 assignment (like \Cpp). It is possible to define constructors/destructors for
-basic and existing types.
+basic and existing types (unlike \Cpp).
 
 \section{Polymorphism}
@@ -120,6 +120,6 @@
 	do_once(value);
 }
-void do_once(int i) { ... }  // provide assertion
-int i;
+void do_once(@int@ i) { ... }  // provide assertion
+@int@ i;
 do_twice(i); // implicitly pass assertion do_once to do_twice
 \end{cfa}
@@ -172,20 +172,21 @@
 declarations instead of parameters, returns, and local variable declarations.
 \begin{cfa}
-forall(dtype T)
+forall(dtype @T@)
 struct node {
-	node(T) * next;  // generic linked node
-	T * data;
-}
+	node(@T@) * next;  // generic linked node
+	@T@ * data;
+}
+node(@int@) inode;
 \end{cfa}
 The generic type @node(T)@ is an example of a polymorphic-type usage.  Like \Cpp
-templates usage, a polymorphic-type usage must specify a type parameter.
+template usage, a polymorphic-type usage must specify a type parameter.
 
 There are many other polymorphism features in \CFA but these are the ones used
 by the exception system.
 
-\section{Concurrency}
-\CFA has a number of concurrency features: @thread@, @monitor@, @mutex@
-parameters, @coroutine@ and @generator@. The two features that interact with
-the exception system are @thread@ and @coroutine@; they and their supporting
+\section{Control Flow}
+\CFA has a number of advanced control-flow features: @generator@, @coroutine@, @monitor@, @mutex@ parameters, and @thread@.
+The two features that interact with
+the exception system are @coroutine@ and @thread@; they and their supporting
 constructs are described here.
 
@@ -216,5 +217,5 @@
 CountUp countup;
 \end{cfa}
-Each coroutine has @main@ function, which takes a reference to a coroutine
+Each coroutine has a @main@ function, which takes a reference to a coroutine
 object and returns @void@.
 \begin{cfa}[numbers=left]
@@ -230,5 +231,5 @@
 In this function, or functions called by this function (helper functions), the
 @suspend@ statement is used to return execution to the coroutine's caller
-without terminating the coroutine.
+without terminating the coroutine's function.
 
 A coroutine is resumed by calling the @resume@ function, \eg @resume(countup)@.
@@ -242,5 +243,5 @@
 @resume(countup).next@.
 
-\subsection{Monitors and Mutex}
+\subsection{Monitor and Mutex Parameter}
 Concurrency does not guarantee ordering; without ordering results are
 non-deterministic. To claw back ordering, \CFA uses monitors and @mutex@
@@ -260,5 +261,5 @@
 and only one runs at a time.
 
-\subsection{Threads}
+\subsection{Thread}
 Functions, generators, and coroutines are sequential so there is only a single
 (but potentially sophisticated) execution path in a program. Threads introduce
@@ -268,6 +269,6 @@
 monitors and mutex parameters. For threads to work safely with other threads,
 also requires mutual exclusion in the form of a communication rendezvous, which
-also supports internal synchronization as for mutex objects. For exceptions
-only the basic two basic operations are important: thread fork and join.
+also supports internal synchronization as for mutex objects. For exceptions,
+only two basic thread operations are important: fork and join.
 
 Threads are created like coroutines with an associated @main@ function:
Index: doc/user/user.tex
===================================================================
--- doc/user/user.tex	(revision a33dcd5cf6dd984b46de0674f101a57b65968bad)
+++ doc/user/user.tex	(revision a67279a7d35e6ea334963273724115ade26fe292)
@@ -11,6 +11,6 @@
 %% Created On       : Wed Apr  6 14:53:29 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Mon Feb 15 13:48:53 2021
-%% Update Count     : 4452
+%% Last Modified On : Sun Mar  7 21:50:24 2021
+%% Update Count     : 4574
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -4076,4 +4076,117 @@
 
 
+\subsection{Concurrent Stream Access}
+
+When a stream is shared by multiple threads, input or output characters can be intermixed or cause failure.
+For example, if two threads execute the following:
+\begin{cfa}
+$\emph{thread\(_1\)}$ : sout | "abc " | "def ";
+$\emph{thread\(_2\)}$ : sout | "uvw " | "xyz ";
+\end{cfa}
+possible outputs are:
+\begin{cquote}
+\begin{tabular}{@{}l|l|l|l|l@{}}
+\begin{cfa}
+abc def
+uvw xyz 
+\end{cfa}
+&
+\begin{cfa}
+abc uvw xyz 
+def 
+\end{cfa}
+&
+\begin{cfa}
+uvw abc xyz def
+
+\end{cfa}
+&
+\begin{cfa}
+abuvwc dexf
+yz
+\end{cfa}
+&
+\begin{cfa}
+uvw abc def 
+xyz 
+\end{cfa}
+\end{tabular}
+\end{cquote}
+Concurrent operations can even corrupt the internal state of the stream resulting in failure.
+As a result, some form of mutual exclusion is required for concurrent stream access.
+
+A coarse-grained solution is to perform all stream operations via a single thread or within a monitor providing the necessary mutual exclusion for the stream.
+A fine-grained solution is to have a lock for each stream, which is acquired and released around stream operations by each thread.
+\CFA provides a fine-grained solution where a \Index{recursive lock} is acquired and released indirectly via a manipulator ©acquire© or instantiating an \Index{RAII} type specific for the kind of stream: ©osacquire©\index{ostream@©ostream©!osacquire@©osacquire©} for output streams and ©isacquire©\index{isacquire@©isacquire©}\index{istream@©istream©!isacquire@©isacquire©} for input streams.
+
+The common usage is manipulator ©acquire©\index{ostream@©ostream©!acquire@©acquire©} to lock a stream during a single cascaded I/O expression, where it should appear as the first item in a cascade list, \eg:
+\begin{cfa}
+$\emph{thread\(_1\)}$ : sout | @acquire@ | "abc " | "def ";   // manipulator
+$\emph{thread\(_2\)}$ : sout | @acquire@ | "uvw " | "xyz ";
+\end{cfa}
+Now, the order of the thread execution is still non-deterministic, but the output is constrained to two possible lines in either order.
+\begin{cquote}
+\def\VRfont{\fontfamily{pcr}\upshape\selectfont}
+\begin{tabular}{@{}l|l@{}}
+\begin{cfa}
+abc def
+uvw xyz
+\end{cfa}
+&
+\begin{cfa}
+uvw xyz
+abc def
+\end{cfa}
+\end{tabular}
+\end{cquote}
+In summary, the stream lock is acquired by the ©acquire© manipulator and implicitly released at the end of the cascaded I/O expression ensuring all operations in the expression occur atomically.
+
+To lock a stream across multiple I/O operations, declare an instance of the appropriate ©osacquire© or ©isacquire© type to implicitly acquire and release the stream lock for the object's duration, \eg:
+\begin{cfa}
+{	// acquire sout for block duration
+	@osacquire@ acq = { sout };				$\C{// named stream locker}$
+	sout | 1;
+	sout | @acquire@ | 2 | 3;				$\C{// unnecessary, but ok to acquire and release again}$
+	sout | 4;
+}	// implicitly release the lock when "acq" is deallocated
+\end{cfa}
+Note, the unnecessary ©acquire© manipulator works because the recursive stream-lock can be acquired/released multiple times by the owner thread.
+Hence, calls to functions that also acquire a stream lock for their output do not result in \Index{deadlock}.
+
+The previous values written by threads 1 and 2 can be read in concurrently:
+\begin{cfa}
+{	// acquire sin lock for block duration
+	@isacquire acq = { sin };@				$\C{// named stream locker}$
+	int x, y, z, w;
+	sin | x;
+	sin | @acquire@ | y | z;				$\C{// unnecessary, but ok to acquire and release again}$
+	sin | w;
+}	// implicitly release the lock when "acq" is deallocated
+\end{cfa}
+Again, the order of the reading threads is non-deterministic.
+Note, non-deterministic reading is rare.
+
+\Textbf{WARNING:} The general problem of \Index{nested locking} can occur if routines that block are called in an I/O sequence, \eg:
+\begin{cfa}
+sout | @acquire@ | "data:" | rtn( mon );	$\C{// mutex call on monitor}$
+\end{cfa}
+If the thread executing the I/O expression blocks in the monitor with the ©sout© lock, other threads writing to ©sout© also block until the thread holding the lock is unblocked and releases it.
+This scenario can lead to \Index{deadlock}, if the thread that is going to unblock the thread waiting in the monitor first writes to ©sout© (deadly embrace).
+To prevent nested locking, a simple precaution is to factor out the blocking call from the expression, \eg:
+\begin{cfa}
+int @data@ = rtn( mon );
+sout | acquire | "data:" | @data@;
+\end{cfa}
+
+\Textbf{WARNING:} ©printf©\index{printf@©printf©}, ©scanf©\index{scanf@©scanf©} and their derivatives are unsafe when used with user-level threading, as in \CFA.
+These stream routines use kernel-thread locking (©futex©\index{futex@©futex©}), which blocks kernel threads, to prevent interleaving of I/O.
+However, the following simple example illustrates how a deadlock can occur (other complex scenarios are possible).
+Assume a single kernel thread and two user-level threads calling ©printf©.
+One user-level thread acquires the I/O lock and is time-sliced while performing ©printf©.
+The other user-level thread then starts execution, calls ©printf©, and blocks the only kernel thread because it cannot acquire the I/O lock.
+It does not help if the kernel lock is multiple acquisition, \ie, the lock owner can acquire it multiple times, because it then results in two user threads in the ©printf© critical section, corrupting the stream.
+
+
+\begin{comment}
 \section{Types}
 
@@ -4154,4 +4267,5 @@
 process((int) s); // type is converted, no function is called
 \end{cfa}
+\end{comment}
 
 
@@ -4369,5 +4483,59 @@
 \begin{table}[hbt]
 \centering
-\input{../refrat/operidents}
+\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{\hspace{\parindentlnth}}l@{}}
+\begin{tabular}{@{}ll@{}}
+©?[?]©	& subscripting \impl{?[?]}					\\
+©?()©	& function call \impl{?()}					\\
+©?++©	& postfix increment \impl{?++}				\\
+©?--©	& postfix decrement \impl{?--}				\\
+©++?©	& prefix increment \impl{++?}				\\
+©--?©	& prefix decrement \impl{--?}				\\
+©*?©	& dereference \impl{*?}						\\
+©+?©	& unary plus \impl{+?}						\\
+©-?©	& arithmetic negation \impl{-?}				\\
+©~?©	& bitwise negation \impl{~?}				\\
+©!?©	& logical complement \impl{"!?}				\\
+©?\?©	& exponentiation \impl{?\?}					\\
+©?*?©	& multiplication \impl{?*?}					\\
+©?/?©	& division \impl{?/?}						\\
+©?%?©	& remainder \impl{?%?}						\\
+\end{tabular}
+&
+\begin{tabular}{@{}ll@{}}
+©?+?©	& addition \impl{?+?}						\\
+©?-?©	& subtraction \impl{?-?}					\\
+©?<<?©	& left shift \impl{?<<?}					\\
+©?>>?©	& right shift \impl{?>>?}					\\
+©?<?©	& less than \impl{?<?}						\\
+©?<=?©	& less than or equal \impl{?<=?}			\\
+©?>=?©	& greater than or equal \impl{?>=?}			\\
+©?>?©	& greater than \impl{?>?}					\\
+©?==?©	& equality \impl{?==?}						\\
+©?!=?©	& inequality \impl{?"!=?}					\\
+©?&?©	& bitwise AND \impl{?&?}					\\
+©?^?©	& exclusive OR \impl{?^?}					\\
+©?|?©	& inclusive OR \impl{?"|?}					\\
+													\\
+													\\
+\end{tabular}
+&
+\begin{tabular}{@{}ll@{}}
+©?=?©	& simple assignment \impl{?=?}				\\
+©?\=?©	& exponentiation assignment \impl{?\=?}		\\
+©?*=?©	& multiplication assignment \impl{?*=?}		\\
+©?/=?©	& division assignment \impl{?/=?}			\\
+©?%=?©	& remainder assignment \impl{?%=?}			\\
+©?+=?©	& addition assignment \impl{?+=?}			\\
+©?-=?©	& subtraction assignment \impl{?-=?}		\\
+©?<<=?©	& left-shift assignment \impl{?<<=?}		\\
+©?>>=?©	& right-shift assignment \impl{?>>=?}		\\
+©?&=?©	& bitwise AND assignment \impl{?&=?}		\\
+©?^=?©	& exclusive OR assignment \impl{?^=?}		\\
+©?|=?©	& inclusive OR assignment \impl{?"|=?}		\\
+													\\
+													\\
+													\\
+\end{tabular}
+\end{tabular}
 \caption{Operator Identifiers}
 \label{opids}
@@ -6502,10 +6670,74 @@
 \label{s:CFAKeywords}
 
-\CFA introduces the following new keywords.
+\CFA introduces the following new \Index{keyword}s, which cannot be used as identifiers.
 
 \begin{cquote}
-\input{../refrat/keywords}
+\begin{tabular}{@{}lllllll@{}}
+\begin{tabular}{@{}l@{}}
+\Indexc{basetypeof}		\\
+\Indexc{choose}			\\
+\Indexc{coroutine}		\\
+\Indexc{disable}		\\
+\end{tabular}
+&
+\begin{tabular}{@{}l@{}}
+\Indexc{enable}			\\
+\Indexc{exception}		\\
+\Indexc{fallthrough}	\\
+\Indexc{fallthru}		\\
+\end{tabular}
+&
+\begin{tabular}{@{}l@{}}
+\Indexc{finally}		\\
+\Indexc{fixup}			\\
+\Indexc{forall}			\\
+\Indexc{generator}		\\
+\end{tabular}
+&
+\begin{tabular}{@{}l@{}}
+\Indexc{int128}			\\
+\Indexc{monitor}		\\
+\Indexc{mutex}			\\
+\Indexc{one_t}			\\
+\end{tabular}
+&
+\begin{tabular}{@{}l@{}}
+\Indexc{report}			\\
+\Indexc{suspend}		\\
+\Indexc{throw}			\\
+\Indexc{throwResume}	\\
+\end{tabular}
+&
+\begin{tabular}{@{}l@{}}
+\Indexc{trait}			\\
+\Indexc{try}			\\
+\Indexc{virtual}		\\
+\Indexc{waitfor}		\\
+\end{tabular}
+&
+\begin{tabular}{@{}l@{}}
+\Indexc{when}			\\
+\Indexc{with}			\\
+\Indexc{zero_t}			\\
+						\\
+\end{tabular}
+\end{tabular}
 \end{cquote}
-
+\CFA introduces the following new \Index{quasi-keyword}s, which can be used as identifiers.
+\begin{cquote}
+\begin{tabular}{@{}ll@{}}
+\begin{tabular}{@{}l@{}}
+\Indexc{catch}			\\
+\Indexc{catchResume}	\\
+\Indexc{finally}		\\
+\end{tabular}
+&
+\begin{tabular}{@{}l@{}}
+\Indexc{fixup}			\\
+\Indexc{or}				\\
+\Indexc{timeout}		\\
+\end{tabular}
+\end{tabular}
+\end{cquote}
 
 \section{Standard Headers}
Index: libcfa/src/concurrency/io/call.cfa.in
===================================================================
--- libcfa/src/concurrency/io/call.cfa.in	(revision a33dcd5cf6dd984b46de0674f101a57b65968bad)
+++ libcfa/src/concurrency/io/call.cfa.in	(revision a67279a7d35e6ea334963273724115ade26fe292)
@@ -132,5 +132,5 @@
 	extern int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
 
-	extern ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags);
+	extern ssize_t splice(int fd_in, __off64_t *off_in, int fd_out, __off64_t *off_out, size_t len, unsigned int flags);
 	extern ssize_t tee(int fd_in, int fd_out, size_t len, unsigned int flags);
 }
@@ -366,5 +366,5 @@
 	}),
 	# CFA_HAVE_IORING_OP_SPLICE
-	Call('SPLICE', 'ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags)', {
+	Call('SPLICE', 'ssize_t splice(int fd_in, __off64_t *off_in, int fd_out, __off64_t *off_out, size_t len, unsigned int flags)', {
 		'splice_fd_in': 'fd_in',
 		'splice_off_in': 'off_in ? (__u64)*off_in : (__u64)-1',
Index: libcfa/src/concurrency/iofwd.hfa
===================================================================
--- libcfa/src/concurrency/iofwd.hfa	(revision a33dcd5cf6dd984b46de0674f101a57b65968bad)
+++ libcfa/src/concurrency/iofwd.hfa	(revision a67279a7d35e6ea334963273724115ade26fe292)
@@ -91,5 +91,5 @@
 extern ssize_t cfa_read(int fd, void * buf, size_t count, __u64 submit_flags);
 extern ssize_t cfa_write(int fd, void * buf, size_t count, __u64 submit_flags);
-extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, __u64 submit_flags);
+extern ssize_t cfa_splice(int fd_in, __off64_t *off_in, int fd_out, __off64_t *off_out, size_t len, unsigned int flags, __u64 submit_flags);
 extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, __u64 submit_flags);
 
@@ -124,5 +124,5 @@
 void async_read(io_future_t & future, int fd, void * buf, size_t count, __u64 submit_flags);
 extern void async_write(io_future_t & future, int fd, void * buf, size_t count, __u64 submit_flags);
-extern void async_splice(io_future_t & future, int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, __u64 submit_flags);
+extern void async_splice(io_future_t & future, int fd_in, __off64_t *off_in, int fd_out, __off64_t *off_out, size_t len, unsigned int flags, __u64 submit_flags);
 extern void async_tee(io_future_t & future, int fd_in, int fd_out, size_t len, unsigned int flags, __u64 submit_flags);
 
Index: libcfa/src/concurrency/monitor.hfa
===================================================================
--- libcfa/src/concurrency/monitor.hfa	(revision a33dcd5cf6dd984b46de0674f101a57b65968bad)
+++ libcfa/src/concurrency/monitor.hfa	(revision a67279a7d35e6ea334963273724115ade26fe292)
@@ -61,5 +61,5 @@
 static inline forall( T & | sized(T) | { void ^?{}( T & mutex ); } )
 void delete( T * th ) {
-	^(*th){};
+	if(th) ^(*th){};
 	free( th );
 }
Index: src/main.cc
===================================================================
--- src/main.cc	(revision a33dcd5cf6dd984b46de0674f101a57b65968bad)
+++ src/main.cc	(revision a67279a7d35e6ea334963273724115ade26fe292)
@@ -9,7 +9,7 @@
 // Author           : Peter Buhr and Rob Schluntz
 // Created On       : Fri May 15 23:12:02 2015
-// Last Modified By : Andrew Beach
-// Last Modified On : Fri Feb 19 14:59:00 2021
-// Update Count     : 643
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Sat Mar  6 15:49:00 2021
+// Update Count     : 656
 //
 
@@ -101,5 +101,5 @@
 static string PreludeDirector = "";
 
-static void parse_cmdline( int argc, char *argv[] );
+static void parse_cmdline( int argc, char * argv[] );
 static void parse( FILE * input, LinkageSpec::Spec linkage, bool shouldExit = false );
 static void dump( list< Declaration * > & translationUnit, ostream & out = cout );
@@ -159,5 +159,5 @@
 #define SIGPARMS int sig __attribute__(( unused )), siginfo_t * sfp __attribute__(( unused )), ucontext_t * cxt __attribute__(( unused ))
 
-static void Signal( int sig, void (*handler)(SIGPARMS), int flags ) {
+static void Signal( int sig, void (* handler)(SIGPARMS), int flags ) {
 	struct sigaction act;
 
@@ -166,5 +166,5 @@
 
 	if ( sigaction( sig, &act, nullptr ) == -1 ) {
-	    cerr << "*CFA runtime error* problem installing signal handler, error(" << errno << ") " << strerror( errno ) << endl;
+	    cerr << "*cfa-cpp compilation error* problem installing signal handler, error(" << errno << ") " << strerror( errno ) << endl;
 	    _exit( EXIT_FAILURE );
 	} // if
@@ -421,5 +421,5 @@
 			delete output;
 		} // if
-	} catch ( SemanticErrorException &e ) {
+	} catch ( SemanticErrorException & e ) {
 		if ( errorp ) {
 			cerr << "---AST at error:---" << endl;
@@ -432,5 +432,5 @@
 		} // if
 		return EXIT_FAILURE;
-	} catch ( UnimplementedError &e ) {
+	} catch ( UnimplementedError & e ) {
 		cout << "Sorry, " << e.get_what() << " is not currently implemented" << endl;
 		if ( output != &cout ) {
@@ -438,5 +438,5 @@
 		} // if
 		return EXIT_FAILURE;
-	} catch ( CompilerError &e ) {
+	} catch ( CompilerError & e ) {
 		cerr << "Compiler Error: " << e.get_what() << endl;
 		cerr << "(please report bugs to [REDACTED])" << endl;
@@ -445,4 +445,8 @@
 		} // if
 		return EXIT_FAILURE;
+	} catch ( std::bad_alloc & ) {
+		cerr << "*cfa-cpp compilation error* std::bad_alloc" << endl;
+		backtrace( 1 );
+		abort();
 	} catch ( ... ) {
 		exception_ptr eptr = current_exception();
@@ -451,8 +455,8 @@
 				rethrow_exception(eptr);
 			} else {
-				cerr << "Exception Uncaught and Unknown" << endl;
-			} // if
-		} catch(const exception& e) {
-			cerr << "Uncaught Exception \"" << e.what() << "\"\n";
+				cerr << "*cfa-cpp compilation error* exception uncaught and unknown" << endl;
+			} // if
+		} catch( const exception & e ) {
+			cerr << "*cfa-cpp compilation error* uncaught exception \"" << e.what() << "\"\n";
 		} // try
 		return EXIT_FAILURE;
@@ -544,5 +548,5 @@
 enum { printoptsSize = sizeof( printopts ) / sizeof( printopts[0] ) };
 
-static void usage( char *argv[] ) {
+static void usage( char * argv[] ) {
     cout << "Usage: " << argv[0] << " [options] [input-file (default stdin)] [output-file (default stdout)], where options are:" << endl;
 	int i = 0, j = 1;									// j skips starting colon
