Index: benchmark/benchcltr.hfa
===================================================================
--- benchmark/benchcltr.hfa	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ benchmark/benchcltr.hfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -1,8 +1,16 @@
 #pragma once
+#include <assert.h>
+#include <stdint.h>
 
-#include <assert.h>
-#include <kernel.hfa>
-#include <thread.hfa>
-#include <stats.hfa>
+#ifdef __cforall
+	#include <kernel.hfa>
+	#include <thread.hfa>
+	#include <stats.hfa>
+#else
+#include <time.h>										// timespec
+#include <sys/time.h>									// timeval
+
+enum { TIMEGRAN = 1000000000LL };					// nanosecond granularity, except for timeval
+#endif
 
 #define BENCH_OPT_SHORT "d:p:t:SPV"
@@ -14,9 +22,4 @@
 	{"procstat",     no_argument      , 0, 'P'}, \
 	{"viewhalts",    no_argument      , 0, 'V'},
-
-#define BENCH_DECL \
-	double duration = 5; \
-	int nprocs = 1; \
-	int nthreads = 1;
 
 #define BENCH_OPT_CASE \
@@ -52,7 +55,24 @@
 		break;
 
+double duration = 5;
+int nprocs = 1;
+int nthreads = 1;
 bool silent = false;
+bool continuous = false;
 bool procstats = false;
 bool viewhalts = false;
+
+#define BENCH_OPT_CFA \
+	{'d', "duration",  "Duration of the experiments in seconds", duration }, \
+	{'t', "nthreads",  "Number of threads to use", nthreads }, \
+	{'p', "nprocs",    "Number of processors to use", nprocs }, \
+	{'S', "nostats",   "Don't print statistics", silent, parse_settrue }, \
+	{'C', "constats",  "Regularly print statistics", continuous, parse_settrue }, \
+	{'P', "procstat",  "Print statistics for each processors", procstats, parse_settrue }, \
+	{'V', "viewhalts", "Visualize halts, prints timestamp and Processor id for each halt.", viewhalts, parse_settrue },
+
+#ifdef __cforall
+#include <parseargs.hfa>
+
 struct cluster * the_benchmark_cluster = 0p;
 struct BenchCluster {
@@ -60,6 +80,6 @@
 };
 
-void ?{}( BenchCluster & this, int flags, int stats ) {
-	(this.self){ "Benchmark Cluster", flags };
+void ?{}( BenchCluster & this, int num_io, const io_context_params & io_params, int stats ) {
+	(this.self){ "Benchmark Cluster", num_io, io_params };
 
 	assert( the_benchmark_cluster == 0p );
@@ -105,4 +125,32 @@
 	}
 }
+#else
+uint64_t getTimeNsec() {
+	timespec curr;
+	clock_gettime( CLOCK_REALTIME, &curr );
+	return (int64_t)curr.tv_sec * TIMEGRAN + curr.tv_nsec;
+}
+
+uint64_t to_miliseconds( uint64_t durtn ) { return durtn / (TIMEGRAN / 1000LL); }
+double to_fseconds(uint64_t durtn ) { return durtn / (double)TIMEGRAN; }
+uint64_t from_fseconds(double sec) { return sec * TIMEGRAN; }
+
+
+void wait_duration(double duration, uint64_t & start, uint64_t & end, bool is_tty) {
+	// Sample the clock every 100ms into `end` until `duration` seconds have passed since `start`, echoing progress.
+	const uint64_t limit = from_fseconds(duration);
+	uint64_t elapsed;
+	do {
+		usleep(100000);
+		end = getTimeNsec();
+		elapsed = end - start;
+		/*if(is_tty)*/ {								// tty check is commented out, so this always prints
+			printf(" %.1f\r", to_fseconds(elapsed));
+			fflush(stdout);
+		}
+	} while( elapsed < limit );
+}
+#endif
+
 
 void bench_usage( char * argv [] ) {
Index: benchmark/io/readv-posix.c
===================================================================
--- benchmark/io/readv-posix.c	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
+++ benchmark/io/readv-posix.c	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -0,0 +1,139 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+extern "C" {
+	#include <locale.h>
+	#include <getopt.h>
+	#include <fcntl.h>
+	#include <sys/uio.h>
+}
+
+#include <unistd.h>
+
+#include <pthread.h>
+
+#include "../benchcltr.hfa"
+
+int fd;
+volatile bool run = false;
+volatile size_t count = 0;
+
+unsigned long int buflen = 50;
+
+void * reader_main( void * arg ) {
+	// Worker body: `arg` is the start barrier shared with main(); once released, keep
+	// reading `buflen` bytes from offset 0 of `fd` and counting reads until `run` is cleared.
+	pthread_barrier_wait( (pthread_barrier_t*) arg );
+	/* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
+	char data[buflen];
+	struct iovec iov = { data, buflen };
+
+	while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
+		ssize_t r = preadv2(fd, &iov, 1, 0, 0);
+		if(r < 0) {
+			// preadv2 reports failure as -1 with the cause in errno, so -r is not an error code.
+			perror("preadv2");
+			abort();
+		}
+		__atomic_fetch_add( &count, 1, __ATOMIC_SEQ_CST );
+	}
+	return NULL;
+}
+
+int main(int argc, char * argv[]) {
+	// Parse the options, open this source file, run nthreads readers for `duration` seconds, then report throughput.
+	// duration, nprocs and nthreads are the file-scope variables from benchcltr.hfa, which no longer provides BENCH_DECL.
+	unsigned flags = 0;
+
+	setlocale(LC_ALL, "");
+
+	for(;;) {
+		static struct option options[] = {
+			BENCH_OPT_LONG
+			{"polled-io",    no_argument      , 0, 'i'},
+			{"bufsize",      required_argument, 0, 'b'},
+			{0, 0, 0, 0}
+		};
+
+		int idx = 0;
+		int opt = getopt_long(argc, argv, BENCH_OPT_SHORT "ib:", options, &idx);
+
+		const char * arg = optarg ? optarg : "";
+		char * end;
+		switch(opt) {
+			// Exit Case
+			case -1:
+				goto arg_loop;
+			BENCH_OPT_CASE
+			case 'i':
+				flags |= O_DIRECT;
+				break;
+			case 'b':
+				buflen = strtoul(arg, &end, 10);
+				if(*end != '\0' || buflen < 10) {	// reject trailing garbage as well as too-small sizes
+					fprintf(stderr, "Buffer size must be at least 10, was %s\n", arg);
+					goto usage;
+				}
+				break;
+			default: /* ? */
+				fprintf(stderr, "%d\n", opt);
+			usage:
+				bench_usage( argv );
+				fprintf( stderr, "  -i, --polled-io          If set opens the file with O_DIRECT\n" );
+				fprintf( stderr, "  -b, --bufsize=SIZE       Number of bytes to read per request\n" );
+				exit(EXIT_FAILURE);
+		}
+	}
+	arg_loop:
+
+	fd = open(__FILE__, flags);
+	if(fd < 0) {
+		fprintf(stderr, "Could not open source file\n");
+		exit(EXIT_FAILURE);
+	}
+
+	printf("Running %d threads, reading %lu bytes each, over %d processors for %f seconds\n", nthreads, buflen, nprocs, duration);
+
+	{
+		uint64_t start, end;
+		{
+			pthread_barrier_t barrier;
+			pthread_barrier_init(&barrier, NULL, nthreads + 1);	// readers plus this thread
+			{
+				pthread_t threads[nthreads];
+				for(int i = 0; i < nthreads; i++) {
+					pthread_attr_t attr;
+					pthread_attr_init( &attr );
+					pthread_create( &threads[i], &attr, reader_main, &barrier );
+				}
+
+				printf("Starting\n");
+				bool is_tty = isatty(STDOUT_FILENO);
+				start = getTimeNsec();
+				run = true;
+
+				pthread_barrier_wait( &barrier );
+				wait_duration(duration, start, end, is_tty);
+
+				run = false;
+				end = getTimeNsec();
+				printf("\nDone\n");
+
+				for(int i = 0; i < nthreads; i++) {
+					void * ret;
+					pthread_join( threads[i], &ret );
+				}
+			}
+			pthread_barrier_destroy(&barrier);
+		}
+		printf("Took %'lu ms\n", (unsigned long)to_miliseconds(end - start));
+		printf("Total reads      : %'15zu\n", count);
+		printf("Reads per second : %'18.2lf\n", ((double)count) / to_fseconds(end - start));
+		printf("Total read size  : %'15zu\n", buflen * count);
+		printf("Bytes per second : %'18.2lf\n", ((double)count * buflen) / to_fseconds(end - start));
+	}
+
+	close(fd);
+}
Index: benchmark/io/readv.cfa
===================================================================
--- benchmark/io/readv.cfa	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ benchmark/io/readv.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -40,5 +40,11 @@
 int do_read(int fd, struct iovec * iov) {
 	// extern ssize_t cfa_preadv2(int, const struct iovec *, int, off_t, int, int = 0, Duration = -1`s, io_cancellation * = 0p, io_context * = 0p);
-	int sflags = 0;
+	int sflags = 0
+	#if defined(CFA_HAVE_IOSQE_ASYNC)
+		| CFA_IO_ASYNC
+	#else
+	#warning no CFA_IO_ASYNC support
+	#endif
+	;
 	if(fixed_file) {
 		sflags |= CFA_IO_FIXED_FD1;
@@ -63,80 +69,54 @@
 
 int main(int argc, char * argv[]) {
-	BENCH_DECL
+	int file_flags = 0;
 	unsigned num_io = 1;
-	io_context_params params;
-	int file_flags = 0;
 	unsigned sublen = 16;
+	unsigned nentries = 0;
 
-	arg_loop:
-	for(;;) {
-		static struct option options[] = {
-			BENCH_OPT_LONG
-			{"bufsize",       required_argument, 0, 'b'},
-			{"submitthread",  no_argument      , 0, 's'},
-			{"eagersubmit",   no_argument      , 0, 'e'},
-			{"kpollsubmit",   no_argument      , 0, 'k'},
-			{"kpollcomplete", no_argument      , 0, 'i'},
-			{"fixed-files",   no_argument      , 0, 'f'},
-			{"open-direct",   no_argument      , 0, 'o'},
-			{"submitlength",  required_argument, 0, 'l'},
-			{0, 0, 0, 0}
-		};
+	bool subthrd = false;
+	bool subeagr = false;
+	bool odirect = false;
+	bool kpollsb = false;
+	bool kpollcp = false;
 
-		int idx = 0;
-		int opt = getopt_long(argc, argv, BENCH_OPT_SHORT "b:sekil:", options, &idx);
+	cfa_option opt[] = {
+		BENCH_OPT_CFA
+		{'b', "bufsize",       "Number of bytes to read per request", buflen},
+		{'s', "submitthread",  "If set, cluster uses polling thread to submit I/O", subthrd, parse_settrue},
+		{'e', "eagersubmit",   "If set, cluster submits I/O eagerly but still aggregates submits", subeagr, parse_settrue},
+		{'f', "fixed-files",   "Pre-register files with the io_contexts", fixed_file, parse_settrue},
+		{'o', "open-direct",   "Open files with O_DIRECT flag, bypassing the file cache", odirect, parse_settrue},
+		{'k', "kpollsubmit",   "If set, cluster uses an in kernel thread to poll submission, implies -f, requires elevated permissions", kpollsb, parse_settrue},
+		{'i', "kpollcomplete", "If set, cluster polls fds for completions instead of relying on interrupts to get notifications, implies -o", kpollcp, parse_settrue},
+		{'l', "submitlength",  "Size of the buffer that stores ready submissions", sublen},
+		{'r', "numentries",    "Number of entries each of the io_context have", nentries},
+		{'n', "numcontexts",   "Number of io_contexts to the cluster", num_io},
+	};
+	int opt_cnt = sizeof(opt) / sizeof(cfa_option);
 
-		const char * arg = optarg ? optarg : "";
-		char * end;
-		switch(opt) {
-			// Exit Case
-			case -1:
-				break arg_loop;
-			BENCH_OPT_CASE
-			case 'b':
-				buflen = strtoul(arg, &end, 10);
-				if(*end != '\0' && buflen < 10) {
-					fprintf(stderr, "Buffer size must be at least 10, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			case 's':
-				params.poller_submits = true;
-				break;
-			case 'e':
-				params.eager_submits = true;
-				break;
-			case 'k':
-				params.poll_submit = true;
-			case 'f':
-				fixed_file = true;
-				break;
-			case 'i':
-				params.poll_complete = true;
-			case 'o':
-				file_flags |= O_DIRECT;
-				break;
-			case 'l':
-				sublen = strtoul(arg, &end, 10);
-				if(*end != '\0' && sublen < 16) {
-					fprintf(stderr, "Submit length must be at least 16, was %s\n", arg);
-					goto usage;
-				}
-				// flags |= (sublen << CFA_CLUSTER_IO_BUFFLEN_OFFSET);
-				break;
-			default: /* ? */
-				fprintf(stderr, "%d\n", opt);
-			usage:
-				bench_usage( argv );
-				fprintf( stderr, "  -b, --buflen=SIZE        Number of bytes to read per request\n" );
-				fprintf( stderr, "  -u, --userthread         If set, cluster uses user-thread to poll I/O\n" );
-				fprintf( stderr, "  -s, --submitthread       If set, cluster uses polling thread to submit I/O\n" );
-				fprintf( stderr, "  -e, --eagersubmit        If set, cluster submits I/O eagerly but still aggregates submits\n" );
-				fprintf( stderr, "  -k, --kpollsubmit        If set, cluster uses IORING_SETUP_SQPOLL\n" );
-				fprintf( stderr, "  -i, --kpollcomplete      If set, cluster uses IORING_SETUP_IOPOLL\n" );
-				fprintf( stderr, "  -l, --submitlength=LEN   Max number of submitions that can be submitted together\n" );
-				exit(EXIT_FAILURE);
+	char **left;
+	parse_args( opt, opt_cnt, "[OPTIONS]...\ncforall yield benchmark", left );
+
+	if(kpollcp || odirect) {
+		if( (buflen % 512) != 0 ) {
+			fprintf(stderr, "Buffer length must be a multiple of 512 when using O_DIRECT, was %lu\n\n", buflen);
+			print_args_usage(opt, opt_cnt, "[OPTIONS]...\ncforall yield benchmark", true);
 		}
 	}
+
+	io_context_params params;
+
+	if( subthrd ) params.poller_submits = true;
+	if( subeagr ) params.eager_submits  = true;
+	if( kpollsb ) params.poll_submit    = true;
+	if( kpollcp ) params.poll_complete  = true;
+
+	if(params.poll_submit  ) fixed_file = true;
+	if(params.poll_complete) odirect    = true;
+
+	params.num_ready = sublen;
+	params.num_entries = nentries;
+
+	if(odirect) file_flags |= O_DIRECT;
 
 	int lfd = open(__FILE__, file_flags);
Index: benchmark/readyQ/yield.cfa
===================================================================
--- benchmark/readyQ/yield.cfa	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ benchmark/readyQ/yield.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -43,29 +43,14 @@
 
 int main(int argc, char * argv[]) {
-	BENCH_DECL
+	unsigned num_io = 1;
+	io_context_params params;
 
-	for(;;) {
-		static struct option options[] = {
-			BENCH_OPT_LONG
-			{0, 0, 0, 0}
-		};
+	cfa_option opt[] = {
+		BENCH_OPT_CFA
+	};
+	int opt_cnt = sizeof(opt) / sizeof(cfa_option);
 
-		int idx = 0;
-		int opt = getopt_long(argc, argv, BENCH_OPT_SHORT, options, &idx);
-
-		const char * arg = optarg ? optarg : "";
-		char * end;
-		switch(opt) {
-			case -1:
-				goto run;
-			BENCH_OPT_CASE
-			default: /* ? */
-				fprintf( stderr, "Unkown option '%c'\n", opt);
-			usage:
-				bench_usage( argv );
-				exit(1);
-		}
-	}
-	run:
+	char **left;
+	parse_args( argc, argv, opt, opt_cnt, "[OPTIONS]...\ncforall yield benchmark", left );
 
 	{
@@ -73,5 +58,5 @@
 
 		Time start, end;
-		BenchCluster cl = { 0, CFA_STATS_READY_Q };
+		BenchCluster cl = { num_io, params, CFA_STATS_READY_Q };
 		{
 			BenchProc procs[nprocs];
Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ libcfa/src/concurrency/coroutine.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -215,4 +215,8 @@
 		return cor;
 	}
+
+	struct $coroutine * __cfactx_cor_active(void) {
+		return active_coroutine();
+	}
 }
 
Index: libcfa/src/concurrency/invoke.c
===================================================================
--- libcfa/src/concurrency/invoke.c	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ libcfa/src/concurrency/invoke.c	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -29,4 +29,5 @@
 // Called from the kernel when starting a coroutine or task so must switch back to user mode.
 
+extern struct $coroutine * __cfactx_cor_active(void);
 extern struct $coroutine * __cfactx_cor_finish(void);
 extern void __cfactx_cor_leave ( struct $coroutine * );
@@ -35,4 +36,8 @@
 extern void disable_interrupts() OPTIONAL_THREAD;
 extern void enable_interrupts( __cfaabi_dbg_ctx_param );
+
+struct exception_context_t * this_exception_context() {
+	return &__get_stack( __cfactx_cor_active() )->exception_context;	// address of the exception_context member reached via __get_stack() for the active coroutine
+}
 
 void __cfactx_invoke_coroutine(
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ libcfa/src/concurrency/invoke.h	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -98,4 +98,6 @@
 	}
 
+	struct exception_context_t * this_exception_context();
+
 	// struct which calls the monitor is accepting
 	struct __waitfor_mask_t {
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ libcfa/src/concurrency/io.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -359,12 +359,12 @@
 
 			// We got the lock
+			// Collect the submissions
 			unsigned to_submit = __collect_submitions( ring );
+
+			// Actually submit
 			int ret = __io_uring_enter( ring, to_submit, false );
-			if( ret < 0 ) {
-				unlock(ring.submit_q.lock);
-				return;
-			}
-
-			/* paranoid */ verify( ret > 0 || to_submit == 0 || (ring.ring_flags & IORING_SETUP_SQPOLL) );
+
+			unlock(ring.submit_q.lock);
+			if( ret < 0 ) return;
 
 			// Release the consumed SQEs
@@ -372,11 +372,9 @@
 
 			// update statistics
-			__STATS__( true,
+			__STATS__( false,
 				io.submit_q.submit_avg.rdy += to_submit;
 				io.submit_q.submit_avg.csm += ret;
 				io.submit_q.submit_avg.cnt += 1;
 			)
-
-			unlock(ring.submit_q.lock);
 		}
 		else {
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -298,5 +298,11 @@
 		if( params_in.poll_complete ) params.flags |= IORING_SETUP_IOPOLL;
 
-		uint32_t nentries = params_in.num_entries;
+		uint32_t nentries = params_in.num_entries != 0 ? params_in.num_entries : 256;
+		if( !is_pow2(nentries) ) {
+			abort("ERROR: I/O setup 'num_entries' must be a power of 2\n");
+		}
+		if( params_in.poller_submits && params_in.eager_submits ) {
+			abort("ERROR: I/O setup 'poller_submits' and 'eager_submits' cannot be used together\n");
+		}
 
 		int fd = syscall(__NR_io_uring_setup, nentries, &params );
Index: libcfa/src/concurrency/iocall.cfa
===================================================================
--- libcfa/src/concurrency/iocall.cfa	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ libcfa/src/concurrency/iocall.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -101,5 +101,4 @@
 	#endif
 
-
 	#define __submit_prelude \
 		if( 0 != (submit_flags & LINK_FLAGS) ) { errno = ENOTSUP; return -1; } \
@@ -110,6 +109,7 @@
 		struct io_uring_sqe * sqe; \
 		uint32_t idx; \
+		uint8_t sflags = REGULAR_FLAGS & submit_flags; \
 		[sqe, idx] = __submit_alloc( ring, (uint64_t)(uintptr_t)&data ); \
-		sqe->flags = REGULAR_FLAGS & submit_flags;
+		sqe->flags = sflags;
 
 	#define __submit_wait \
@@ -186,5 +186,12 @@
 			__submit_prelude
 
-			(*sqe){ IORING_OP_READV, fd, iov, iovcnt, offset };
+			sqe->opcode = IORING_OP_READV;
+			sqe->ioprio = 0;
+			sqe->fd = fd;
+			sqe->off = offset;
+			sqe->addr = (uint64_t)(uintptr_t)iov;
+			sqe->len = iovcnt;
+			sqe->rw_flags = 0;
+			sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
 
 			__submit_wait
Index: libcfa/src/exception.c
===================================================================
--- libcfa/src/exception.c	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ libcfa/src/exception.c	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -59,11 +59,9 @@
 
 
-// Temperary global exception context. Does not work with concurency.
-static struct exception_context_t shared_stack = {NULL, NULL};
-
 // Get the current exception context.
 // There can be a single global until multithreading occurs, then each stack
-// needs its own. It will have to be updated to handle that.
-struct exception_context_t * this_exception_context() {
+// needs its own. We get this from libcfathreads (no weak attribute).
+__attribute__((weak)) struct exception_context_t * this_exception_context() {
+	static struct exception_context_t shared_stack = {NULL, NULL};
 	return &shared_stack;
 }
Index: libcfa/src/parseargs.cfa
===================================================================
--- libcfa/src/parseargs.cfa	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ libcfa/src/parseargs.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -19,4 +19,5 @@
 	extern          long long int strtoll (const char* str, char** endptr, int base);
 	extern unsigned long long int strtoull(const char* str, char** endptr, int base);
+	extern                 double strtod  (const char* str, char** endptr);
 }
 
@@ -28,15 +29,5 @@
 extern char ** cfa_args_envp;
 
-void printopt(FILE * out, int width, int max, char sn, const char * ln, const char * help) {
-	int hwidth = max - (11 + width);
-	if(hwidth <= 0) hwidth = max;
-
-	fprintf(out, "  -%c, --%-*s   %.*s\n", sn, width, ln, hwidth, help);
-	for() {
-		help += min(strlen(help), hwidth);
-		if('\0' == *help) break;
-		fprintf(out, "%*s%.*s\n", width + 11, "", hwidth, help);
-	}
-}
+static void usage(char * cmd, cfa_option options[], size_t opt_count, const char * usage, FILE * out)  __attribute__ ((noreturn));
 
 void parse_args( cfa_option options[], size_t opt_count, const char * usage, char ** & left ) {
@@ -44,4 +35,6 @@
 }
 
+//-----------------------------------------------------------------------------
+// getopt_long wrapping
 void parse_args(
 	int argc,
@@ -53,6 +46,4 @@
 ) {
 	struct option optarr[opt_count + 2];
-	int width = 0;
-	int max_width = 1_000_000;
 	{
 		int idx = 0;
@@ -69,7 +60,4 @@
 				}
 				idx++;
-
-				int w = strlen(options[i].long_name);
-				if(w > width) width = w;
 			}
 		}
@@ -106,5 +94,5 @@
 				out = stdout;
 			case '?':
-				goto USAGE;
+				usage(argv[0], options, opt_count, usage, out);
 			default:
 				for(i; opt_count) {
@@ -115,5 +103,5 @@
 
 						fprintf(out, "Argument '%s' for option %c could not be parsed\n\n", arg, (char)opt);
-						goto USAGE;
+						usage(argv[0], options, opt_count, usage, out);
 					}
 				}
@@ -122,6 +110,41 @@
 
 	}
-
-	USAGE:;
+}
+
+//-----------------------------------------------------------------------------
+// Print usage
+static void printopt(FILE * out, int width, int max, char sn, const char * ln, const char * help) {
+	int hwidth = max - (11 + width);
+	if(hwidth <= 0) hwidth = max;
+
+	fprintf(out, "  -%c, --%-*s   %.*s\n", sn, width, ln, hwidth, help);
+	for() {
+		help += min(strlen(help), hwidth);
+		if('\0' == *help) break;
+		fprintf(out, "%*s%.*s\n", width + 11, "", hwidth, help);
+	}
+}
+
+void print_args_usage(cfa_option options[], size_t opt_count, const char * usage, bool error)  __attribute__ ((noreturn)) {
+	usage(cfa_args_argv[0], options, opt_count, usage, error ? stderr : stdout);
+}
+
+void print_args_usage(int , char * argv[], cfa_option options[], size_t opt_count, const char * usage, bool error)  __attribute__ ((noreturn)) {
+	usage(argv[0], options, opt_count, usage, error ? stderr : stdout);
+}
+
+static void usage(char * cmd, cfa_option options[], size_t opt_count, const char * help, FILE * out) __attribute__((noreturn)) {
+	int width = 0;
+	{
+		int idx = 0;
+		for(i; opt_count) {
+			if(options[i].long_name) {
+				int w = strlen(options[i].long_name);
+				if(w > width) width = w;
+			}
+		}
+	}
+
+	int max_width = 1_000_000;
 	int outfd = fileno(out);
 	if(isatty(outfd)) {
@@ -132,5 +155,5 @@
 	}
 
-	fprintf(out, "Usage:\n  %s %s\n", argv[0], usage);
+	fprintf(out, "Usage:\n  %s %s\n", cmd, help);
 
 	for(i; opt_count) {
@@ -141,4 +164,6 @@
 }
 
+//-----------------------------------------------------------------------------
+// Typed argument parsing
 bool parse_yesno(const char * arg, bool & value ) {
 	if(strcmp(arg, "yes") == 0) {
@@ -167,4 +192,13 @@
 bool parse(const char * arg, const char * & value ) {
 	value = arg;
+	return true;
+}
+
+bool parse(const char * arg, int & value) {
+	char * end;
+	int r = strtoll(arg, &end, 10);
+	if(*end != '\0') return false;
+
+	value = r;
 	return true;
 }
@@ -200,10 +234,10 @@
 }
 
-bool parse(const char * arg, int & value) {
-	char * end;
-	int r = strtoll(arg, &end, 10);
-	if(*end != '\0') return false;
-
-	value = r;
-	return true;
-}
+bool parse(const char * arg, double & value) {
+	// Parse `arg` as a floating-point number; `value` is written only when the whole string converts.
+	char * end;
+	double r = strtod(arg, &end);
+	if(end == arg || *end != '\0') return false;	// nothing consumed (e.g. "") or trailing characters
+	value = r;
+	return true;
+}
Index: libcfa/src/parseargs.hfa
===================================================================
--- libcfa/src/parseargs.hfa	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ libcfa/src/parseargs.hfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -34,4 +34,7 @@
 void parse_args( int argc, char * argv[], cfa_option options[], size_t opt_count, const char * usage, char ** & left );
 
+void print_args_usage(cfa_option options[], size_t opt_count, const char * usage, bool error)  __attribute__ ((noreturn));
+void print_args_usage(int argc, char * argv[], cfa_option options[], size_t opt_count, const char * usage, bool error)  __attribute__ ((noreturn));
+
 bool parse_yesno   (const char *, bool & );
 bool parse_settrue (const char *, bool & );
@@ -39,6 +42,7 @@
 
 bool parse(const char *, const char * & );
+bool parse(const char *, int & );
 bool parse(const char *, unsigned & );
 bool parse(const char *, unsigned long & );
 bool parse(const char *, unsigned long long & );
-bool parse(const char *, int & );
+bool parse(const char *, double & );
Index: tests/Makefile.am
===================================================================
--- tests/Makefile.am	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ tests/Makefile.am	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -163,4 +163,11 @@
 	$(CFACOMPILETEST) -DERR2 -c -fsyntax-only -o $(abspath ${@})
 
+# Exception Tests
+# Test with libcfathread; it changes how storage works.
+
+exceptions/%-threads : exceptions/%.cfa $(CFACCBIN)
+	$(CFACOMPILETEST) -include exceptions/with-threads.hfa -c -o $(abspath ${@}).o
+	$(CFACCLOCAL) $($(shell echo "${@}_FLAGSLD" | sed 's/-\|\//_/g')) $(abspath ${@}).o -o $(abspath ${@})
+
 #------------------------------------------------------------------------------
 # Other targets
Index: tests/exceptions/.expect/conditional-threads.txt
===================================================================
--- tests/exceptions/.expect/conditional-threads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
+++ tests/exceptions/.expect/conditional-threads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -0,0 +1,2 @@
+Caught num_error: expected=2 actual=2.
+Caught num_error: expected=2 actual=2.
Index: tests/exceptions/.expect/defaults-threads.txt
===================================================================
--- tests/exceptions/.expect/defaults-threads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
+++ tests/exceptions/.expect/defaults-threads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -0,0 +1,10 @@
+Should be printed.
+jump catch handler.
+jump default handler.
+Catch unhandled_exception.
+cross terminate throw
+cross terminate default
+cross terminate catch
+cross resume throw
+cross resume default
+cross resume catch
Index: tests/exceptions/.expect/finally-threads.txt
===================================================================
--- tests/exceptions/.expect/finally-threads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
+++ tests/exceptions/.expect/finally-threads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -0,0 +1,21 @@
+termination throw
+finally during unwind
+Exiting: termination inner finally
+termination catch
+finally after catch
+Exiting: termination outer finally
+
+resumption throw
+resumption catch
+finally after resume
+Exiting: resumption inner finally
+finally after catch
+Exiting: resumption outer finally
+
+walking out of try
+walking through finally
+Exiting: walking finally
+
+jumping out of try
+jumping through finally
+Exiting: jumping finally
Index: tests/exceptions/.expect/resume-threads.txt
===================================================================
--- tests/exceptions/.expect/resume-threads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
+++ tests/exceptions/.expect/resume-threads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -0,0 +1,36 @@
+simple throw
+simple catch
+Exiting: simple catch clause
+end of try clause
+Exiting: simple try clause
+
+catch-all
+
+throwing child exception
+inner parent match
+
+caught yin as yin
+
+rethrow inner try
+caught throw, will rethrow
+Exiting: rethrowing catch clause
+caught rethrow
+Exiting: rethrow catch clause
+Exiting: rethrow inner try
+
+caught yin, will throw yang
+caught yang
+
+throwing first exception
+caught first exception
+throwing second exception
+caught second exception
+recaught first exception
+
+inner catch
+inner catch
+outer catch
+
+throw
+rethrow
+handle
Index: tests/exceptions/.expect/terminate-threads.txt
===================================================================
--- tests/exceptions/.expect/terminate-threads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
+++ tests/exceptions/.expect/terminate-threads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -0,0 +1,34 @@
+simple throw
+Exiting: simple try clause
+simple catch
+Exiting: simple catch clause
+
+catch-all
+
+throwing child exception
+inner parent match
+
+caught yin as yin
+
+rethrow inner try
+Exiting: rethrow inner try
+caught throw, will rethrow
+Exiting: rethrowing catch clause
+caught rethrow
+Exiting: rethrow catch clause
+
+caught yin, will throw yang
+caught yang
+
+throwing first exception
+caught first exception
+throwing second exception
+caught second exception
+recaught first exception
+
+inner catch
+outer catch
+
+throw
+rethrow
+handle
Index: tests/exceptions/terminate.cfa
===================================================================
--- tests/exceptions/terminate.cfa	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ tests/exceptions/terminate.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -142,3 +142,2 @@
 	}
 }
-
Index: tests/exceptions/with-threads.hfa
===================================================================
--- tests/exceptions/with-threads.hfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
+++ tests/exceptions/with-threads.hfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -0,0 +1,8 @@
+// Header used to force linking with libcfathread.
+
+// I know it's "with threads", but a coroutine is enough to bring it all in.
+#include <coroutine.hfa>
+
+coroutine DummyCoroutine {};
+
+DummyCoroutine ignored_dummy_coroutine;
Index: tests/linking/.expect/exception-nothreads.txt
===================================================================
--- tests/linking/.expect/exception-nothreads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
+++ tests/linking/.expect/exception-nothreads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -0,0 +1,1 @@
+no threads
Index: tests/linking/.expect/exception-withthreads.txt
===================================================================
--- tests/linking/.expect/exception-withthreads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
+++ tests/linking/.expect/exception-withthreads.txt	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -0,0 +1,1 @@
+with threads
Index: tests/linking/exception-nothreads.cfa
===================================================================
--- tests/linking/exception-nothreads.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
+++ tests/linking/exception-nothreads.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -0,0 +1,33 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// exception-nothreads.cfa --
+//
+// Author           : Andrew Beach
+// Created On       : Thr 13 16:12:00 2020
+// Last Modified By : Andrew Beach
+// Last Modified On : Thr 13 16:49:00 2020
+// Update Count     : 0
+//
+
+#include <stdlib.hfa>
+#include <exception.hfa>
+
+TRIVIAL_EXCEPTION(ping);
+
+int main(void) {
+	try {
+		throwResume (ping){};
+	} catchResume (ping *) {
+		printf("%s threads\n", threading_enabled() ? "with" : "no");
+	}
+	return 0;
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa exception-nothreads.cfa" //
+// End: //
Index: tests/linking/exception-withthreads.cfa
===================================================================
--- tests/linking/exception-withthreads.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
+++ tests/linking/exception-withthreads.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -0,0 +1,34 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// exception-withthreads.cfa --
+//
+// Author           : Andrew Beach
+// Created On       : Thr 13 16:12:00 2020
+// Last Modified By : Andrew Beach
+// Last Modified On : Fri 14 11:20:00 2020
+// Update Count     : 0
+//
+
+#include <stdlib.hfa>
+#include <exception.hfa>
+#include "../exceptions/with-threads.hfa"
+
+TRIVIAL_EXCEPTION(ping);
+
+int main(void) {
+	try {
+		throwResume (ping){};
+	} catchResume (ping *) {
+		printf("%s threads\n", threading_enabled() ? "with" : "no");
+	}
+	return 0;
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa exception-withthreads.cfa" //
+// End: //
Index: tests/linking/withthreads.cfa
===================================================================
--- tests/linking/withthreads.cfa	(revision 7f51b9dc349a76568aa4d3de2895e098988e60b4)
+++ tests/linking/withthreads.cfa	(revision 762fbc1ffb7ffc049f20e456e655c8289f8e19f5)
@@ -5,5 +5,5 @@
 // file "LICENCE" distributed with Cforall.
 //
-// nothreads.cfa --
+// withthreads.cfa --
 //
 // Author           : Thierry Delisle
