Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/Makefile.am	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -62,4 +62,5 @@
 	iterator.hfa \
 	limits.hfa \
+	memory.hfa \
 	parseargs.hfa \
 	rational.hfa \
@@ -87,4 +88,5 @@
 inst_thread_headers_nosrc = \
 	bits/random.hfa \
+	concurrency/clib/cfathread.h \
 	concurrency/invoke.h \
 	concurrency/kernel/fwd.hfa
@@ -102,4 +104,5 @@
 	concurrency/alarm.cfa \
 	concurrency/alarm.hfa \
+	concurrency/clib/cfathread.cfa \
 	concurrency/CtxSwitch-@ARCHITECTURE@.S \
 	concurrency/invoke.c \
@@ -107,5 +110,5 @@
 	concurrency/io/setup.cfa \
 	concurrency/io/types.hfa \
-	concurrency/iocall.cfa \
+	concurrency/io/call.cfa \
 	concurrency/iofwd.hfa \
 	concurrency/kernel_private.hfa \
Index: libcfa/src/bits/locks.hfa
===================================================================
--- libcfa/src/bits/locks.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/bits/locks.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -164,6 +164,6 @@
 
 	struct $thread;
-	extern void park( __cfaabi_dbg_ctx_param );
-	extern void unpark( struct $thread * this __cfaabi_dbg_ctx_param2 );
+	extern void park( void );
+	extern void unpark( struct $thread * this );
 	static inline struct $thread * active_thread ();
 
@@ -191,5 +191,5 @@
 					/* paranoid */ verify( expected == 0p );
 					if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-						park( __cfaabi_dbg_ctx );
+						park();
 						return true;
 					}
@@ -210,5 +210,5 @@
 				else {
 					if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-						unpark( expected __cfaabi_dbg_ctx2 );
+						unpark( expected );
 						return true;
 					}
@@ -244,5 +244,5 @@
 				/* paranoid */ verify( expected == 0p );
 				if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-					park( __cfaabi_dbg_ctx );
+					park();
 					/* paranoid */ verify( this.ptr == 1p );
 					return true;
@@ -256,5 +256,5 @@
 			struct $thread * got = __atomic_exchange_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
 			if( got == 0p ) return false;
-			unpark( got __cfaabi_dbg_ctx2 );
+			unpark( got );
 			return true;
 		}
@@ -357,5 +357,12 @@
 				struct oneshot * expected = this.ptr;
 				// was this abandoned?
-				if( expected == 3p ) { free( &this ); return false; }
+				#if defined(__GNUC__) && __GNUC__ >= 7
+					#pragma GCC diagnostic push
+					#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
+				#endif
+					if( expected == 3p ) { free( &this ); return false; }
+				#if defined(__GNUC__) && __GNUC__ >= 7
+					#pragma GCC diagnostic pop
+				#endif
 
 				/* paranoid */ verify( expected != 1p ); // Future is already fulfilled, should not happen
Index: libcfa/src/concurrency/CtxSwitch-i386.S
===================================================================
--- libcfa/src/concurrency/CtxSwitch-i386.S	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/CtxSwitch-i386.S	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -10,6 +10,6 @@
 // Created On       : Tue Dec 6 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Aug 16 08:46:22 2020
-// Update Count     : 4
+// Last Modified On : Sun Sep  6 18:23:37 2020
+// Update Count     : 5
 //
 
@@ -35,5 +35,5 @@
 
 	// Copy the "from" context argument from the stack to register eax
-	// Return address is at 0(%esp), with parameters following
+	// Return address is at 0(%esp), with parameters following.
 
 	movl 4(%esp),%eax
@@ -50,7 +50,7 @@
 	movl %ebp,FP_OFFSET(%eax)
 
-	// Copy the "to" context argument from the stack to register eax
-	// Having pushed three words (= 12 bytes) on the stack, the
-	// argument is now at 8 + 12 = 20(%esp)
+	// Copy the "to" context argument from the stack to register eax. Having
+	// pushed 3 words (= 12 bytes) on the stack, the argument is now at
+	// 8 + 12 = 20(%esp).
 
 	movl 20(%esp),%eax
Index: libcfa/src/concurrency/alarm.cfa
===================================================================
--- libcfa/src/concurrency/alarm.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/alarm.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -130,5 +130,5 @@
 
 	register_self( &node );
-	park( __cfaabi_dbg_ctx );
+	park();
 
 	/* paranoid */ verify( !node.set );
Index: libcfa/src/concurrency/clib/cfathread.cfa
===================================================================
--- libcfa/src/concurrency/clib/cfathread.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
+++ libcfa/src/concurrency/clib/cfathread.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -0,0 +1,66 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// clib/cfathread.cfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Sep 22 15:31:20 2020
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include "kernel.hfa"
+#include "thread.hfa"
+
+thread CRunner {
+	void (*themain)( CRunner * );
+};
+
+static void ?{}( CRunner & this, void (*themain)( CRunner * ) ) {
+	this.themain = themain;
+}
+
+void main( CRunner & this ) {
+	this.themain( &this );
+}
+
+processor * procs = 0p;
+int proc_cnt = 1;
+
+extern "C" {
+	//--------------------
+	// Basic thread management
+	CRunner * cfathread_create( void (*main)( CRunner * ) ) {
+		return new( main );
+	}
+
+	void cfathread_join( CRunner * thrd ) {
+		delete( thrd );
+	}
+
+	void cfathread_park( void ) {
+		park();
+	}
+
+	void cfathread_unpark( CRunner * thrd ) {
+		unpark( *thrd );
+	}
+
+	void cfathread_yield( void ) {
+		yield();
+	}
+
+	//--------------------
+	// Basic kernel features
+	void cfathread_setproccnt( int ncnt ) { // C entry point: resize the array of extra processors kept in `procs`
+		assert( ncnt >= 1 );
+		adelete(proc_cnt, procs); // release the current `procs` array (`procs` is 0p until the first call)
+
+		proc_cnt = ncnt - 1; // NOTE(review): presumably one processor already exists outside `procs` — confirm
+		procs = anew(proc_cnt);
+	}
+}
Index: libcfa/src/concurrency/clib/cfathread.h
===================================================================
--- libcfa/src/concurrency/clib/cfathread.h	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
+++ libcfa/src/concurrency/clib/cfathread.h	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -0,0 +1,43 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// clib/cfathread.h --
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Sep 22 15:31:20 2020
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include "stddef.h"
+#include "invoke.h"
+
+#if defined(__cforall) || defined(__cplusplus)
+extern "C" {
+#endif
+	//--------------------
+	// Basic types
+	struct cfathread_CRunner_t;
+	typedef struct cfathread_CRunner_t * cfathread_t;
+
+	//--------------------
+	// Basic thread support
+	cfathread_t cfathread_create( void (*main)( cfathread_t ) );
+	void cfathread_join( cfathread_t );
+
+	void cfathread_park( void );
+	void cfathread_unpark( cfathread_t );
+	void cfathread_yield( void );
+
+	//--------------------
+	// Basic kernel features
+	void cfathread_setproccnt( int );
+
+
+#if defined(__cforall) || defined(__cplusplus)
+}
+#endif
Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/coroutine.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -47,4 +47,45 @@
 
 //-----------------------------------------------------------------------------
+FORALL_DATA_INSTANCE(CoroutineCancelled,
+		(dtype coroutine_t | sized(coroutine_t)), (coroutine_t))
+
+struct __cfaehm_node {
+	struct _Unwind_Exception unwind_exception;
+	struct __cfaehm_node * next;
+	int handler_index;
+};
+
+forall(dtype T)
+void mark_exception(CoroutineCancelled(T) *) {}
+
+forall(dtype T | sized(T))
+void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) {
+	dst->the_coroutine = src->the_coroutine;
+	dst->the_exception = src->the_exception;
+}
+
+forall(dtype T)
+const char * msg(CoroutineCancelled(T) *) {
+	return "CoroutineCancelled(...)";
+}
+
+// This code should not be inlined. It is the error path on resume.
+forall(dtype T | is_coroutine(T))
+void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc ) {
+	verify( desc->cancellation );
+	desc->state = Cancelled;
+	exception_t * except = (exception_t *)(1 + (__cfaehm_node *)desc->cancellation); // address one __cfaehm_node past the cancellation pointer, read as the exception object
+
+	CoroutineCancelled(T) except; // NOTE(review): second `except`; differs from the pointer above only by type
+	except.the_coroutine = &cor;
+	except.the_exception = except; // NOTE(review): presumably resolves to the exception_t * declared above — confirm
+	throwResume except;
+
+	except->virtual_table->free( except ); // NOTE(review): presumably runs only after the resumption returns here — confirm
+	free( desc->cancellation );
+	desc->cancellation = 0p; // clear the field that resume tests before calling this function
+}
+
+//-----------------------------------------------------------------------------
 // Global state variables
 
@@ -180,4 +221,6 @@
 	this->storage->limit = storage;
 	this->storage->base  = (void*)((intptr_t)storage + size);
+	this->storage->exception_context.top_resume = 0p;
+	this->storage->exception_context.current_exception = 0p;
 	__attribute__((may_alias)) intptr_t * istorage = (intptr_t*)&this->storage;
 	*istorage |= userStack ? 0x1 : 0x0;
Index: libcfa/src/concurrency/coroutine.hfa
===================================================================
--- libcfa/src/concurrency/coroutine.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/coroutine.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -18,4 +18,23 @@
 #include <assert.h>
 #include "invoke.h"
+#include "../exception.hfa"
+
+//-----------------------------------------------------------------------------
+// Exception thrown from resume when a coroutine stack is cancelled.
+// Should not have to be sized (see trac #196).
+FORALL_DATA_EXCEPTION(CoroutineCancelled,
+		(dtype coroutine_t | sized(coroutine_t)), (coroutine_t)) (
+	coroutine_t * the_coroutine;
+	exception_t * the_exception;
+);
+
+forall(dtype T)
+void mark_exception(CoroutineCancelled(T) *);
+
+forall(dtype T | sized(T))
+void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src);
+
+forall(dtype T)
+const char * msg(CoroutineCancelled(T) *);
 
 //-----------------------------------------------------------------------------
@@ -23,7 +42,9 @@
 // Anything that implements this trait can be resumed.
 // Anything that is resumed is a coroutine.
-trait is_coroutine(dtype T) {
-      void main(T & this);
-      $coroutine * get_coroutine(T & this);
+trait is_coroutine(dtype T | sized(T)
+		| is_resumption_exception(CoroutineCancelled(T))
+		| VTABLE_ASSERTION(CoroutineCancelled, (T))) {
+	void main(T & this);
+	$coroutine * get_coroutine(T & this);
 };
 
@@ -112,4 +133,7 @@
 	}
 }
+
+forall(dtype T | is_coroutine(T))
+void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc );
 
 // Resume implementation inlined for performance
@@ -145,4 +169,7 @@
 	// always done for performance testing
 	$ctx_switch( src, dst );
+	if ( unlikely(dst->cancellation) ) {
+		__cfaehm_cancelled_coroutine( cor, dst );
+	}
 
 	return cor;
Index: libcfa/src/concurrency/exception.cfa
===================================================================
--- libcfa/src/concurrency/exception.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/exception.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -57,5 +57,8 @@
 
 STOP_AT_END_FUNCTION(coroutine_cancelstop,
-	// TODO: Instead pass information to the last resumer.
+	struct $coroutine * src = ($coroutine *)stop_param;
+	struct $coroutine * dst = src->last;
+
+	$ctx_switch( src, dst );
 	abort();
 )
Index: libcfa/src/concurrency/exception.hfa
===================================================================
--- libcfa/src/concurrency/exception.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/exception.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -18,11 +18,11 @@
 #include "bits/defs.hfa"
 #include "invoke.h"
-struct _Unwind_Exception;
-
-// It must also be usable as a C header file.
 
 #ifdef __cforall
 extern "C" {
+
+#define HIDE_EXPORTS
 #endif
+#include "unwind.h"
 
 struct exception_context_t * this_exception_context(void) OPTIONAL_THREAD;
@@ -32,4 +32,5 @@
 
 #ifdef __cforall
+#undef HIDE_EXPORTS
 }
 #endif
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/invoke.h	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -68,5 +68,5 @@
 	};
 
-	enum __Coroutine_State { Halted, Start, Primed, Blocked, Ready, Active };
+	enum __Coroutine_State { Halted, Start, Primed, Blocked, Ready, Active, Cancelled };
 
 	struct $coroutine {
@@ -93,4 +93,6 @@
 
 	};
+	// Wrapper for gdb
+	struct cfathread_coroutine_t { struct $coroutine debug; };
 
 	static inline struct __stack_t * __get_stack( struct $coroutine * cor ) {
@@ -129,4 +131,6 @@
 		struct __condition_node_t * dtor_node;
 	};
+	// Wrapper for gdb
+	struct cfathread_monitor_t { struct $monitor debug; };
 
 	struct __monitor_group_t {
@@ -186,16 +190,10 @@
 		} node;
 
-		#ifdef __CFA_DEBUG__
-			// previous function to park/unpark the thread
-			const char * park_caller;
-			int park_result;
-			enum __Coroutine_State park_state;
-			bool park_stale;
-			const char * unpark_caller;
-			int unpark_result;
-			enum __Coroutine_State unpark_state;
-			bool unpark_stale;
+		#if defined( __CFA_WITH_VERIFY__ )
+			unsigned long long canary;
 		#endif
 	};
+	// Wrapper for gdb
+	struct cfathread_thread_t { struct $thread debug; };
 
 	#ifdef __CFA_DEBUG__
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/io.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -69,5 +69,5 @@
 		if( block ) {
 			enable_interrupts( __cfaabi_dbg_ctx );
-			park( __cfaabi_dbg_ctx );
+			park();
 			disable_interrupts();
 		}
@@ -97,5 +97,5 @@
 
 		if(nextt) {
-			unpark( nextt __cfaabi_dbg_ctx2 );
+			unpark( nextt );
 			enable_interrupts( __cfaabi_dbg_ctx );
 			return true;
@@ -159,9 +159,8 @@
 
 	static inline void process(struct io_uring_cqe & cqe ) {
-		struct __io_user_data_t * data = (struct __io_user_data_t *)(uintptr_t)cqe.user_data;
-		__cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", data, cqe.res, data->thrd );
-
-		data->result = cqe.res;
-		post( data->sem );
+		struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data; // user_data holds the address of the io_future_t given at submission
+		__cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future );
+
+		fulfil( *future, cqe.res ); // store the result in the future and fulfil it
 	}
 
Index: libcfa/src/concurrency/io/call.cfa.in
===================================================================
--- libcfa/src/concurrency/io/call.cfa.in	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
+++ libcfa/src/concurrency/io/call.cfa.in	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -0,0 +1,505 @@
+#!python3
+#
+# Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
+#
+# The contents of this file are covered under the licence agreement in the
+# file "LICENCE" distributed with Cforall.
+#
+# call.cfa.in -- Python script to generate io/call.cfa
+#
+# Author           : Thierry Delisle
+# Created On       : Fri Sep 11 12:41:16 2020
+# Last Modified By :
+# Last Modified On :
+# Update Count     :
+#
+
+Header = """//
+// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// call.cfa -- Api for cforall
+//
+// Author           : Generated from call.cfa.in
+// Created On       : {}
+//
+
+"""
+
+Prelude = """#define __cforall_thread__
+
+#include "bits/defs.hfa"
+#include "kernel.hfa"
+#include "io/types.hfa"
+
+//=============================================================================================
+// I/O uring backend
+//=============================================================================================
+
+#if defined(CFA_HAVE_LINUX_IO_URING_H)
+	#include <assert.h>
+	#include <stdint.h>
+	#include <errno.h>
+	#include <linux/io_uring.h>
+
+	#include "kernel/fwd.hfa"
+
+	#if defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_IO_DRAIN) && defined(CFA_HAVE_IOSQE_ASYNC)
+		#define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_DRAIN | IOSQE_ASYNC)
+	#elif defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_ASYNC)
+		#define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_ASYNC)
+	#elif defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_IO_DRAIN)
+		#define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_DRAIN)
+	#elif defined(CFA_HAVE_IOSQE_IO_DRAIN) && defined(CFA_HAVE_IOSQE_ASYNC)
+		#define REGULAR_FLAGS (IOSQE_IO_DRAIN | IOSQE_ASYNC)
+	#elif defined(CFA_HAVE_IOSQE_FIXED_FILE)
+		#define REGULAR_FLAGS (IOSQE_FIXED_FILE)
+	#elif defined(CFA_HAVE_IOSQE_IO_DRAIN)
+		#define REGULAR_FLAGS (IOSQE_IO_DRAIN)
+	#elif defined(CFA_HAVE_IOSQE_ASYNC)
+		#define REGULAR_FLAGS (IOSQE_ASYNC)
+	#else
+		#define REGULAR_FLAGS (0)
+	#endif
+
+	#if defined(CFA_HAVE_IOSQE_IO_LINK) && defined(CFA_HAVE_IOSQE_IO_HARDLINK)
+		#define LINK_FLAGS (IOSQE_IO_LINK | IOSQE_IO_HARDLINK)
+	#elif defined(CFA_HAVE_IOSQE_IO_LINK)
+		#define LINK_FLAGS (IOSQE_IO_LINK)
+	#elif defined(CFA_HAVE_IOSQE_IO_HARDLINK)
+		#define LINK_FLAGS (IOSQE_IO_HARDLINK)
+	#else
+		#define LINK_FLAGS (0)
+	#endif
+
+	#if defined(CFA_HAVE_SPLICE_F_FD_IN_FIXED)
+		#define SPLICE_FLAGS (SPLICE_F_FD_IN_FIXED)
+	#else
+		#define SPLICE_FLAGS (0)
+	#endif
+
+	extern [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data );
+	extern void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1)));
+
+	static inline io_context * __get_io_context( void ) {
+		cluster * cltr = active_cluster();
+
+		/* paranoid */ verifyf( cltr, "No active cluster for io operation\\n");
+		assertf( cltr->io.cnt > 0, "Cluster %p has no default io contexts and no context was specified\\n", cltr );
+
+		/* paranoid */ verifyf( cltr->io.ctxs, "default io contexts for cluster %p are missing\\n", cltr);
+		return &cltr->io.ctxs[ __tls_rand() % cltr->io.cnt ];
+	}
+#endif
+
+//=============================================================================================
+// I/O Forwards
+//=============================================================================================
+#include <time.hfa>
+
+// Some forward declarations
+#include <errno.h>
+#include <unistd.h>
+
+extern "C" {
+	#include <sys/types.h>
+	#include <sys/socket.h>
+	#include <sys/syscall.h>
+
+#if defined(CFA_HAVE_PREADV2)
+	struct iovec;
+	extern ssize_t preadv2 (int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
+#endif
+#if defined(CFA_HAVE_PWRITEV2)
+	struct iovec;
+	extern ssize_t pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
+#endif
+
+	extern int fsync(int fd);
+
+	#if __OFF_T_MATCHES_OFF64_T
+		typedef __off64_t off_t;
+	#else
+		typedef __off_t off_t;
+	#endif
+	typedef __off64_t off64_t;
+	extern int sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags);
+
+	struct msghdr;
+	struct sockaddr;
+	extern ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags);
+	extern ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags);
+	extern ssize_t send(int sockfd, const void *buf, size_t len, int flags);
+	extern ssize_t recv(int sockfd, void *buf, size_t len, int flags);
+	extern int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags);
+	extern int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen);
+
+	extern int fallocate(int fd, int mode, off_t offset, off_t len);
+	extern int posix_fadvise(int fd, off_t offset, off_t len, int advice);
+	extern int madvise(void *addr, size_t length, int advice);
+
+	extern int openat(int dirfd, const char *pathname, int flags, mode_t mode);
+	extern int close(int fd);
+
+	extern ssize_t read (int fd, void *buf, size_t count);
+
+	struct epoll_event;
+	extern int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
+
+	extern ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags);
+	extern ssize_t tee(int fd_in, int fd_out, size_t len, unsigned int flags);
+}
+
+//=============================================================================================
+// I/O Interface
+//=============================================================================================
+"""
+
+print(Header.format("A Date"))
+print(Prelude)
+
+import re
+import sys
+class Call:  # one generated I/O call: parsed C signature plus the sqe field assignments to emit
+	def __init__(self, op, signature, body, define=None):  # op: IORING_OP_ suffix; body: {sqe field: C expression}; define: optional guard macro
+		sig = re.search(r"(.*) (.*)\((.*)\)", signature)  # raw string: '\(' is a regex escape, not a Python one
+		if not sig:
+			print("OP '{}' has invalid signature {}".format(op, signature), file=sys.stderr)
+			sys.exit(1)
+
+		self.op     = op
+		self.ret    = sig.group(1)
+		self.name   = sig.group(2)
+		self.params = sig.group(3)
+		self.define = define
+		self.body = ""
+
+		accepted_keys = [ 'ioprio', 'fd', 'off', 'addr2','addr', 'splice_off_in','len',
+			'rw_flags', 'fsync_flags', 'poll_events', 'poll32_events',
+			'sync_range_flags', 'msg_flags', 'timeout_flags', 'accept_flags',
+			'cancel_flags', 'open_flags', 'statx_flags', 'fadvise_advice',
+			'splice_flags', 'buf_index', 'buf_group', 'personality',
+			'splice_fd_in' ]
+
+		for k, v in body.items():
+			if k not in accepted_keys:
+				print("OP '{}' has invalid body key {}".format(op, k), file=sys.stderr)
+				sys.exit(1)
+
+			self.body += "\n		sqe->{key} = {value};".format(key=k, value=v)
+
+
+	def args(self):  # comma-joined parameter names, with types and '*' stripped, for forwarding to the wrapped call
+		param_a = self.params.split(',')
+		args_a = [p.replace('*', ' ').split()[-1] for p in param_a]
+		for a in args_a:
+			if '*' in a:
+				print("OP '{}' has invalid * in argument {}".format(self.op, a), file=sys.stderr)
+				sys.exit(1)
+
+		return ', '.join(args_a)
+
+AsyncTemplate = """inline void async_{name}(io_future_t & future, {params}, int submit_flags, io_cancellation * cancellation, io_context * context) {{
+	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_{op})
+		ssize_t res = {name}({args});
+		if (res >= 0) {{
+			fulfil(future, res);
+		}}
+		else {{
+			fulfil(future, -errno);
+		}}
+	#else
+		// we don't support LINK yet: report ENOTSUP through the future so a waiter is released
+		if( 0 != (submit_flags & LINK_FLAGS) ) {{
+			fulfil(future, -ENOTSUP); return;
+		}}
+
+		if( !context ) {{
+			context = __get_io_context();
+		}}
+		if(cancellation) {{
+			cancellation->target = (__u64)(uintptr_t)&future;
+		}}
+
+		__u8 sflags = REGULAR_FLAGS & submit_flags;
+		struct __io_data & ring = *context->thrd.ring;
+
+		__u32 idx;
+		struct io_uring_sqe * sqe;
+		[sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future );
+
+		sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
+		sqe->opcode = IORING_OP_{op};
+		sqe->flags = sflags;{body}
+
+		verify( sqe->user_data == (__u64)(uintptr_t)&future );
+		__submit( context, idx );
+	#endif
+}}"""
+
+SyncTemplate = """{ret} cfa_{name}({params}, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {{
+	if( timeout >= 0 ) {{
+		errno = ENOTSUP;
+		return -1;
+	}}
+	io_future_t future;
+
+	async_{name}( future, {args}, submit_flags, cancellation, context );
+
+	wait( future );
+	if( future.result < 0 ) {{
+		errno = -future.result;
+		return -1;
+	}}
+	return future.result;
+}}"""
+
+calls = [
+	# CFA_HAVE_IORING_OP_READV
+	Call('READV', 'ssize_t preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags)', {
+		'fd'  : 'fd',
+		'off' : 'offset',
+		'addr': '(__u64)iov',
+		'len' : 'iovcnt',
+	}, define = 'CFA_HAVE_PREADV2'),
+	# CFA_HAVE_IORING_OP_WRITEV
+	Call('WRITEV', 'ssize_t pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags)', {
+		'fd'  : 'fd',
+		'off' : 'offset',
+		'addr': '(__u64)iov',
+		'len' : 'iovcnt'
+	}, define = 'CFA_HAVE_PWRITEV2'),
+	# CFA_HAVE_IORING_OP_FSYNC
+	Call('FSYNC', 'int fsync(int fd)', {
+		'fd': 'fd'
+	}),
+	# CFA_HAVE_IORING_OP_EPOLL_CTL (io_uring takes the event pointer in addr and the target fd in off)
+	Call('EPOLL_CTL', 'int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)', {
+		'fd': 'epfd',
+		'addr': '(__u64)event',
+		'len': 'op',
+		'off': 'fd'
+	}),
+	# CFA_HAVE_IORING_OP_SYNC_FILE_RANGE
+	Call('SYNC_FILE_RANGE', 'int sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags)', {
+		'fd': 'fd',
+		'off': 'offset',
+		'len': 'nbytes',
+		'sync_range_flags': 'flags'
+	}),
+	# CFA_HAVE_IORING_OP_SENDMSG
+	Call('SENDMSG', 'ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags)', {
+		'fd': 'sockfd',
+		'addr': '(__u64)(struct msghdr *)msg',
+		'len': '1',
+		'msg_flags': 'flags'
+	}),
+	# CFA_HAVE_IORING_OP_RECVMSG
+	Call('RECVMSG', 'ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags)', {
+		'fd': 'sockfd',
+		'addr': '(__u64)(struct msghdr *)msg',
+		'len': '1',
+		'msg_flags': 'flags'
+	}),
+	# CFA_HAVE_IORING_OP_SEND
+	Call('SEND', 'ssize_t send(int sockfd, const void *buf, size_t len, int flags)', {
+		'fd': 'sockfd',
+		'addr': '(__u64)buf',
+		'len': 'len',
+		'msg_flags': 'flags'
+	}),
+	# CFA_HAVE_IORING_OP_RECV
+	Call('RECV', 'ssize_t recv(int sockfd, void *buf, size_t len, int flags)', {
+		'fd': 'sockfd',
+		'addr': '(__u64)buf',
+		'len': 'len',
+		'msg_flags': 'flags'
+	}),
+	# CFA_HAVE_IORING_OP_ACCEPT (the opcode is ACCEPT; accept4 is only the wrapped libc function)
+	Call('ACCEPT', 'int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags)', {
+		'fd': 'sockfd',
+		'addr': '(__u64)addr',
+		'addr2': '(__u64)addrlen',
+		'accept_flags': 'flags'
+	}),
+	# CFA_HAVE_IORING_OP_CONNECT
+	Call('CONNECT', 'int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen)', {
+		'fd': 'sockfd',
+		'addr': '(__u64)addr',
+		'off': 'addrlen'
+	}),
+	# CFA_HAVE_IORING_OP_FALLOCATE
+	Call('FALLOCATE', 'int fallocate(int fd, int mode, off_t offset, off_t len)', {
+		'fd': 'fd',
+		'addr': '(__u64)len',
+		'len': 'mode',
+		'off': 'offset'
+	}),
+	# CFA_HAVE_IORING_OP_FADVISE
+	Call('FADVISE', 'int posix_fadvise(int fd, off_t offset, off_t len, int advice)', {
+		'fd': 'fd',
+		'off': 'offset',
+		'len': 'len',
+		'fadvise_advice': 'advice'
+	}),
+	# CFA_HAVE_IORING_OP_MADVISE
+	Call('MADVISE', 'int madvise(void *addr, size_t length, int advice)', {
+		'addr': '(__u64)addr',
+		'len': 'length',
+		'fadvise_advice': 'advice'
+	}),
+	# CFA_HAVE_IORING_OP_OPENAT
+	Call('OPENAT', 'int openat(int dirfd, const char *pathname, int flags, mode_t mode)', {
+		'fd': 'dirfd',
+		'addr': '(__u64)pathname',
+		'len': 'mode',
+		'open_flags': 'flags'
+	}),
+	# CFA_HAVE_IORING_OP_OPENAT2
+	Call('OPENAT2', 'int openat2(int dirfd, const char *pathname, struct open_how * how, size_t size)', {
+		'fd': 'dirfd',
+		'addr': '(__u64)pathname',
+		'len': 'sizeof(*how)',
+		'off': '(__u64)how',
+	}, define = 'CFA_HAVE_OPENAT2'),
+	# CFA_HAVE_IORING_OP_CLOSE
+	Call('CLOSE', 'int close(int fd)', {
+		'fd': 'fd'
+	}),
+	# CFA_HAVE_IORING_OP_STATX
+	Call('STATX', 'int statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf)', {
+		'fd': 'dirfd',
+		'off': '(__u64)statxbuf',
+		'addr': '(__u64)pathname',
+		'len': 'mask',
+		'statx_flags': 'flags'
+	}, define = 'CFA_HAVE_STATX'),
+	# CFA_HAVE_IORING_OP_READ
+	Call('READ', 'ssize_t read(int fd, void * buf, size_t count)', {
+		'fd': 'fd',
+		'addr': '(__u64)buf',
+		'len': 'count'
+	}),
+	# CFA_HAVE_IORING_OP_WRITE
+	Call('WRITE', 'ssize_t write(int fd, void * buf, size_t count)', {
+		'fd': 'fd',
+		'addr': '(__u64)buf',
+		'len': 'count'
+	}),
+	# CFA_HAVE_IORING_OP_SPLICE
+	Call('SPLICE', 'ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags)', {
+		'splice_fd_in': 'fd_in',
+		'splice_off_in': 'off_in ? (__u64)*off_in : (__u64)-1',
+		'fd': 'fd_out',
+		'off': 'off_out ? (__u64)*off_out : (__u64)-1',
+		'len': 'len',
+		'splice_flags': 'flags'
+	}),
+	# CFA_HAVE_IORING_OP_TEE
+	Call('TEE', 'ssize_t tee(int fd_in, int fd_out, size_t len, unsigned int flags)', {
+		'splice_fd_in': 'fd_in',
+		'fd': 'fd_out',
+		'len': 'len',
+		'splice_flags': 'flags'
+	})
+]
+
+print("//----------")
+print("// synchronous calls")
+for c in calls:
+	if c.define:
+		print("""#if defined({define})
+	{ret} cfa_{name}({params}, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+#endif""".format(define=c.define,ret=c.ret, name=c.name, params=c.params))
+	else:
+		print("{ret} cfa_{name}({params}, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);"
+		.format(ret=c.ret, name=c.name, params=c.params))
+
+print("\n//----------")
+print("// asynchronous calls")
+for c in calls:
+	if c.define:
+		print("""#if defined({define})
+	void async_{name}(io_future_t & future, {params}, int submit_flags, io_cancellation * cancellation, io_context * context);
+#endif""".format(define=c.define,name=c.name, params=c.params))
+	else:
+		print("void async_{name}(io_future_t & future, {params}, int submit_flags, io_cancellation * cancellation, io_context * context);"
+		.format(name=c.name, params=c.params))
+print("\n")
+
+for c in calls:
+	print("//-----------------------------------------------------------------------------")
+	print("// {}".format(c.name))
+	Async = AsyncTemplate.format(
+		name   = c.name,
+		ret    = c.ret,
+		params = c.params,
+		args   = c.args(),
+		op     = c.op,
+		body   = c.body
+
+	)
+	Sync = SyncTemplate.format(
+		name   = c.name,
+		ret    = c.ret,
+		params = c.params,
+		args   = c.args()
+	)
+
+	if c.define:
+		print("""#if defined({})
+	//----------
+	// asynchronous call
+	{}
+
+	//----------
+	// synchronous call
+	{}
+#endif
+""".format(c.define, "\n\t".join( Async.splitlines() ), "\n\t".join( Sync.splitlines() )))
+	else :
+		print("""//----------
+// asynchronous call
+{}
+
+//----------
+// synchronous call
+{}
+""".format(Async, Sync))
+
+print("""
+//-----------------------------------------------------------------------------
+// Check if a function is has asynchronous
+bool has_user_level_blocking( fptr_t func ) {
+ 	#if defined(CFA_HAVE_LINUX_IO_URING_H)""")
+
+for c in calls:
+	if c.define:
+		print("""		#if defined({define})
+ 			if( /*func == (fptr_t)preadv2 || */
+ 				func == (fptr_t)cfa_{name} ||
+				func == (fptr_t)async_{name} ) {{
+ 				#if defined(CFA_HAVE_IORING_OP_{op})
+					return true;
+				#else
+					return false;
+				#endif
+ 			}}
+ 		#endif""".format(define=c.define, name=c.name, op=c.op))
+	else:
+		print("""		if( /*func == (fptr_t)preadv2 || */
+			func == (fptr_t)cfa_{name} ||
+			func == (fptr_t)async_{name} ) {{
+			#if defined(CFA_HAVE_IORING_OP_{op})
+				return true;
+			#else
+				return false;
+			#endif
+		}}""".format(name=c.name, op=c.op))
+
+print(""" 	#endif
+
+ 	return false;
+}""")
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/io/setup.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -147,4 +147,5 @@
 	static void * iopoll_loop( __attribute__((unused)) void * args ) {
 		__processor_id_t id;
+		id.full_proc = false;
 		id.id = doregister(&id);
 		__cfaabi_dbg_print_safe( "Kernel : IO poller thread starting\n" );
@@ -246,5 +247,4 @@
 					thrd.link.next = 0p;
 					thrd.link.prev = 0p;
-					__cfaabi_dbg_debug_do( thrd.unpark_stale = true );
 
 					// Fixup the thread state
@@ -266,5 +266,5 @@
 
 				// unpark the fast io_poller
-				unpark( &thrd __cfaabi_dbg_ctx2 );
+				unpark( &thrd );
 			}
 			else {
@@ -275,5 +275,5 @@
 			}
 		} else {
-			unpark( &thrd __cfaabi_dbg_ctx2 );
+			unpark( &thrd );
 		}
 
Index: libcfa/src/concurrency/io/types.hfa
===================================================================
--- libcfa/src/concurrency/io/types.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/io/types.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -16,11 +16,11 @@
 #pragma once
 
+extern "C" {
+	#include <linux/types.h>
+}
+
+#include "bits/locks.hfa"
+
 #if defined(CFA_HAVE_LINUX_IO_URING_H)
-	extern "C" {
-		#include <linux/types.h>
-	}
-
-      #include "bits/locks.hfa"
-
 	#define LEADER_LOCK
 	struct __leaderlock_t {
@@ -101,12 +101,4 @@
 	};
 
-
-	//-----------------------------------------------------------------------
-	// IO user data
-	struct __io_user_data_t {
-		__s32 result;
-		oneshot sem;
-	};
-
 	//-----------------------------------------------------------------------
 	// Misc
@@ -143,2 +135,21 @@
 	void __ioctx_prepare_block($io_ctx_thread & ctx, struct epoll_event & ev);
 #endif
+
+//-----------------------------------------------------------------------
+// IO user data: a future paired with the __s32 value stored when it is fulfilled
+struct io_future_t {
+	future_t self;	// underlying future; fulfil() and wait() on io_future_t forward to it
+	__s32 result;	// set by fulfil( io_future_t &, __s32 ) before the underlying future is fulfilled
+};
+
+static inline {
+	bool fulfil( io_future_t & this, __s32 result ) {	// Store the result, then fulfil the underlying future
+		this.result = result;	// written before the underlying future is fulfilled
+		return fulfil(this.self);	// forwards the return value of fulfil on the inner future_t
+	}
+
+	// Wait for the future to be fulfilled
+	bool wait( io_future_t & this ) {
+		return wait(this.self);	// delegates to wait on the inner future_t; result is not read here
+	}
+}
Index: libcfa/src/concurrency/iofwd.hfa
===================================================================
--- libcfa/src/concurrency/iofwd.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/iofwd.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -40,4 +40,5 @@
 
 struct cluster;
+struct io_future_t;
 struct io_context;
 struct io_cancellation;
@@ -48,24 +49,70 @@
 struct statx;
 
-extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern int cfa_fsync(int fd, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern int cfa_sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern int cfa_fallocate(int fd, int mode, uint64_t offset, uint64_t len, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern int cfa_fadvise(int fd, uint64_t offset, uint64_t len, int advice, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern int cfa_madvise(void *addr, size_t length, int advice, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern int cfa_close(int fd, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern ssize_t cfa_read(int fd, void *buf, size_t count, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern ssize_t cfa_write(int fd, void *buf, size_t count, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
-extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
+//----------
+// synchronous calls
+#if defined(CFA_HAVE_PREADV2)
+	extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+#endif
+#if defined(CFA_HAVE_PWRITEV2)
+	extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+#endif
+extern int cfa_fsync(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern  ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_fallocate(int fd, int mode, off_t offset, off_t len, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_posix_fadvise(int fd, off_t offset, off_t len, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_madvise(void *addr, size_t length, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+#if defined(CFA_HAVE_OPENAT2)
+	extern int cfa_openat2(int dirfd, const char *pathname, struct open_how * how, size_t size, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+#endif
+extern int cfa_close(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+#if defined(CFA_HAVE_STATX)
+	extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+#endif
+extern ssize_t cfa_read(int fd, void * buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_write(int fd, void * buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+
+//----------
+// asynchronous calls
+#if defined(CFA_HAVE_PREADV2)
+	extern void async_preadv2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+#endif
+#if defined(CFA_HAVE_PWRITEV2)
+	extern void async_pwritev2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+#endif
+extern void async_fsync(io_future_t & future, int fd, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_epoll_ctl(io_future_t & future, int epfd, int op, int fd, struct epoll_event *event, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_sync_file_range(io_future_t & future, int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_sendmsg(io_future_t & future, int sockfd, const struct msghdr *msg, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_recvmsg(io_future_t & future, int sockfd, struct msghdr *msg, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_send(io_future_t & future, int sockfd, const void *buf, size_t len, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_recv(io_future_t & future, int sockfd, void *buf, size_t len, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_accept4(io_future_t & future, int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_connect(io_future_t & future, int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_fallocate(io_future_t & future, int fd, int mode, off_t offset, off_t len, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_posix_fadvise(io_future_t & future, int fd, off_t offset, off_t len, int advice, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_madvise(io_future_t & future, void *addr, size_t length, int advice, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_openat(io_future_t & future, int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, io_cancellation * cancellation, io_context * context);
+#if defined(CFA_HAVE_OPENAT2)
+	extern void async_openat2(io_future_t & future, int dirfd, const char *pathname, struct open_how * how, size_t size, int submit_flags, io_cancellation * cancellation, io_context * context);
+#endif
+extern void async_close(io_future_t & future, int fd, int submit_flags, io_cancellation * cancellation, io_context * context);
+#if defined(CFA_HAVE_STATX)
+	extern void async_statx(io_future_t & future, int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags, io_cancellation * cancellation, io_context * context);
+#endif
+extern void async_read(io_future_t & future, int fd, void * buf, size_t count, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_write(io_future_t & future, int fd, void * buf, size_t count, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_splice(io_future_t & future, int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_tee(io_future_t & future, int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+
 
 //-----------------------------------------------------------------------------
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/kernel.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -237,7 +237,4 @@
 	$coroutine * proc_cor = get_coroutine(this->runner);
 
-	// Update global state
-	kernelTLS.this_thread = thrd_dst;
-
 	// set state of processor coroutine to inactive
 	verify(proc_cor->state == Active);
@@ -249,24 +246,29 @@
 		thrd_dst->state = Active;
 
-		__cfaabi_dbg_debug_do(
-			thrd_dst->park_stale   = true;
-			thrd_dst->unpark_stale = true;
-		)
+		// Update global state
+		kernelTLS.this_thread = thrd_dst;
 
 		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 		/* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
+		/* paranoid */ verify( thrd_dst->context.SP );
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst ); // add escape condition if we are setting up the processor
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst ); // add escape condition if we are setting up the processor
+		/* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_dst->canary );
+
+
 
 		// set context switch to the thread that the processor is executing
-		verify( thrd_dst->context.SP );
 		__cfactx_switch( &proc_cor->context, &thrd_dst->context );
 		// when __cfactx_switch returns we are back in the processor coroutine
 
+		/* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_dst->canary );
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst );
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
+		/* paranoid */ verify( thrd_dst->context.SP );
 		/* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
 		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 
+		// Reset global state
+		kernelTLS.this_thread = 0p;
 
 		// We just finished running a thread, there are a few things that could have happened.
@@ -286,5 +288,5 @@
 			// The thread has halted, it should never be scheduled/run again
 			// We may need to wake someone up here since
-			unpark( this->destroyer __cfaabi_dbg_ctx2 );
+			unpark( this->destroyer );
 			this->destroyer = 0p;
 			break RUNNING;
@@ -296,5 +298,4 @@
 		// set state of processor coroutine to active and the thread to inactive
 		int old_ticket = __atomic_fetch_sub(&thrd_dst->ticket, 1, __ATOMIC_SEQ_CST);
-		__cfaabi_dbg_debug_do( thrd_dst->park_result = old_ticket; )
 		switch(old_ticket) {
 			case 1:
@@ -313,5 +314,4 @@
 	// Just before returning to the processor, set the processor coroutine to active
 	proc_cor->state = Active;
-	kernelTLS.this_thread = 0p;
 
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
@@ -334,6 +334,8 @@
 			__x87_store;
 		#endif
-		verify( proc_cor->context.SP );
+		/* paranoid */ verify( proc_cor->context.SP );
+		/* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_src->canary );
 		__cfactx_switch( &thrd_src->context, &proc_cor->context );
+		/* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_src->canary );
 		#if defined( __i386 ) || defined( __x86_64 )
 			__x87_load;
@@ -367,4 +369,6 @@
 	/* paranoid */ #endif
 	/* paranoid */ verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
+	/* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd->canary );
+
 
 	if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
@@ -403,10 +407,6 @@
 
 // KERNEL ONLY unpark with out disabling interrupts
-void __unpark(  struct __processor_id_t * id, $thread * thrd __cfaabi_dbg_ctx_param2 ) {
-	// record activity
-	__cfaabi_dbg_record_thrd( *thrd, false, caller );
-
+void __unpark(  struct __processor_id_t * id, $thread * thrd ) {
 	int old_ticket = __atomic_fetch_add(&thrd->ticket, 1, __ATOMIC_SEQ_CST);
-	__cfaabi_dbg_debug_do( thrd->unpark_result = old_ticket; thrd->unpark_state = thrd->state; )
 	switch(old_ticket) {
 		case 1:
@@ -426,20 +426,17 @@
 }
 
-void unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) {
+void unpark( $thread * thrd ) {
 	if( !thrd ) return;
 
 	disable_interrupts();
-	__unpark( (__processor_id_t*)kernelTLS.this_processor, thrd __cfaabi_dbg_ctx_fwd2 );
+	__unpark( (__processor_id_t*)kernelTLS.this_processor, thrd );
 	enable_interrupts( __cfaabi_dbg_ctx );
 }
 
-void park( __cfaabi_dbg_ctx_param ) {
+void park( void ) {
 	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
 	disable_interrupts();
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 	/* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
-
-	// record activity
-	__cfaabi_dbg_record_thrd( *kernelTLS.this_thread, true, caller );
 
 	returnToKernel();
@@ -521,5 +518,5 @@
 	disable_interrupts();
 		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-		bool ret = post( this->idle );
+		post( this->idle );
 	enable_interrupts( __cfaabi_dbg_ctx );
 }
@@ -649,5 +646,5 @@
 		// atomically release spin lock and block
 		unlock( lock );
-		park( __cfaabi_dbg_ctx );
+		park();
 		return true;
 	}
@@ -670,5 +667,5 @@
 
 	// make new owner
-	unpark( thrd __cfaabi_dbg_ctx2 );
+	unpark( thrd );
 
 	return thrd != 0p;
@@ -681,5 +678,5 @@
 	count += diff;
 	for(release) {
-		unpark( pop_head( waiting ) __cfaabi_dbg_ctx2 );
+		unpark( pop_head( waiting ) );
 	}
 
@@ -697,15 +694,4 @@
 			this.prev_thrd = kernelTLS.this_thread;
 		}
-
-		void __cfaabi_dbg_record_thrd($thread & this, bool park, const char prev_name[]) {
-			if(park) {
-				this.park_caller   = prev_name;
-				this.park_stale    = false;
-			}
-			else {
-				this.unpark_caller = prev_name;
-				this.unpark_stale  = false;
-			}
-		}
 	}
 )
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/kernel.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -23,5 +23,6 @@
 
 extern "C" {
-#include <bits/pthreadtypes.h>
+	#include <bits/pthreadtypes.h>
+	#include <linux/types.h>
 }
 
@@ -47,5 +48,6 @@
 // Processor id, required for scheduling threads
 struct __processor_id_t {
-	unsigned id;
+	unsigned id:24;
+	bool full_proc:1;
 
 	#if !defined(__CFA_NO_STATISTICS__)
@@ -157,5 +159,5 @@
 
 struct io_cancellation {
-	uint32_t target;
+	__u64 target;
 };
 
Index: libcfa/src/concurrency/kernel/fwd.hfa
===================================================================
--- libcfa/src/concurrency/kernel/fwd.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/kernel/fwd.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -118,6 +118,6 @@
 
 	extern "Cforall" {
-		extern void park( __cfaabi_dbg_ctx_param );
-		extern void unpark( struct $thread * this __cfaabi_dbg_ctx_param2 );
+		extern void park( void );
+		extern void unpark( struct $thread * this );
 		static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
 
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -451,4 +451,7 @@
 	link.next = 0p;
 	link.prev = 0p;
+	#if defined( __CFA_WITH_VERIFY__ )
+		canary = 0x0D15EA5E0D15EA5E;
+	#endif
 
 	node.next = 0p;
@@ -470,5 +473,5 @@
 	this.name = name;
 	this.cltr = &_cltr;
-	id = -1u;
+	full_proc = true;
 	destroyer = 0p;
 	do_terminate = false;
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -64,5 +64,5 @@
 
 // KERNEL ONLY unpark with out disabling interrupts
-void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );
+void __unpark( struct __processor_id_t *, $thread * thrd );
 
 static inline bool __post(single_sem & this, struct __processor_id_t * id) {
@@ -77,5 +77,5 @@
 		else {
 			if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-				__unpark( id, expected __cfaabi_dbg_ctx2 );
+				__unpark( id, expected );
 				return true;
 			}
Index: libcfa/src/concurrency/monitor.cfa
===================================================================
--- libcfa/src/concurrency/monitor.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/monitor.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -89,5 +89,8 @@
 	__cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
 
-	if( !this->owner ) {
+	if( unlikely(0 != (0x1 & (uintptr_t)this->owner)) ) {
+		abort( "Attempt by thread \"%.256s\" (%p) to access joined monitor %p.", thrd->self_cor.name, thrd, this );
+	}
+	else if( !this->owner ) {
 		// No one has the monitor, just take it
 		__set_owner( this, thrd );
@@ -119,5 +122,5 @@
 
 		unlock( this->lock );
-		park( __cfaabi_dbg_ctx );
+		park();
 
 		__cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
@@ -137,5 +140,5 @@
 }
 
-static void __dtor_enter( $monitor * this, fptr_t func ) {
+static void __dtor_enter( $monitor * this, fptr_t func, bool join ) {
 	// Lock the monitor spinlock
 	lock( this->lock __cfaabi_dbg_ctx2 );
@@ -157,8 +160,22 @@
 		return;
 	}
-	else if( this->owner == thrd) {
+	else if( this->owner == thrd && !join) {
 		// We already have the monitor... but where about to destroy it so the nesting will fail
 		// Abort!
 		abort( "Attempt to destroy monitor %p by thread \"%.256s\" (%p) in nested mutex.", this, thrd->self_cor.name, thrd );
+	}
+	// SKULLDUGGERY: join acts as a dtor, so a later real dtor would normally trigger the above check;
+	// to avoid that, join sets the owner to the special value (thrd | 1) before exiting
+	else if( this->owner == ($thread*)(1 | (uintptr_t)thrd) ) {
+		// restore the owner and just return
+		__cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this);
+
+		// Strip the join marker bit so this thread is the plain owner again
+		this->owner = thrd;
+
+		verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+
+		unlock( this->lock );
+		return;
 	}
 
@@ -184,8 +201,8 @@
 		// Release the next thread
 		/* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
-		unpark( urgent->owner->waiting_thread __cfaabi_dbg_ctx2 );
+		unpark( urgent->owner->waiting_thread );
 
 		// Park current thread waiting
-		park( __cfaabi_dbg_ctx );
+		park();
 
 		// Some one was waiting for us, enter
@@ -205,5 +222,5 @@
 
 		// Park current thread waiting
-		park( __cfaabi_dbg_ctx );
+		park();
 
 		/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
@@ -247,17 +264,19 @@
 	//We need to wake-up the thread
 	/* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
-	unpark( new_owner __cfaabi_dbg_ctx2 );
+	unpark( new_owner );
 }
 
 // Leave single monitor for the last time
-void __dtor_leave( $monitor * this ) {
+void __dtor_leave( $monitor * this, bool join ) {
 	__cfaabi_dbg_debug_do(
 		if( TL_GET( this_thread ) != this->owner ) {
 			abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
 		}
-		if( this->recursion != 1 ) {
+		if( this->recursion != 1  && !join ) {
 			abort( "Destroyed monitor %p has %d outstanding nested calls.\n", this, this->recursion - 1);
 		}
 	)
+
+	this->owner = ($thread*)(1 | (uintptr_t)this->owner);
 }
 
@@ -307,4 +326,15 @@
 }
 
+// Join a thread: enters the thread's monitor through a destructor guard created in join mode, then returns the same reference
+forall( dtype T | is_thread(T) )
+T & join( T & this ) {
+	$monitor *    m = get_monitor(this);
+	void (*dtor)(T& mutex this) = ^?{};	// the mutex destructor of T, handed to the guard as the entering function
+	monitor_dtor_guard_t __guard = { &m, (fptr_t)dtor, true };	// true selects join mode; the guard's constructor forwards it to __dtor_enter
+	{
+		return this;	// NOTE(review): __guard's destructor presumably runs on the way out (__dtor_leave with join set) — confirm
+	}
+}
+
 // Enter multiple monitor
 // relies on the monitor array being sorted
@@ -366,5 +396,5 @@
 // Ctor for monitor guard
 // Sorts monitors before entering
-void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func ) {
+void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func, bool join ) {
 	// optimization
 	$thread * thrd = TL_GET( this_thread );
@@ -376,8 +406,11 @@
 	this.prev = thrd->monitors;
 
+	// Save whether we are in a join or not
+	this.join = join;
+
 	// Update thread context (needed for conditions)
 	(thrd->monitors){m, 1, func};
 
-	__dtor_enter( this.m, func );
+	__dtor_enter( this.m, func, join );
 }
 
@@ -385,5 +418,5 @@
 void ^?{}( monitor_dtor_guard_t & this ) {
 	// Leave the monitors in order
-	__dtor_leave( this.m );
+	__dtor_leave( this.m, this.join );
 
 	// Restore thread context
@@ -460,9 +493,9 @@
 	// Wake the threads
 	for(int i = 0; i < thread_count; i++) {
-		unpark( threads[i] __cfaabi_dbg_ctx2 );
+		unpark( threads[i] );
 	}
 
 	// Everything is ready to go to sleep
-	park( __cfaabi_dbg_ctx );
+	park();
 
 	// We are back, restore the owners and recursions
@@ -542,8 +575,8 @@
 
 	// unpark the thread we signalled
-	unpark( signallee __cfaabi_dbg_ctx2 );
+	unpark( signallee );
 
 	//Everything is ready to go to sleep
-	park( __cfaabi_dbg_ctx );
+	park();
 
 
@@ -646,8 +679,8 @@
 
 				// unpark the thread we signalled
-				unpark( next __cfaabi_dbg_ctx2 );
+				unpark( next );
 
 				//Everything is ready to go to sleep
-				park( __cfaabi_dbg_ctx );
+				park();
 
 				// We are back, restore the owners and recursions
@@ -691,5 +724,5 @@
 
 	//Everything is ready to go to sleep
-	park( __cfaabi_dbg_ctx );
+	park();
 
 
Index: libcfa/src/concurrency/monitor.hfa
===================================================================
--- libcfa/src/concurrency/monitor.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/monitor.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -53,7 +53,8 @@
 	$monitor *    m;
 	__monitor_group_t prev;
+	bool join;
 };
 
-void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)() );
+void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)(), bool join );
 void ^?{}( monitor_dtor_guard_t & this );
 
Index: libcfa/src/concurrency/mutex.cfa
===================================================================
--- libcfa/src/concurrency/mutex.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/mutex.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -42,5 +42,5 @@
 		append( blocked_threads, kernelTLS.this_thread );
 		unlock( lock );
-		park( __cfaabi_dbg_ctx );
+		park();
 	}
 	else {
@@ -65,5 +65,5 @@
 	this.is_locked = (this.blocked_threads != 0);
 	unpark(
-		pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
+		pop_head( this.blocked_threads )
 	);
 	unlock( this.lock );
@@ -97,5 +97,5 @@
 		append( blocked_threads, kernelTLS.this_thread );
 		unlock( lock );
-		park( __cfaabi_dbg_ctx );
+		park();
 	}
 }
@@ -124,5 +124,5 @@
 		owner = thrd;
 		recursion_count = (thrd ? 1 : 0);
-		unpark( thrd __cfaabi_dbg_ctx2 );
+		unpark( thrd );
 	}
 	unlock( lock );
@@ -142,5 +142,5 @@
 	lock( lock __cfaabi_dbg_ctx2 );
 	unpark(
-		pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
+		pop_head( this.blocked_threads )
 	);
 	unlock( lock );
@@ -151,5 +151,5 @@
 	while(this.blocked_threads) {
 		unpark(
-			pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
+			pop_head( this.blocked_threads )
 		);
 	}
@@ -161,5 +161,5 @@
 	append( this.blocked_threads, kernelTLS.this_thread );
 	unlock( this.lock );
-	park( __cfaabi_dbg_ctx );
+	park();
 }
 
@@ -170,5 +170,5 @@
 	unlock(l);
 	unlock(this.lock);
-	park( __cfaabi_dbg_ctx );
+	park();
 	lock(l);
 }
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/preemption.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -274,5 +274,5 @@
 		kernelTLS.this_stats = this->curr_cluster->stats;
 	#endif
-	__unpark( id, this __cfaabi_dbg_ctx2 );
+	__unpark( id, this );
 }
 
@@ -411,4 +411,5 @@
 static void * alarm_loop( __attribute__((unused)) void * args ) {
 	__processor_id_t id;
+	id.full_proc = false;
 	id.id = doregister(&id);
 
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/thread.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -39,4 +39,7 @@
 	link.prev = 0p;
 	link.preferred = -1;
+	#if defined( __CFA_WITH_VERIFY__ )
+		canary = 0x0D15EA5E0D15EA5E;
+	#endif
 
 	node.next = 0p;
@@ -48,4 +51,7 @@
 
 void ^?{}($thread& this) with( this ) {
+	#if defined( __CFA_WITH_VERIFY__ )
+		canary = 0xDEADDEADDEADDEAD;
+	#endif
 	unregister(curr_cluster, this);
 	^self_cor{};
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/concurrency/thread.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -88,13 +88,13 @@
 //----------
 // Park thread: block until corresponding call to unpark, won't block if unpark is already called
-void park( __cfaabi_dbg_ctx_param );
+void park( void );
 
 //----------
 // Unpark a thread, if the thread is already blocked, schedule it
 //                  if the thread is not yet block, signal that it should rerun immediately
-void unpark( $thread * this __cfaabi_dbg_ctx_param2 );
+void unpark( $thread * this );
 
 forall( dtype T | is_thread(T) )
-static inline void unpark( T & this __cfaabi_dbg_ctx_param2 ) { if(!&this) return; unpark( get_thread( this ) __cfaabi_dbg_ctx_fwd2 );}
+static inline void unpark( T & this ) { if( &this ) unpark( get_thread( this ) ); } // null reference: nothing to unpark
 
 //----------
@@ -106,4 +106,9 @@
 void sleep( Duration duration );
 
+//----------
+// join
+forall( dtype T | is_thread(T) )
+T & join( T & this );
+
 // Local Variables: //
 // mode: c //
Index: libcfa/src/exception.h
===================================================================
--- libcfa/src/exception.h	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/exception.h	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -76,22 +76,22 @@
 // implemented in the .c file either so they all have to be inline.
 
-trait is_exception(dtype T) {
+trait is_exception(dtype exceptT) {
 	/* The first field must be a pointer to a virtual table.
 	 * That virtual table must be a decendent of the base exception virtual tab$
 	 */
-	void mark_exception(T *);
+	void mark_exception(exceptT *);
 	// This is never used and should be a no-op.
 };
 
-trait is_termination_exception(dtype T | is_exception(T)) {
-	void defaultTerminationHandler(T &);
+trait is_termination_exception(dtype exceptT | is_exception(exceptT)) {
+	void defaultTerminationHandler(exceptT &);
 };
 
-trait is_resumption_exception(dtype T | is_exception(T)) {
-	void defaultResumptionHandler(T &);
+trait is_resumption_exception(dtype exceptT | is_exception(exceptT)) {
+	void defaultResumptionHandler(exceptT &);
 };
 
-forall(dtype T | is_termination_exception(T))
-static inline void $throw(T & except) {
+forall(dtype exceptT | is_termination_exception(exceptT))
+static inline void $throw(exceptT & except) {
 	__cfaehm_throw_terminate(
 		(exception_t *)&except,
@@ -100,6 +100,6 @@
 }
 
-forall(dtype T | is_resumption_exception(T))
-static inline void $throwResume(T & except) {
+forall(dtype exceptT | is_resumption_exception(exceptT))
+static inline void $throwResume(exceptT & except) {
 	__cfaehm_throw_resume(
 		(exception_t *)&except,
@@ -108,16 +108,16 @@
 }
 
-forall(dtype T | is_exception(T))
-static inline void cancel_stack(T & except) __attribute__((noreturn)) {
+forall(dtype exceptT | is_exception(exceptT))
+static inline void cancel_stack(exceptT & except) __attribute__((noreturn)) {
 	__cfaehm_cancel_stack( (exception_t *)&except );
 }
 
-forall(dtype T | is_exception(T))
-static inline void defaultTerminationHandler(T & except) {
+forall(dtype exceptT | is_exception(exceptT))
+static inline void defaultTerminationHandler(exceptT & except) {
 	return cancel_stack( except );
 }
 
-forall(dtype T | is_exception(T))
-static inline void defaultResumptionHandler(T & except) {
+forall(dtype exceptT | is_exception(exceptT))
+static inline void defaultResumptionHandler(exceptT & except) {
 	throw except;
 }
Index: libcfa/src/exception.hfa
===================================================================
--- libcfa/src/exception.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/exception.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -192,5 +192,5 @@
 		size_t size; \
 		void (*copy)(exception_name * this, exception_name * other); \
-		void (*free)(exception_name & this); \
+		void (*^?{})(exception_name & this); \
 		const char * (*msg)(exception_name * this); \
 		_CLOSE
@@ -213,5 +213,5 @@
 		size_t size; \
 		void (*copy)(exception_name parameters * this, exception_name parameters * other); \
-		void (*free)(exception_name parameters & this); \
+		void (*^?{})(exception_name parameters & this); \
 		const char * (*msg)(exception_name parameters * this); \
 		_CLOSE
Index: libcfa/src/heap.cfa
===================================================================
--- libcfa/src/heap.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/heap.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -10,6 +10,6 @@
 // Created On       : Tue Dec 19 21:58:35 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Sep  3 16:22:54 2020
-// Update Count     : 943
+// Last Modified On : Mon Sep  7 22:17:46 2020
+// Update Count     : 957
 //
 
@@ -889,8 +889,8 @@
 		size_t bsize, oalign;
 		headers( "resize", oaddr, header, freeElem, bsize, oalign );
-
 		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+
 		// same size, DO NOT preserve STICKY PROPERTIES.
-		if ( oalign <= libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
+		if ( oalign == libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
 			header->kind.real.blockSize &= -2;			// no alignment and turn off 0 fill
 			header->kind.real.size = size;				// reset allocation size
@@ -931,6 +931,6 @@
 		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
 		size_t osize = header->kind.real.size;			// old allocation size
-		bool ozfill = (header->kind.real.blockSize & 2) != 0; // old allocation zero filled
-	  if ( unlikely( size <= odsize ) && size > odsize / 2 ) { // allow up to 50% wasted storage
+		bool ozfill = (header->kind.real.blockSize & 2); // old allocation zero filled
+	  if ( unlikely( size <= odsize ) && odsize <= size * 2 ) { // allow up to 50% wasted storage
 	  		header->kind.real.size = size;				// reset allocation size
 	  		if ( unlikely( ozfill ) && size > osize ) {	// previous request zero fill and larger ?
@@ -947,5 +947,5 @@
 
 		void * naddr;
-		if ( likely( oalign <= libAlign() ) ) {			// previous request not aligned ?
+		if ( likely( oalign == libAlign() ) ) {			// previous request not aligned ?
 			naddr = mallocNoStats( size );				// create new area
 		} else {
@@ -1231,28 +1231,31 @@
 	} // if
 
-	// Attempt to reuse existing storage.
+	// Attempt to reuse existing alignment.
 	HeapManager.Storage.Header * header = headerAddr( oaddr );
-	bool isFakeHeader = header->kind.fake.alignment & 1 == 1;	// old fake header ?
-	if ( unlikely ( ( isFakeHeader &&
-				 (uintptr_t)oaddr % nalign == 0 && 				// lucky match ?
-				 header->kind.fake.alignment <= nalign &&		// ok to leave LSB at 1
-			 	 nalign <= 128 )								// not too much alignment storage wasted ?
-			||   ( (!isFakeHeader) &&							// old real header ( aligned on libAlign ) ?
-				 nalign == libAlign() ) ) ) {					// new alignment also on libAlign
-
-		HeapManager.FreeHeader * freeElem;
-		size_t bsize, oalign;
-		headers( "resize", oaddr, header, freeElem, bsize, oalign );
-		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
-
-		if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage
-			if ( isFakeHeader ) {
+	bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
+	size_t oalign;
+	if ( isFakeHeader ) {
+		oalign = header->kind.fake.alignment & -2;		// old alignment
+		if ( (uintptr_t)oaddr % nalign == 0				// lucky match ?
+			 && ( oalign <= nalign						// new alignment same or larger
+				  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
+			) {
+			headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
+			HeapManager.FreeHeader * freeElem;
+			size_t bsize, oalign;
+			headers( "resize", oaddr, header, freeElem, bsize, oalign );
+			size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+
+			if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage
 				headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
-			}
-
-			header->kind.real.blockSize &= -2;		// turn off 0 fill
-			header->kind.real.size = size;			// reset allocation size
-			return oaddr;
-		} // if
+
+				header->kind.real.blockSize &= -2;		// turn off 0 fill
+				header->kind.real.size = size;			// reset allocation size
+				return oaddr;
+			} // if
+		} // if
+	} else if ( ! isFakeHeader							// old real header (aligned on libAlign) ?
+				&& nalign == libAlign() ) {				// new alignment also on libAlign => no fake header needed
+		return resize( oaddr, size );					// duplicate special case checks
 	} // if
 
@@ -1284,26 +1287,20 @@
 	} // if
 
-	HeapManager.Storage.Header * header;
-	HeapManager.FreeHeader * freeElem;
-	size_t bsize, oalign;
-	headers( "realloc", oaddr, header, freeElem, bsize, oalign );
-
-	// Attempt to reuse existing storage.
-	bool isFakeHeader = header->kind.fake.alignment & 1 == 1;	// old fake header ?
-	if ( unlikely ( ( isFakeHeader &&
-				 (uintptr_t)oaddr % nalign == 0 && 				// lucky match ?
-				 header->kind.fake.alignment <= nalign &&		// ok to leave LSB at 1
-			 	 nalign <= 128 )								// not too much alignment storage wasted ?
-			||   ( (!isFakeHeader) &&							// old real header ( aligned on libAlign ) ?
-				 nalign == libAlign() ) ) ) {					// new alignment also on libAlign
-
-		if ( isFakeHeader ) {
+	// Attempt to reuse existing alignment.
+	HeapManager.Storage.Header * header = headerAddr( oaddr );
+	bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
+	size_t oalign;
+	if ( isFakeHeader ) {
+		oalign = header->kind.fake.alignment & -2;		// old alignment
+		if ( (uintptr_t)oaddr % nalign == 0				// lucky match ?
+			 && ( oalign <= nalign						// new alignment same or larger
+				  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
+			) {
 			headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
-		}
-		return realloc( oaddr, size );
-
-	} // if
-
-	// change size and copy old content to new storage
+			return realloc( oaddr, size );				// duplicate alignment and special case checks
+		} // if
+	} else if ( ! isFakeHeader							// old real header (aligned on libAlign) ?
+				&& nalign == libAlign() )				// new alignment also on libAlign => no fake header needed
+		return realloc( oaddr, size );					// duplicate alignment and special case checks
 
 	#ifdef __STATISTICS__
@@ -1312,6 +1309,12 @@
 	#endif // __STATISTICS__
 
+	HeapManager.FreeHeader * freeElem;
+	size_t bsize;
+	headers( "realloc", oaddr, header, freeElem, bsize, oalign );
+
+	// change size and copy old content to new storage
+
 	size_t osize = header->kind.real.size;				// old allocation size
-	bool ozfill = (header->kind.real.blockSize & 2) != 0; // old allocation zero filled
+	bool ozfill = (header->kind.real.blockSize & 2);	// old allocation zero filled
 
 	void * naddr = memalignNoStats( nalign, size );		// create new aligned area
Index: libcfa/src/limits.cfa
===================================================================
--- libcfa/src/limits.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/limits.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -10,6 +10,6 @@
 // Created On       : Wed Apr  6 18:06:52 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Mar  1 16:22:51 2018
-// Update Count     : 74
+// Last Modified On : Wed Sep 30 22:56:32 2020
+// Update Count     : 76
 //
 
@@ -23,131 +23,131 @@
 // Integral Constants
 
-const signed char MIN = SCHAR_MIN;
-const unsigned char MIN = 0;
-const short int MIN = SHRT_MIN;
-const unsigned short int MIN = 0;
-const int MIN = INT_MIN;
-const unsigned int MIN = 0;
-const long int MIN = LONG_MIN;
-const unsigned long int MIN = 0;
-const long long int MIN = LLONG_MIN;
-const unsigned long long int MIN = 0;
+signed char MIN = SCHAR_MIN;
+unsigned char MIN = 0;
+short int MIN = SHRT_MIN;
+unsigned short int MIN = 0;
+int MIN = INT_MIN;
+unsigned int MIN = 0;
+long int MIN = LONG_MIN;
+unsigned long int MIN = 0;
+long long int MIN = LLONG_MIN;
+unsigned long long int MIN = 0;
 
-const signed char MAX = SCHAR_MAX;
-const unsigned char MAX = UCHAR_MAX;
-const short int MAX = SHRT_MAX;
-const unsigned short int MAX = USHRT_MAX;
-const int MAX = INT_MAX;
-const unsigned int MAX = UINT_MAX;
-const long int MAX = LONG_MAX;
-const unsigned long int MAX = ULONG_MAX;
-const long long int MAX = LLONG_MAX;
-const unsigned long long int MAX = ULLONG_MAX;
+signed char MAX = SCHAR_MAX;
+unsigned char MAX = UCHAR_MAX;
+short int MAX = SHRT_MAX;
+unsigned short int MAX = USHRT_MAX;
+int MAX = INT_MAX;
+unsigned int MAX = UINT_MAX;
+long int MAX = LONG_MAX;
+unsigned long int MAX = ULONG_MAX;
+long long int MAX = LLONG_MAX;
+unsigned long long int MAX = ULLONG_MAX;
 
 // Floating-Point Constants
 
-const float MIN = FLT_MIN;
-const double MIN = DBL_MIN;
-const long double MIN = LDBL_MIN;
-const float _Complex MIN = __FLT_MIN__ + __FLT_MIN__ * I;
-const double _Complex MIN = DBL_MIN +  DBL_MIN * I;
-const long double _Complex MIN = LDBL_MIN + LDBL_MIN * I;
+float MIN = FLT_MIN;
+double MIN = DBL_MIN;
+long double MIN = LDBL_MIN;
+float _Complex MIN = __FLT_MIN__ + __FLT_MIN__ * I;
+double _Complex MIN = DBL_MIN +  DBL_MIN * I;
+long double _Complex MIN = LDBL_MIN + LDBL_MIN * I;
 
-const float MAX = FLT_MAX;
-const double MAX = DBL_MAX;
-const long double MAX = LDBL_MAX;
-const float _Complex MAX = FLT_MAX + FLT_MAX * I;
-const double _Complex MAX = DBL_MAX + DBL_MAX * I;
-const long double _Complex MAX = LDBL_MAX + LDBL_MAX * I;
+float MAX = FLT_MAX;
+double MAX = DBL_MAX;
+long double MAX = LDBL_MAX;
+float _Complex MAX = FLT_MAX + FLT_MAX * I;
+double _Complex MAX = DBL_MAX + DBL_MAX * I;
+long double _Complex MAX = LDBL_MAX + LDBL_MAX * I;
 
-const float PI = (float)M_PI;							// pi
-const float PI_2 = (float)M_PI_2;						// pi / 2
-const float PI_4 = (float)M_PI_4;						// pi / 4
-const float _1_PI = (float)M_1_PI;						// 1 / pi
-const float _2_PI = (float)M_2_PI;						// 2 / pi
-const float _2_SQRT_PI = (float)M_2_SQRTPI;				// 2 / sqrt(pi)
+float PI = (float)M_PI;									// pi
+float PI_2 = (float)M_PI_2;								// pi / 2
+float PI_4 = (float)M_PI_4;								// pi / 4
+float _1_PI = (float)M_1_PI;							// 1 / pi
+float _2_PI = (float)M_2_PI;							// 2 / pi
+float _2_SQRT_PI = (float)M_2_SQRTPI;					// 2 / sqrt(pi)
 
-const double PI = M_PI;									// pi
-const double PI_2 = M_PI_2;								// pi / 2
-const double PI_4 = M_PI_4;								// pi / 4
-const double _1_PI = M_1_PI;							// 1 / pi
-const double _2_PI = M_2_PI;							// 2 / pi
-const double _2_SQRT_PI = M_2_SQRTPI;					// 2 / sqrt(pi)
+double PI = M_PI;										// pi
+double PI_2 = M_PI_2;									// pi / 2
+double PI_4 = M_PI_4;									// pi / 4
+double _1_PI = M_1_PI;									// 1 / pi
+double _2_PI = M_2_PI;									// 2 / pi
+double _2_SQRT_PI = M_2_SQRTPI;							// 2 / sqrt(pi)
 
-const long double PI = M_PIl;							// pi
-const long double PI_2 = M_PI_2l;						// pi / 2
-const long double PI_4 = M_PI_4l;						// pi / 4
-const long double _1_PI = M_1_PIl;						// 1 / pi
-const long double _2_PI = M_2_PIl;						// 2 / pi
-const long double _2_SQRT_PI = M_2_SQRTPIl;				// 2 / sqrt(pi)
+long double PI = M_PIl;									// pi
+long double PI_2 = M_PI_2l;								// pi / 2
+long double PI_4 = M_PI_4l;								// pi / 4
+long double _1_PI = M_1_PIl;							// 1 / pi
+long double _2_PI = M_2_PIl;							// 2 / pi
+long double _2_SQRT_PI = M_2_SQRTPIl;					// 2 / sqrt(pi)
 
-const float _Complex PI = (float)M_PI + 0.0_iF;			// pi
-const float _Complex PI_2 = (float)M_PI_2 + 0.0_iF;		// pi / 2
-const float _Complex PI_4 = (float)M_PI_4 + 0.0_iF;		// pi / 4
-const float _Complex _1_PI = (float)M_1_PI + 0.0_iF;	// 1 / pi
-const float _Complex _2_PI = (float)M_2_PI + 0.0_iF;	// 2 / pi
-const float _Complex _2_SQRT_PI = (float)M_2_SQRTPI + 0.0_iF; // 2 / sqrt(pi)
+float _Complex PI = (float)M_PI + 0.0_iF;				// pi
+float _Complex PI_2 = (float)M_PI_2 + 0.0_iF;			// pi / 2
+float _Complex PI_4 = (float)M_PI_4 + 0.0_iF;			// pi / 4
+float _Complex _1_PI = (float)M_1_PI + 0.0_iF;			// 1 / pi
+float _Complex _2_PI = (float)M_2_PI + 0.0_iF;			// 2 / pi
+float _Complex _2_SQRT_PI = (float)M_2_SQRTPI + 0.0_iF; // 2 / sqrt(pi)
 
-const double _Complex PI = M_PI + 0.0_iD;				// pi
-const double _Complex PI_2 = M_PI_2 + 0.0_iD;			// pi / 2
-const double _Complex PI_4 = M_PI_4 + 0.0_iD;			// pi / 4
-const double _Complex _1_PI = M_1_PI + 0.0_iD;			// 1 / pi
-const double _Complex _2_PI = M_2_PI + 0.0_iD;			// 2 / pi
-const double _Complex _2_SQRT_PI = M_2_SQRTPI + 0.0_iD;	// 2 / sqrt(pi)
+double _Complex PI = M_PI + 0.0_iD;						// pi
+double _Complex PI_2 = M_PI_2 + 0.0_iD;					// pi / 2
+double _Complex PI_4 = M_PI_4 + 0.0_iD;					// pi / 4
+double _Complex _1_PI = M_1_PI + 0.0_iD;				// 1 / pi
+double _Complex _2_PI = M_2_PI + 0.0_iD;				// 2 / pi
+double _Complex _2_SQRT_PI = M_2_SQRTPI + 0.0_iD;		// 2 / sqrt(pi)
 
-const long double _Complex PI = M_PIl + 0.0_iL;			// pi
-const long double _Complex PI_2 = M_PI_2l + 0.0_iL;		// pi / 2
-const long double _Complex PI_4 = M_PI_4l + 0.0_iL;		// pi / 4
-const long double _Complex _1_PI = M_1_PIl + 0.0_iL;	// 1 / pi
-const long double _Complex _2_PI = M_2_PIl + 0.0_iL;	// 2 / pi
-const long double _Complex _2_SQRT_PI = M_2_SQRTPIl + 0.0_iL; // 2 / sqrt(pi)
+long double _Complex PI = M_PIl + 0.0_iL;				// pi
+long double _Complex PI_2 = M_PI_2l + 0.0_iL;			// pi / 2
+long double _Complex PI_4 = M_PI_4l + 0.0_iL;			// pi / 4
+long double _Complex _1_PI = M_1_PIl + 0.0_iL;			// 1 / pi
+long double _Complex _2_PI = M_2_PIl + 0.0_iL;			// 2 / pi
+long double _Complex _2_SQRT_PI = M_2_SQRTPIl + 0.0_iL; // 2 / sqrt(pi)
 
-const float E = (float)M_E;								// e
-const float LOG2_E = (float)M_LOG2E;					// log_2(e)
-const float LOG10_E = (float)M_LOG10E;					// log_10(e)
-const float LN_2 = (float)M_LN2;						// log_e(2)
-const float LN_10 = (float)M_LN10;						// log_e(10)
-const float SQRT_2 = (float)M_SQRT2;					// sqrt(2)
-const float _1_SQRT_2 = (float)M_SQRT1_2;				// 1 / sqrt(2)
+float E = (float)M_E;									// e
+float LOG2_E = (float)M_LOG2E;							// log_2(e)
+float LOG10_E = (float)M_LOG10E;						// log_10(e)
+float LN_2 = (float)M_LN2;								// log_e(2)
+float LN_10 = (float)M_LN10;							// log_e(10)
+float SQRT_2 = (float)M_SQRT2;							// sqrt(2)
+float _1_SQRT_2 = (float)M_SQRT1_2;						// 1 / sqrt(2)
 
-const double E = M_E;									// e
-const double LOG2_E = M_LOG2E;							// log_2(e)
-const double LOG10_E = M_LOG10E;						// log_10(e)
-const double LN_2 = M_LN2;								// log_e(2)
-const double LN_10 = M_LN10;							// log_e(10)
-const double SQRT_2 = M_SQRT2;							// sqrt(2)
-const double _1_SQRT_2 = M_SQRT1_2;						// 1 / sqrt(2)
+double E = M_E;											// e
+double LOG2_E = M_LOG2E;								// log_2(e)
+double LOG10_E = M_LOG10E;								// log_10(e)
+double LN_2 = M_LN2;									// log_e(2)
+double LN_10 = M_LN10;									// log_e(10)
+double SQRT_2 = M_SQRT2;								// sqrt(2)
+double _1_SQRT_2 = M_SQRT1_2;							// 1 / sqrt(2)
 
-const long double E = M_El;								// e
-const long double LOG2_E = M_LOG2El;					// log_2(e)
-const long double LOG10_E = M_LOG10El;					// log_10(e)
-const long double LN_2 = M_LN2l;						// log_e(2)
-const long double LN_10 = M_LN10l;						// log_e(10)
-const long double SQRT_2 = M_SQRT2l;					// sqrt(2)
-const long double _1_SQRT_2 = M_SQRT1_2l;				// 1 / sqrt(2)
+long double E = M_El;									// e
+long double LOG2_E = M_LOG2El;							// log_2(e)
+long double LOG10_E = M_LOG10El;						// log_10(e)
+long double LN_2 = M_LN2l;								// log_e(2)
+long double LN_10 = M_LN10l;							// log_e(10)
+long double SQRT_2 = M_SQRT2l;							// sqrt(2)
+long double _1_SQRT_2 = M_SQRT1_2l;						// 1 / sqrt(2)
 
-const float _Complex E = M_E + 0.0_iF;					// e
-const float _Complex LOG2_E = M_LOG2E + 0.0_iF;			// log_2(e)
-const float _Complex LOG10_E = M_LOG10E + 0.0_iF;		// log_10(e)
-const float _Complex LN_2 = M_LN2 + 0.0_iF;				// log_e(2)
-const float _Complex LN_10 = M_LN10 + 0.0_iF;			// log_e(10)
-const float _Complex SQRT_2 = M_SQRT2 + 0.0_iF;			// sqrt(2)
-const float _Complex _1_SQRT_2 = M_SQRT1_2 + 0.0_iF;	// 1 / sqrt(2)
+float _Complex E = M_E + 0.0_iF;						// e
+float _Complex LOG2_E = M_LOG2E + 0.0_iF;				// log_2(e)
+float _Complex LOG10_E = M_LOG10E + 0.0_iF;				// log_10(e)
+float _Complex LN_2 = M_LN2 + 0.0_iF;					// log_e(2)
+float _Complex LN_10 = M_LN10 + 0.0_iF;					// log_e(10)
+float _Complex SQRT_2 = M_SQRT2 + 0.0_iF;				// sqrt(2)
+float _Complex _1_SQRT_2 = M_SQRT1_2 + 0.0_iF;			// 1 / sqrt(2)
 
-const double _Complex E = M_E + 0.0_iD;					// e
-const double _Complex LOG2_E = M_LOG2E + 0.0_iD;		// log_2(e)
-const double _Complex LOG10_E = M_LOG10E + 0.0_iD;		// log_10(e)
-const double _Complex LN_2 = M_LN2 + 0.0_iD;			// log_e(2)
-const double _Complex LN_10 = M_LN10 + 0.0_iD;			// log_e(10)
-const double _Complex SQRT_2 = M_SQRT2 + 0.0_iD;		// sqrt(2)
-const double _Complex _1_SQRT_2 = M_SQRT1_2 + 0.0_iD;	// 1 / sqrt(2)
+double _Complex E = M_E + 0.0_iD;						// e
+double _Complex LOG2_E = M_LOG2E + 0.0_iD;				// log_2(e)
+double _Complex LOG10_E = M_LOG10E + 0.0_iD;			// log_10(e)
+double _Complex LN_2 = M_LN2 + 0.0_iD;					// log_e(2)
+double _Complex LN_10 = M_LN10 + 0.0_iD;				// log_e(10)
+double _Complex SQRT_2 = M_SQRT2 + 0.0_iD;				// sqrt(2)
+double _Complex _1_SQRT_2 = M_SQRT1_2 + 0.0_iD;			// 1 / sqrt(2)
 
-const long double _Complex E = M_El + 0.0_iL;			// e
-const long double _Complex LOG2_E = M_LOG2El + 0.0_iL;	// log_2(e)
-const long double _Complex LOG10_E = M_LOG10El + 0.0_iL; // log_10(e)
-const long double _Complex LN_2 = M_LN2l + 0.0_iL;		// log_e(2)
-const long double _Complex LN_10 = M_LN10l + 0.0_iL;	// log_e(10)
-const long double _Complex SQRT_2 = M_SQRT2l + 0.0_iL;	// sqrt(2)
-const long double _Complex _1_SQRT_2 = M_SQRT1_2l + 0.0_iL; // 1 / sqrt(2)
+long double _Complex E = M_El + 0.0_iL;					// e
+long double _Complex LOG2_E = M_LOG2El + 0.0_iL;		// log_2(e)
+long double _Complex LOG10_E = M_LOG10El + 0.0_iL;		// log_10(e)
+long double _Complex LN_2 = M_LN2l + 0.0_iL;			// log_e(2)
+long double _Complex LN_10 = M_LN10l + 0.0_iL;			// log_e(10)
+long double _Complex SQRT_2 = M_SQRT2l + 0.0_iL;		// sqrt(2)
+long double _Complex _1_SQRT_2 = M_SQRT1_2l + 0.0_iL;	// 1 / sqrt(2)
 
 // Local Variables: //
Index: libcfa/src/limits.hfa
===================================================================
--- libcfa/src/limits.hfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/limits.hfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -10,6 +10,6 @@
 // Created On       : Wed Apr  6 18:06:52 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Mar  1 16:20:54 2018
-// Update Count     : 13
+// Last Modified On : Wed Sep 30 22:56:35 2020
+// Update Count     : 15
 //
 
@@ -18,131 +18,131 @@
 // Integral Constants
 
-extern const signed char MIN;
-extern const unsigned char MIN;
-extern const short int MIN;
-extern const unsigned short int MIN;
-extern const int MIN;
-extern const unsigned int MIN;
-extern const long int MIN;
-extern const unsigned long int MIN;
-extern const long long int MIN;
-extern const unsigned long long int MIN;
+extern signed char MIN;
+extern unsigned char MIN;
+extern short int MIN;
+extern unsigned short int MIN;
+extern int MIN;
+extern unsigned int MIN;
+extern long int MIN;
+extern unsigned long int MIN;
+extern long long int MIN;
+extern unsigned long long int MIN;
 
-extern const signed char MAX;
-extern const unsigned char MAX;
-extern const short int MAX;
-extern const unsigned short int MAX;
-extern const int MAX;
-extern const unsigned int MAX;
-extern const long int MAX;
-extern const unsigned long int MAX;
-extern const long long int MAX;
-extern const unsigned long long int MAX;
+extern signed char MAX;
+extern unsigned char MAX;
+extern short int MAX;
+extern unsigned short int MAX;
+extern int MAX;
+extern unsigned int MAX;
+extern long int MAX;
+extern unsigned long int MAX;
+extern long long int MAX;
+extern unsigned long long int MAX;
 
 // Floating-Point Constants
 
-extern const float MIN;
-extern const double MIN;
-extern const long double MIN;
-extern const float _Complex MIN;
-extern const double _Complex MIN;
-extern const long double _Complex MIN;
+extern float MIN;
+extern double MIN;
+extern long double MIN;
+extern float _Complex MIN;
+extern double _Complex MIN;
+extern long double _Complex MIN;
 
-extern const float MAX;
-extern const double MAX;
-extern const long double MAX;
-extern const float _Complex MAX;
-extern const double _Complex MAX;
-extern const long double _Complex MAX;
+extern float MAX;
+extern double MAX;
+extern long double MAX;
+extern float _Complex MAX;
+extern double _Complex MAX;
+extern long double _Complex MAX;
 
-extern const float PI;									// pi
-extern const float PI_2;								// pi / 2
-extern const float PI_4;								// pi / 4
-extern const float _1_PI;								// 1 / pi
-extern const float _2_PI;								// 2 / pi
-extern const float _2_SQRT_PI;							// 2 / sqrt(pi)
+extern float PI;										// pi
+extern float PI_2;										// pi / 2
+extern float PI_4;										// pi / 4
+extern float _1_PI;										// 1 / pi
+extern float _2_PI;										// 2 / pi
+extern float _2_SQRT_PI;								// 2 / sqrt(pi)
 
-extern const double PI;									// pi
-extern const double PI_2;								// pi / 2
-extern const double PI_4;								// pi / 4
-extern const double _1_PI;								// 1 / pi
-extern const double _2_PI;								// 2 / pi
-extern const double _2_SQRT_PI;							// 2 / sqrt(pi)
+extern double PI;										// pi
+extern double PI_2;										// pi / 2
+extern double PI_4;										// pi / 4
+extern double _1_PI;									// 1 / pi
+extern double _2_PI;									// 2 / pi
+extern double _2_SQRT_PI;								// 2 / sqrt(pi)
 
-extern const long double PI;							// pi
-extern const long double PI_2;							// pi / 2
-extern const long double PI_4;							// pi / 4
-extern const long double _1_PI;							// 1 / pi
-extern const long double _2_PI;							// 2 / pi
-extern const long double _2_SQRT_PI;					// 2 / sqrt(pi)
+extern long double PI;									// pi
+extern long double PI_2;								// pi / 2
+extern long double PI_4;								// pi / 4
+extern long double _1_PI;								// 1 / pi
+extern long double _2_PI;								// 2 / pi
+extern long double _2_SQRT_PI;							// 2 / sqrt(pi)
 
-extern const float _Complex PI;							// pi
-extern const float _Complex PI_2;						// pi / 2
-extern const float _Complex PI_4;						// pi / 4
-extern const float _Complex _1_PI;						// 1 / pi
-extern const float _Complex _2_PI;						// 2 / pi
-extern const float _Complex _2_SQRT_PI;					// 2 / sqrt(pi)
+extern float _Complex PI;								// pi
+extern float _Complex PI_2;								// pi / 2
+extern float _Complex PI_4;								// pi / 4
+extern float _Complex _1_PI;							// 1 / pi
+extern float _Complex _2_PI;							// 2 / pi
+extern float _Complex _2_SQRT_PI;						// 2 / sqrt(pi)
 
-extern const double _Complex PI;						// pi
-extern const double _Complex PI_2;						// pi / 2
-extern const double _Complex PI_4;						// pi / 4
-extern const double _Complex _1_PI;						// 1 / pi
-extern const double _Complex _2_PI;						// 2 / pi
-extern const double _Complex _2_SQRT_PI;				// 2 / sqrt(pi)
+extern double _Complex PI;								// pi
+extern double _Complex PI_2;							// pi / 2
+extern double _Complex PI_4;							// pi / 4
+extern double _Complex _1_PI;							// 1 / pi
+extern double _Complex _2_PI;							// 2 / pi
+extern double _Complex _2_SQRT_PI;						// 2 / sqrt(pi)
 
-extern const long double _Complex PI;					// pi
-extern const long double _Complex PI_2;					// pi / 2
-extern const long double _Complex PI_4;					// pi / 4
-extern const long double _Complex _1_PI;				// 1 / pi
-extern const long double _Complex _2_PI;				// 2 / pi
-extern const long double _Complex _2_SQRT_PI;			// 2 / sqrt(pi)
+extern long double _Complex PI;							// pi
+extern long double _Complex PI_2;						// pi / 2
+extern long double _Complex PI_4;						// pi / 4
+extern long double _Complex _1_PI;						// 1 / pi
+extern long double _Complex _2_PI;						// 2 / pi
+extern long double _Complex _2_SQRT_PI;					// 2 / sqrt(pi)
 
-extern const float E;									// e
-extern const float LOG2_E;								// log_2(e)
-extern const float LOG10_E;								// log_10(e)
-extern const float LN_2;								// log_e(2)
-extern const float LN_10;								// log_e(10)
-extern const float SQRT_2;								// sqrt(2)
-extern const float _1_SQRT_2;							// 1 / sqrt(2)
+extern float E;											// e
+extern float LOG2_E;									// log_2(e)
+extern float LOG10_E;									// log_10(e)
+extern float LN_2;										// log_e(2)
+extern float LN_10;										// log_e(10)
+extern float SQRT_2;									// sqrt(2)
+extern float _1_SQRT_2;									// 1 / sqrt(2)
 
-extern const double E;									// e
-extern const double LOG2_E;								// log_2(e)
-extern const double LOG10_E;							// log_10(e)
-extern const double LN_2;								// log_e(2)
-extern const double LN_10;								// log_e(10)
-extern const double SQRT_2;								// sqrt(2)
-extern const double _1_SQRT_2;							// 1 / sqrt(2)
+extern double E;										// e
+extern double LOG2_E;									// log_2(e)
+extern double LOG10_E;									// log_10(e)
+extern double LN_2;										// log_e(2)
+extern double LN_10;									// log_e(10)
+extern double SQRT_2;									// sqrt(2)
+extern double _1_SQRT_2;								// 1 / sqrt(2)
 
-extern const long double E;								// e
-extern const long double LOG2_E;						// log_2(e)
-extern const long double LOG10_E;						// log_10(e)
-extern const long double LN_2;							// log_e(2)
-extern const long double LN_10;							// log_e(10)
-extern const long double SQRT_2;						// sqrt(2)
-extern const long double _1_SQRT_2;						// 1/sqrt(2)
+extern long double E;									// e
+extern long double LOG2_E;								// log_2(e)
+extern long double LOG10_E;								// log_10(e)
+extern long double LN_2;								// log_e(2)
+extern long double LN_10;								// log_e(10)
+extern long double SQRT_2;								// sqrt(2)
+extern long double _1_SQRT_2;							// 1 / sqrt(2)
 
-extern const float _Complex E;							// e
-extern const float _Complex LOG2_E;						// log_2(e)
-extern const float _Complex LOG10_E;					// log_10(e)
-extern const float _Complex LN_2;						// log_e(2)
-extern const float _Complex LN_10;						// log_e(10)
-extern const float _Complex SQRT_2;						// sqrt(2)
-extern const float _Complex _1_SQRT_2;					// 1 / sqrt(2)
+extern float _Complex E;								// e
+extern float _Complex LOG2_E;							// log_2(e)
+extern float _Complex LOG10_E;							// log_10(e)
+extern float _Complex LN_2;								// log_e(2)
+extern float _Complex LN_10;							// log_e(10)
+extern float _Complex SQRT_2;							// sqrt(2)
+extern float _Complex _1_SQRT_2;						// 1 / sqrt(2)
 
-extern const double _Complex E;							// e
-extern const double _Complex LOG2_E;					// log_2(e)
-extern const double _Complex LOG10_E;					// log_10(e)
-extern const double _Complex LN_2;						// log_e(2)
-extern const double _Complex LN_10;						// log_e(10)
-extern const double _Complex SQRT_2;					// sqrt(2)
-extern const double _Complex _1_SQRT_2;					// 1 / sqrt(2)
+extern double _Complex E;								// e
+extern double _Complex LOG2_E;							// log_2(e)
+extern double _Complex LOG10_E;							// log_10(e)
+extern double _Complex LN_2;							// log_e(2)
+extern double _Complex LN_10;							// log_e(10)
+extern double _Complex SQRT_2;							// sqrt(2)
+extern double _Complex _1_SQRT_2;						// 1 / sqrt(2)
 
-extern const long double _Complex E;					// e
-extern const long double _Complex LOG2_E;				// log_2(e)
-extern const long double _Complex LOG10_E;				// log_10(e)
-extern const long double _Complex LN_2;					// log_e(2)
-extern const long double _Complex LN_10;				// log_e(10)
-extern const long double _Complex SQRT_2;				// sqrt(2)
-extern const long double _Complex _1_SQRT_2;			// 1 / sqrt(2)
+extern long double _Complex E;							// e
+extern long double _Complex LOG2_E;						// log_2(e)
+extern long double _Complex LOG10_E;					// log_10(e)
+extern long double _Complex LN_2;						// log_e(2)
+extern long double _Complex LN_10;						// log_e(10)
+extern long double _Complex SQRT_2;						// sqrt(2)
+extern long double _Complex _1_SQRT_2;					// 1 / sqrt(2)
 
 // Local Variables: //
Index: libcfa/src/parseargs.cfa
===================================================================
--- libcfa/src/parseargs.cfa	(revision ae2c27a754e54e8563131c0e84adfd4afe813c6f)
+++ libcfa/src/parseargs.cfa	(revision c76bd3423f7b7fdfa3c5b8b2d451657555da4ca5)
@@ -25,12 +25,18 @@
 #include "limits.hfa"
 
-extern int cfa_args_argc;
-extern char ** cfa_args_argv;
-extern char ** cfa_args_envp;
+extern int cfa_args_argc __attribute__((weak));
+extern char ** cfa_args_argv __attribute__((weak));
+extern char ** cfa_args_envp __attribute__((weak));
 
 static void usage(char * cmd, cfa_option options[], size_t opt_count, const char * usage, FILE * out)  __attribute__ ((noreturn));
 
 void parse_args( cfa_option options[], size_t opt_count, const char * usage, char ** & left ) {
-	parse_args(cfa_args_argc, cfa_args_argv, options, opt_count, usage, left );
+	if( 0p == &cfa_args_argc ) {
+		// cfa_args_argc is declared weak: a null address means no definition was linked, so parse an empty command line
+		char * temp = "";
+		parse_args(0, &temp, options, opt_count, usage, left );
+		return;
+	}
+	parse_args(cfa_args_argc, cfa_args_argv, options, opt_count, usage, left );
 }
 
