Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 6ec07e5f9016c92f73237be92c20ecdd0fba5795)
+++ libcfa/src/concurrency/io.cfa	(revision af7acb9c31520a9956bf84e617377d94a46b8baa)
@@ -20,5 +20,5 @@
 
 #if !defined(HAVE_LINUX_IO_URING_H)
-	void __kernel_io_startup( cluster &, bool ) {
+	void __kernel_io_startup( cluster &, int, bool ) {
 		// Nothing to do without io_uring
 	}
@@ -86,19 +86,122 @@
 	#endif
 
-	#if defined(__CFA_IO_POLLING_USER__)
-		void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
-			this.ring = &cltr.io;
-			(this.thrd){ "Fast I/O Poller", cltr };
-		}
-		void ^?{}( __io_poller_fast & mutex this );
-      	void main( __io_poller_fast & this );
-      	static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
-		void ^?{}( __io_poller_fast & mutex this ) {}
-	#endif
+	// Fast poller user-thread
+	// Not using the "thread" keyword because we want to control
+	// more carefully when to start/stop it
+	struct __io_poller_fast {
+		struct __io_data * ring;
+		bool waiting;
+		$thread thrd;
+	};
+
+	void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
+		this.ring = cltr.io;
+		this.waiting = true;
+		(this.thrd){ "Fast I/O Poller", cltr };
+	}
+	void ^?{}( __io_poller_fast & mutex this );
+	void main( __io_poller_fast & this );
+	static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
+	void ^?{}( __io_poller_fast & mutex this ) {}
+
+	struct __submition_data {
+		// Head and tail of the ring (associated with array)
+		volatile uint32_t * head;
+		volatile uint32_t * tail;
+
+		// The actual kernel ring which uses head/tail
+		// indexes into the sqes arrays
+		uint32_t * array;
+
+		// number of entries and mask to go with it
+		const uint32_t * num;
+		const uint32_t * mask;
+
+		// Submission flags (Not sure what for)
+		uint32_t * flags;
+
+		// number of sqes not submitted (whatever that means)
+		uint32_t * dropped;
+
+		// Like head/tail but not seen by the kernel
+		volatile uint32_t alloc;
+		volatile uint32_t ready;
+
+		__spinlock_t lock;
+
+		// A buffer of sqes (not the actual ring)
+		struct io_uring_sqe * sqes;
+
+		// The location and size of the mmaped area
+		void * ring_ptr;
+		size_t ring_sz;
+
+		// Statistics
+		#if !defined(__CFA_NO_STATISTICS__)
+			struct {
+				struct {
+					volatile unsigned long long int val;
+					volatile unsigned long long int cnt;
+					volatile unsigned long long int block;
+				} submit_avg;
+			} stats;
+		#endif
+	};
+
+	struct __completion_data {
+		// Head and tail of the ring
+		volatile uint32_t * head;
+		volatile uint32_t * tail;
+
+		// number of entries and mask to go with it
+		const uint32_t * mask;
+		const uint32_t * num;
+
+		// number of cqes not submitted (whatever that means)
+		uint32_t * overflow;
+
+		// the kernel ring
+		struct io_uring_cqe * cqes;
+
+		// The location and size of the mmaped area
+		void * ring_ptr;
+		size_t ring_sz;
+
+		// Statistics
+		#if !defined(__CFA_NO_STATISTICS__)
+			struct {
+				struct {
+					unsigned long long int val;
+					unsigned long long int slow_cnt;
+					unsigned long long int fast_cnt;
+				} completed_avg;
+			} stats;
+		#endif
+	};
+
+	struct __io_data {
+		struct __submition_data submit_q;
+		struct __completion_data completion_q;
+		uint32_t ring_flags;
+		int cltr_flags;
+		int fd;
+		semaphore submit;
+		volatile bool done;
+		struct {
+			struct {
+				void * stack;
+				pthread_t kthrd;
+			} slow;
+			__io_poller_fast fast;
+			__bin_sem_t sem;
+		} poller;
+	};
 
 //=============================================================================================
 // I/O Startup / Shutdown logic
 //=============================================================================================
-	void __kernel_io_startup( cluster & this, bool main_cluster ) {
+	void __kernel_io_startup( cluster & this, int io_flags, bool main_cluster ) {
+		this.io = malloc();
+
 		// Step 1 : call to setup
 		struct io_uring_params params;
@@ -113,7 +216,7 @@
 
 		// Step 2 : mmap result
-		memset(&this.io, 0, sizeof(struct io_ring));
-		struct io_uring_sq & sq = this.io.submit_q;
-		struct io_uring_cq & cq = this.io.completion_q;
+		memset( this.io, 0, sizeof(struct __io_data) );
+		struct __submition_data  & sq = this.io->submit_q;
+		struct __completion_data & cq = this.io->completion_q;
 
 		// calculate the right ring size
@@ -193,15 +296,18 @@
 
 		// Update the global ring info
-		this.io.flags = params.flags;
-		this.io.fd    = fd;
-		this.io.done  = false;
-		(this.io.submit){ min(*sq.num, *cq.num) };
+		this.io->ring_flags = params.flags;
+		this.io->cltr_flags = io_flags;
+		this.io->fd         = fd;
+		this.io->done       = false;
+		(this.io->submit){ min(*sq.num, *cq.num) };
 
 		// Initialize statistics
 		#if !defined(__CFA_NO_STATISTICS__)
-			this.io.submit_q.stats.submit_avg.val = 0;
-			this.io.submit_q.stats.submit_avg.cnt = 0;
-			this.io.completion_q.stats.completed_avg.val = 0;
-			this.io.completion_q.stats.completed_avg.cnt = 0;
+			this.io->submit_q.stats.submit_avg.val   = 0;
+			this.io->submit_q.stats.submit_avg.cnt   = 0;
+			this.io->submit_q.stats.submit_avg.block = 0;
+			this.io->completion_q.stats.completed_avg.val = 0;
+			this.io->completion_q.stats.completed_avg.slow_cnt = 0;
+			this.io->completion_q.stats.completed_avg.fast_cnt = 0;
 		#endif
 
@@ -212,13 +318,13 @@
 
 	void __kernel_io_finish_start( cluster & this ) {
-		#if defined(__CFA_IO_POLLING_USER__)
+		if( this.io->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
 			__cfadbg_print_safe(io_core, "Kernel I/O : Creating fast poller for cluter %p\n", &this);
-			(this.io.poller.fast){ this };
-			__thrd_start( this.io.poller.fast, main );
-		#endif
+			(this.io->poller.fast){ this };
+			__thrd_start( this.io->poller.fast, main );
+		}
 
 		// Create the poller thread
 		__cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluter %p\n", &this);
-		this.io.poller.slow.stack = __create_pthread( &this.io.poller.slow.kthrd, __io_poller_slow, &this );
+		this.io->poller.slow.stack = __create_pthread( &this.io->poller.slow.kthrd, __io_poller_slow, &this );
 	}
 
@@ -226,27 +332,66 @@
 		__cfadbg_print_safe(io_core, "Kernel I/O : Stopping pollers for cluster\n", &this);
 		// Notify the poller thread of the shutdown
-		__atomic_store_n(&this.io.done, true, __ATOMIC_SEQ_CST);
+		__atomic_store_n(&this.io->done, true, __ATOMIC_SEQ_CST);
 
 		// Stop the IO Poller
 		sigval val = { 1 };
-		pthread_sigqueue( this.io.poller.slow.kthrd, SIGUSR1, val );
-		#if defined(__CFA_IO_POLLING_USER__)
-			post( this.io.poller.sem );
-		#endif
+		pthread_sigqueue( this.io->poller.slow.kthrd, SIGUSR1, val );
+		post( this.io->poller.sem );
 
 		// Wait for the poller thread to finish
-		pthread_join( this.io.poller.slow.kthrd, 0p );
-		free( this.io.poller.slow.stack );
+		pthread_join( this.io->poller.slow.kthrd, 0p );
+		free( this.io->poller.slow.stack );
 
 		__cfadbg_print_safe(io_core, "Kernel I/O : Slow poller stopped for cluster\n", &this);
 
-		#if defined(__CFA_IO_POLLING_USER__)
+		if( this.io->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
+			with( this.io->poller.fast ) {
+				/* paranoid */ verify( waiting ); // The thread shouldn't be in a system call
+				/* paranoid */ verify( this.procs.head == 0p || &this == mainCluster );
+				/* paranoid */ verify( this.idles.head == 0p || &this == mainCluster );
+
+				// We need to adjust the clean-up based on where the thread is
+				if( thrd.preempted != __NO_PREEMPTION ) {
+
+					// This is the tricky case
+					// The thread was preempted and now it is on the ready queue
+					/* paranoid */ verify( thrd.state == Active );           // The thread better be in this state
+					/* paranoid */ verify( thrd.next == 1p );                // The thread should be the last on the list
+					/* paranoid */ verify( this.ready_queue.head == &thrd ); // The thread should be the only thing on the list
+
+					// Remove the thread from the ready queue of this cluster
+					this.ready_queue.head = 1p;
+					thrd.next = 0p;
+
+					// Fixup the thread state
+					thrd.state = Blocked;
+					thrd.preempted = __NO_PREEMPTION;
+
+					// Pretend like the thread was blocked all along
+				}
+				// !!! This is not an else if !!!
+				if( thrd.state == Blocked ) {
+
+					// This is the "easy case"
+					// The thread is parked and can easily be moved to active cluster
+					verify( thrd.curr_cluster != active_cluster() || thrd.curr_cluster == mainCluster );
+					thrd.curr_cluster = active_cluster();
+
 			// unpark the fast io_poller
-			unpark( &this.io.poller.fast.thrd __cfaabi_dbg_ctx2 );
-
-			^(this.io.poller.fast){};
+					unpark( &thrd __cfaabi_dbg_ctx2 );
+				}
+				else {
+
+					// The thread is in a weird state
+					// I don't know what to do here
+					abort("Fast poller thread is in unexpected state, cannot clean-up correctly\n");
+				}
+
+			}
+
+			^(this.io->poller.fast){};
 
 			__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller stopped for cluster\n", &this);
-		#endif
+		}
 	}
 
@@ -259,21 +404,26 @@
 		#if !defined(__CFA_NO_STATISTICS__)
 			if(this.print_stats) {
-				__cfaabi_bits_print_safe( STDERR_FILENO,
-					"----- I/O uRing Stats -----\n"
-					"- total submit calls  : %llu\n"
-					"- avg submit          : %lf\n"
-					"- total wait calls    : %llu\n"
-					"- avg completion/wait : %lf\n",
-					this.io.submit_q.stats.submit_avg.cnt,
-					((double)this.io.submit_q.stats.submit_avg.val) / this.io.submit_q.stats.submit_avg.cnt,
-					this.io.completion_q.stats.completed_avg.cnt,
-					((double)this.io.completion_q.stats.completed_avg.val) / this.io.completion_q.stats.completed_avg.cnt
-				);
+				with(this.io->submit_q.stats, this.io->completion_q.stats) {
+					__cfaabi_bits_print_safe( STDERR_FILENO,
+						"----- I/O uRing Stats -----\n"
+						"- total submit calls  : %'15llu\n"
+						"- avg submit          : %'18.2lf\n"
+						"- pre-submit block %%  : %'18.2lf\n"
+						"- total wait calls    : %'15llu   (%'llu slow, %'llu fast)\n"
+						"- avg completion/wait : %'18.2lf\n",
+						submit_avg.cnt,
+						((double)submit_avg.val) / submit_avg.cnt,
+						(100.0 * submit_avg.block) / submit_avg.cnt,
+						completed_avg.slow_cnt + completed_avg.fast_cnt,
+						completed_avg.slow_cnt,  completed_avg.fast_cnt,
+						((double)completed_avg.val) / (completed_avg.slow_cnt + completed_avg.fast_cnt)
+					);
+				}
 			}
 		#endif
 
 		// Shutdown the io rings
-		struct io_uring_sq & sq = this.io.submit_q;
-		struct io_uring_cq & cq = this.io.completion_q;
+		struct __submition_data  & sq = this.io->submit_q;
+		struct __completion_data & cq = this.io->completion_q;
 
 		// unmap the submit queue entries
@@ -289,5 +439,7 @@
 
 		// close the file descriptor
-		close(this.io.fd);
+		close(this.io->fd);
+
+		free( this.io );
 	}
 
@@ -302,5 +454,5 @@
 	// Process a single completion message from the io_uring
 	// This is NOT thread-safe
-	static int __drain_io( struct io_ring & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
+	static int __drain_io( struct __io_data & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
 		int ret = syscall( __NR_io_uring_enter, ring.fd, 0, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
 		if( ret < 0 ) {
@@ -320,7 +472,4 @@
 		// Nothing was new return 0
 		if (head == tail) {
-			#if !defined(__CFA_NO_STATISTICS__)
-				ring.completion_q.stats.completed_avg.cnt += 1;
-			#endif
 			return 0;
 		}
@@ -348,10 +497,4 @@
 		__atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED );
 
-		// Update statistics
-		#if !defined(__CFA_NO_STATISTICS__)
-			ring.completion_q.stats.completed_avg.val += count;
-			ring.completion_q.stats.completed_avg.cnt += 1;
-		#endif
-
 		return count;
 	}
@@ -359,5 +502,5 @@
 	static void * __io_poller_slow( void * arg ) {
 		cluster * cltr = (cluster *)arg;
-		struct io_ring & ring = cltr->io;
+		struct __io_data & ring = *cltr->io;
 
 		sigset_t mask;
@@ -372,10 +515,18 @@
 		verify( (*ring.completion_q.head) == (*ring.completion_q.tail) );
 
-		while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
-			#if defined(__CFA_IO_POLLING_USER__)
-
+		__cfadbg_print_safe(io_core, "Kernel I/O : Slow poller for ring %p ready\n", &ring);
+
+		if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
+			while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
 				// In the user-thread approach drain and if anything was drained,
 				// batton pass to the user-thread
 				int count = __drain_io( ring, &mask, 1, true );
+
+				// Update statistics
+				#if !defined(__CFA_NO_STATISTICS__)
+					ring.completion_q.stats.completed_avg.val += count;
+					ring.completion_q.stats.completed_avg.slow_cnt += 1;
+				#endif
+
 				if(count > 0) {
 					__cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to fast poller\n", &ring);
@@ -383,37 +534,63 @@
 					wait( ring.poller.sem );
 				}
-
-			#else
-
+			}
+		}
+		else {
+			while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
 				//In the naive approach, just poll the io completion queue directly
-				__drain_io( ring, &mask, 1, true );
-
+				int count = __drain_io( ring, &mask, 1, true );
+
+				// Update statistics
+				#if !defined(__CFA_NO_STATISTICS__)
+					ring.completion_q.stats.completed_avg.val += count;
+					ring.completion_q.stats.completed_avg.slow_cnt += 1;
+				#endif
+			}
+		}
+
+		__cfadbg_print_safe(io_core, "Kernel I/O : Slow poller for ring %p stopping\n", &ring);
+
+		return 0p;
+	}
+
+	void main( __io_poller_fast & this ) {
+		verify( this.ring->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD );
+
+		// Start parked
+		park( __cfaabi_dbg_ctx );
+
+		__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p ready\n", &this.ring);
+
+		int reset = 0;
+
+		// Then loop until we need to stop
+		while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
+			// Drain the io
+			this.waiting = false;
+			int count = __drain_io( *this.ring, 0p, 0, false );
+			reset += count > 0 ? 1 : 0;
+
+			// Update statistics
+			#if !defined(__CFA_NO_STATISTICS__)
+				this.ring->completion_q.stats.completed_avg.val += count;
+				this.ring->completion_q.stats.completed_avg.fast_cnt += 1;
 			#endif
-		}
-
-		return 0p;
-	}
-
-	#if defined(__CFA_IO_POLLING_USER__)
-		void main( __io_poller_fast & this ) {
-			// Start parked
-			park( __cfaabi_dbg_ctx );
-
-			// Then loop until we need to start
-			while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
-				// Drain the io
-				if(0 > __drain_io( *this.ring, 0p, 0, false )) {
-					// If we got something, just yield and check again
-					yield();
-				}
-				else {
-					// We didn't get anything baton pass to the slow poller
-					__cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
-					post( this.ring->poller.sem );
-					park( __cfaabi_dbg_ctx );
-				}
+
+			this.waiting = true;
+			if(reset < 5) {
+				// If we got something, just yield and check again
+				yield();
 			}
-		}
-	#endif
+			else {
+				// We didn't get anything baton pass to the slow poller
+				__cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
+				post( this.ring->poller.sem );
+				park( __cfaabi_dbg_ctx );
+				reset = 0;
+			}
+		}
+
+		__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring);
+	}
 
 //=============================================================================================
@@ -445,7 +622,10 @@
 //
 
-	static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct io_ring & ring ) {
+	static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring ) {
 		// Wait for a spot to be available
-		P(ring.submit);
+		__attribute__((unused)) bool blocked = P(ring.submit);
+		#if !defined(__CFA_NO_STATISTICS__)
+			__atomic_fetch_add( &ring.submit_q.stats.submit_avg.block, blocked ? 1ul64 : 0ul64, __ATOMIC_RELAXED );
+		#endif
 
 		// Allocate the sqe
@@ -463,5 +643,5 @@
 	}
 
-	static inline void __submit( struct io_ring & ring, uint32_t idx ) {
+	static inline void __submit( struct __io_data & ring, uint32_t idx ) {
 		// get mutual exclusion
 		lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
@@ -524,5 +704,5 @@
 
 	#define __submit_prelude \
-		struct io_ring & ring = active_cluster()->io; \
+		struct __io_data & ring = *active_cluster()->io; \
 		struct io_uring_sqe * sqe; \
 		uint32_t idx; \
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 6ec07e5f9016c92f73237be92c20ecdd0fba5795)
+++ libcfa/src/concurrency/kernel.cfa	(revision af7acb9c31520a9956bf84e617377d94a46b8baa)
@@ -256,5 +256,5 @@
 }
 
-void ?{}(cluster & this, const char name[], Duration preemption_rate) with( this ) {
+void ?{}(cluster & this, const char name[], Duration preemption_rate, int io_flags) with( this ) {
 	this.name = name;
 	this.preemption_rate = preemption_rate;
@@ -270,5 +270,5 @@
 	threads{ __get };
 
-	__kernel_io_startup( this, &this == mainCluster );
+	__kernel_io_startup( this, io_flags, &this == mainCluster );
 
 	doregister(this);
@@ -993,5 +993,5 @@
 void ^?{}(semaphore & this) {}
 
-void P(semaphore & this) with( this ){
+bool P(semaphore & this) with( this ){
 	lock( lock __cfaabi_dbg_ctx2 );
 	count -= 1;
@@ -1003,7 +1003,9 @@
 		unlock( lock );
 		park( __cfaabi_dbg_ctx );
+		return true;
 	}
 	else {
 	    unlock( lock );
+	    return false;
 	}
 }
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 6ec07e5f9016c92f73237be92c20ecdd0fba5795)
+++ libcfa/src/concurrency/kernel.hfa	(revision af7acb9c31520a9956bf84e617377d94a46b8baa)
@@ -38,5 +38,5 @@
 void  ?{}(semaphore & this, int count = 1);
 void ^?{}(semaphore & this);
-void   P (semaphore & this);
+bool   P (semaphore & this);
 bool   V (semaphore & this);
 bool   V (semaphore & this, unsigned count);
@@ -114,104 +114,8 @@
 //-----------------------------------------------------------------------------
 // I/O
-#if defined(HAVE_LINUX_IO_URING_H)
-struct io_uring_sq {
-	// Head and tail of the ring (associated with array)
-	volatile uint32_t * head;
-	volatile uint32_t * tail;
+struct __io_data;
 
-	// The actual kernel ring which uses head/tail
-	// indexes into the sqes arrays
-	uint32_t * array;
-
-	// number of entries and mask to go with it
-	const uint32_t * num;
-	const uint32_t * mask;
-
-	// Submission flags (Not sure what for)
-	uint32_t * flags;
-
-	// number of sqes not submitted (whatever that means)
-	uint32_t * dropped;
-
-	// Like head/tail but not seen by the kernel
-	volatile uint32_t alloc;
-	volatile uint32_t ready;
-
-	__spinlock_t lock;
-
-	// A buffer of sqes (not the actual ring)
-	struct io_uring_sqe * sqes;
-
-	// The location and size of the mmaped area
-	void * ring_ptr;
-	size_t ring_sz;
-
-	// Statistics
-	#if !defined(__CFA_NO_STATISTICS__)
-		struct {
-			struct {
-				unsigned long long int val;
-				unsigned long long int cnt;
-			} submit_avg;
-		} stats;
-	#endif
-};
-
-struct io_uring_cq {
-	// Head and tail of the ring
-	volatile uint32_t * head;
-	volatile uint32_t * tail;
-
-	// number of entries and mask to go with it
-	const uint32_t * mask;
-	const uint32_t * num;
-
-	// number of cqes not submitted (whatever that means)
-	uint32_t * overflow;
-
-	// the kernel ring
-	struct io_uring_cqe * cqes;
-
-	// The location and size of the mmaped area
-	void * ring_ptr;
-	size_t ring_sz;
-
-	// Statistics
-	#if !defined(__CFA_NO_STATISTICS__)
-		struct {
-			struct {
-				unsigned long long int val;
-				unsigned long long int cnt;
-			} completed_avg;
-		} stats;
-	#endif
-};
-
-#if defined(__CFA_IO_POLLING_USER__)
-	struct __io_poller_fast {
-		struct io_ring * ring;
-		$thread thrd;
-	};
-#endif
-
-struct io_ring {
-	struct io_uring_sq submit_q;
-	struct io_uring_cq completion_q;
-	uint32_t flags;
-	int fd;
-	semaphore submit;
-	volatile bool done;
-	struct {
-		struct {
-			void * stack;
-			pthread_t kthrd;
-		} slow;
-		#if defined(__CFA_IO_POLLING_USER__)
-			__io_poller_fast fast;
-			__bin_sem_t sem;
-		#endif
-	} poller;
-};
-#endif
+#define CFA_CLUSTER_IO_POLLER_USER_THREAD 1 << 0
+// #define CFA_CLUSTER_IO_POLLER_KERNEL_SIDE 1 << 1
 
 //-----------------------------------------------------------------------------
@@ -247,7 +151,5 @@
 	} node;
 
-	#if defined(HAVE_LINUX_IO_URING_H)
-		struct io_ring io;
-	#endif
+	struct __io_data * io;
 
 	#if !defined(__CFA_NO_STATISTICS__)
@@ -257,10 +159,13 @@
 extern Duration default_preemption();
 
-void ?{} (cluster & this, const char name[], Duration preemption_rate);
+void ?{} (cluster & this, const char name[], Duration preemption_rate, int flags);
 void ^?{}(cluster & this);
 
-static inline void ?{} (cluster & this)                           { this{"Anonymous Cluster", default_preemption()}; }
-static inline void ?{} (cluster & this, Duration preemption_rate) { this{"Anonymous Cluster", preemption_rate}; }
-static inline void ?{} (cluster & this, const char name[])        { this{name, default_preemption()}; }
+static inline void ?{} (cluster & this)                                      { this{"Anonymous Cluster", default_preemption(), 0}; }
+static inline void ?{} (cluster & this, Duration preemption_rate)            { this{"Anonymous Cluster", preemption_rate, 0}; }
+static inline void ?{} (cluster & this, const char name[])                   { this{name, default_preemption(), 0}; }
+static inline void ?{} (cluster & this, int flags)                           { this{"Anonymous Cluster", default_preemption(), flags}; }
+static inline void ?{} (cluster & this, Duration preemption_rate, int flags) { this{"Anonymous Cluster", preemption_rate, flags}; }
+static inline void ?{} (cluster & this, const char name[], int flags)        { this{name, default_preemption(), flags}; }
 
 static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision 6ec07e5f9016c92f73237be92c20ecdd0fba5795)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision af7acb9c31520a9956bf84e617377d94a46b8baa)
@@ -59,4 +59,6 @@
 extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
 
+extern cluster * mainCluster;
+
 //-----------------------------------------------------------------------------
 // Threads
@@ -75,5 +77,5 @@
 //-----------------------------------------------------------------------------
 // I/O
-void __kernel_io_startup     ( cluster &, bool );
+void __kernel_io_startup     ( cluster &, int, bool );
 void __kernel_io_finish_start( cluster & );
 void __kernel_io_prepare_stop( cluster & );
Index: libcfa/src/exception.hfa
===================================================================
--- libcfa/src/exception.hfa	(revision af7acb9c31520a9956bf84e617377d94a46b8baa)
+++ libcfa/src/exception.hfa	(revision af7acb9c31520a9956bf84e617377d94a46b8baa)
@@ -0,0 +1,106 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// exception.hfa -- User facing tools for working with exceptions.
+//
+// Author           : Andrew Beach
+// Created On       : Thu Apr  7 10:25:00 2020
+// Last Modified By : Andrew Beach
+// Last Modified On : Thu Apr  7 10:25:00 2020
+// Update Count     : 0
+//
+
+// Everything below this line should be considered a patch while the exception
+// objects themselves are designed and created and should be removed in time.
+// -----------------------------------------------------------------------------------------------
+
+// All internals helper macros begin with an underscore.
+#define _CLOSE(...) __VA_ARGS__ }
+#define _GLUE2(left, right) left##right
+#define _GLUE3(left, middle, right) left##middle##right
+#define _EXC_DISPATCH(to, ...) to(__VA_ARGS__,__cfaehm_base_exception_t,)
+
+// FWD_TRIVIAL_EXCEPTION(exception_name);
+// Declare a trivial exception, one that adds no fields or features.
+// This will make the exception visible and may go in a .hfa or .cfa file.
+#define FWD_TRIVIAL_EXCEPTION(...) _EXC_DISPATCH(_FWD_TRIVIAL_EXCEPTION, __VA_ARGS__)
+// INST_TRIVIAL_EXCEPTION(exception_name);
+// Create the trivial exception. This must be used exactly once and should be used in a .cfa file,
+// as it creates the unique instance of the virtual table.
+#define INST_TRIVIAL_EXCEPTION(...) _EXC_DISPATCH(_INST_TRIVIAL_EXCEPTION, __VA_ARGS__)
+// TRIVIAL_EXCEPTION(exception_name[, parent_name]);
+// Does both of the above, a short hand if the exception is only used in one .cfa file.
+// For legacy reasons this is the only one that officially supports having a parent other than the
+// base exception. This feature may be removed or changed.
+#define TRIVIAL_EXCEPTION(...) \
+	_EXC_DISPATCH(_FWD_TRIVIAL_EXCEPTION, __VA_ARGS__); \
+	_EXC_DISPATCH(_INST_TRIVIAL_EXCEPTION, __VA_ARGS__)
+#define _FWD_TRIVIAL_EXCEPTION(exception_name, parent_name, ...) \
+	_VTABLE_DECLARATION(exception_name, parent_name)(); \
+	struct exception_name { \
+		VTABLE_FIELD(exception_name); \
+	}; \
+	void ?{}(exception_name & this); \
+	const char * _GLUE2(exception_name,_msg)(exception_name * this)
+#define _INST_TRIVIAL_EXCEPTION(exception_name, parent_name, ...) \
+	void ?{}(exception_name & this) { \
+		VTABLE_INIT(this, exception_name); \
+	} \
+	const char * _GLUE2(exception_name,_msg)(exception_name * this) { \
+		return #exception_name; \
+	} \
+	_VTABLE_INSTANCE(exception_name, parent_name,)(_GLUE2(exception_name,_msg))
+
+// DATA_EXCEPTION(exception_name)(fields...);
+// Forward declare an exception that adds fields but no features. The added fields go in the
+// second argument list. The virtual table instance must be provided later (see VTABLE_INSTANCE).
+#define DATA_EXCEPTION(...) _EXC_DISPATCH(_DATA_EXCEPTION, __VA_ARGS__)
+#define _DATA_EXCEPTION(exception_name, parent_name, ...) \
+	_VTABLE_DECLARATION(exception_name, parent_name)(); \
+	struct exception_name { VTABLE_FIELD(exception_name); _CLOSE
+
+// VTABLE_DECLARATION(exception_name)([new_features...]);
+// Declare a virtual table type for an exception with exception_name. You may also add features
+// (fields on the virtual table) by including them in the second list.
+#define VTABLE_DECLARATION(...) _EXC_DISPATCH(_VTABLE_DECLARATION, __VA_ARGS__)
+#define _VTABLE_DECLARATION(exception_name, parent_name, ...) \
+	struct exception_name; \
+	VTABLE_TYPE(exception_name); \
+	extern VTABLE_TYPE(exception_name) VTABLE_NAME(exception_name); \
+	VTABLE_TYPE(exception_name) { \
+		VTABLE_TYPE(parent_name) const * parent; \
+		size_t size; \
+		void (*copy)(exception_name * this, exception_name * other); \
+		void (*free)(exception_name & this); \
+		const char * (*msg)(exception_name * this); \
+		_CLOSE
+
+// VTABLE_INSTANCE(exception_name)(msg [, others...]);
+// Create the instance of the virtual table. There must be exactly one instance of a virtual table
+// for each exception type. This fills in most of the fields of the virtual table (uses ?=? and
+// ^?{}) but you must provide the message function and any other fields added in the declaration.
+#define VTABLE_INSTANCE(...) _EXC_DISPATCH(_VTABLE_INSTANCE, __VA_ARGS__)
+#define _VTABLE_INSTANCE(exception_name, parent_name, ...) \
+	void _GLUE2(exception_name,_copy)(exception_name * this, exception_name * other) { \
+		*this = *other; \
+	} \
+	VTABLE_TYPE(exception_name) VTABLE_NAME(exception_name) @= { \
+		&VTABLE_NAME(parent_name), sizeof(exception_name), \
+		_GLUE2(exception_name,_copy), ^?{}, \
+		_CLOSE
+
+// VTABLE_TYPE(exception_name) | VTABLE_NAME(exception_name)
+// Get the name of the vtable type or the name of the vtable instance for an exception type.
+#define VTABLE_TYPE(exception_name) struct _GLUE2(exception_name,_vtable)
+#define VTABLE_NAME(exception_name) _GLUE3(_,exception_name,_vtable_instance)
+
+// VTABLE_FIELD(exception_name);
+// The declaration of the virtual table field. Should be the first declaration in a virtual type.
+#define VTABLE_FIELD(exception_name) VTABLE_TYPE(exception_name) const * virtual_table
+
+// VTABLE_INIT(object_reference, exception_name);
+// Sets a virtual table field on an object to the virtual table instance for the type.
+#define VTABLE_INIT(this, exception_name) (this).virtual_table = &VTABLE_NAME(exception_name)
Index: libcfa/src/heap.cfa
===================================================================
--- libcfa/src/heap.cfa	(revision 6ec07e5f9016c92f73237be92c20ecdd0fba5795)
+++ libcfa/src/heap.cfa	(revision af7acb9c31520a9956bf84e617377d94a46b8baa)
@@ -10,6 +10,6 @@
 // Created On       : Tue Dec 19 21:58:35 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Apr 18 17:43:15 2020
-// Update Count     : 718
+// Last Modified On : Wed May  6 17:29:26 2020
+// Update Count     : 727
 //
 
@@ -19,9 +19,9 @@
 #include <errno.h>										// errno
 #include <string.h>										// memset, memcpy
+#include <limits.h>										// ULONG_MAX
 extern "C" {
 #include <sys/mman.h>									// mmap, munmap
 } // extern "C"
 
-// #comment TD : Many of these should be merged into math I believe
 #include "bits/align.hfa"								// libPow2
 #include "bits/defs.hfa"								// likely, unlikely
@@ -30,4 +30,5 @@
 //#include "stdlib.hfa"									// bsearchl
 #include "malloc.h"
+#include "bitmanip.hfa"									// ceiling
 
 #define MIN(x, y) (y > x ? x : y)
@@ -81,11 +82,11 @@
 };
 
+size_t default_heap_expansion() __attribute__(( weak )) {
+	return __CFA_DEFAULT_HEAP_EXPANSION__;
+} // default_heap_expansion
+
 size_t default_mmap_start() __attribute__(( weak )) {
 	return __CFA_DEFAULT_MMAP_START__;
 } // default_mmap_start
-
-size_t default_heap_expansion() __attribute__(( weak )) {
-	return __CFA_DEFAULT_HEAP_EXPANSION__;
-} // default_heap_expansion
 
 
@@ -360,11 +361,4 @@
 
 
-static inline bool setHeapExpand( size_t value ) {
-  if ( heapExpand < pageSize ) return true;
-	heapExpand = value;
-	return false;
-} // setHeapExpand
-
-
 // thunk problem
 size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
@@ -383,5 +377,5 @@
 
 static inline bool setMmapStart( size_t value ) {		// true => mmapped, false => sbrk
-  if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
+  if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return false;
 	mmapStart = value;									// set global
 
@@ -390,5 +384,5 @@
 	assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
 	assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
-	return false;
+	return true;
 } // setMmapStart
 
@@ -449,5 +443,5 @@
 
 	#ifdef __CFA_DEBUG__
-	checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
+	checkHeader( addr < heapBegin, name, addr );		// bad low address ?
 	#endif // __CFA_DEBUG__
 
@@ -508,5 +502,5 @@
 	// along with the block and is a multiple of the alignment size.
 
-  if ( unlikely( size > ~0ul - sizeof(HeapManager.Storage) ) ) return 0p;
+  if ( unlikely( size > ULONG_MAX - sizeof(HeapManager.Storage) ) ) return 0p;
 	size_t tsize = size + sizeof(HeapManager.Storage);
 	if ( likely( tsize < mmapStart ) ) {				// small size => sbrk
@@ -560,5 +554,5 @@
 		block->header.kind.real.home = freeElem;		// pointer back to free list of apropriate size
 	} else {											// large size => mmap
-  if ( unlikely( size > ~0ul - pageSize ) ) return 0p;
+  if ( unlikely( size > ULONG_MAX - pageSize ) ) return 0p;
 		tsize = libCeiling( tsize, pageSize );			// must be multiple of page size
 		#ifdef __STATISTICS__
@@ -702,5 +696,5 @@
 	#endif // FASTLOOKUP
 
-	if ( setMmapStart( default_mmap_start() ) ) {
+	if ( ! setMmapStart( default_mmap_start() ) ) {
 		abort( "HeapManager : internal error, mmap start initialization failure." );
 	} // if
@@ -708,6 +702,5 @@
 
 	char * end = (char *)sbrk( 0 );
-	sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment
-	heapBegin = heapEnd = sbrk( 0 );					// get new start point
+	heapBegin = heapEnd = sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment
 } // HeapManager
 
@@ -735,5 +728,5 @@
 	//assert( heapManager.heapBegin != 0 );
 	//heapManager{};
-	if ( heapManager.heapBegin == 0p ) heapManager{};
+	if ( heapManager.heapBegin == 0p ) heapManager{};	// sanity check
 } // memory_startup
 
@@ -863,15 +856,5 @@
 		#endif // __STATISTICS__
 
-		size_t size = dim * elemSize;
-		char * addr = (char *)mallocNoStats( size );
-	  if ( unlikely( addr == 0p ) ) return 0p;
-
-		HeapManager.Storage.Header * header;
-		HeapManager.FreeHeader * freeElem;
-		size_t bsize, alignment;
-		headers( "aalloc", addr, header, freeElem, bsize, alignment );
-
-		header->kind.real.blockSize |= 2;				// mark as zero filled
-		return addr;
+		return mallocNoStats( dim * elemSize );
 	} // aalloc
 
@@ -914,6 +897,6 @@
 	
 		// change size, DO NOT preserve STICKY PROPERTIES.
+		free( oaddr );
 		void * naddr = mallocNoStats( size );			// create new area
-		free( oaddr );
 		return naddr;
 	} // resize
@@ -988,14 +971,5 @@
 		#endif // __STATISTICS__
 
-		size_t size = dim * elemSize;
-		char * addr = (char *)memalignNoStats( alignment, size );
-	  if ( unlikely( addr == 0p ) ) return 0p;
-		HeapManager.Storage.Header * header;
-		HeapManager.FreeHeader * freeElem;
-		size_t bsize;
-		headers( "amemalign", addr, header, freeElem, bsize, alignment );
-
-		header->kind.real.blockSize |= 2;				// mark as zero filled
-		return addr;
+		return memalignNoStats( alignment, dim * elemSize );
 	} // amemalign
 
@@ -1043,5 +1017,5 @@
 
 	// Frees the memory space pointed to by ptr, which must have been returned by a previous call to malloc(), calloc()
-	// or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behavior occurs. If ptr is
+	// or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is
 	// 0p, no operation is performed.
 	void free( void * addr ) {
@@ -1170,12 +1144,13 @@
 
 
-	// Adjusts parameters that control the behavior of the memory-allocation functions (see malloc). The param argument
+	// Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument
 	// specifies the parameter to be modified, and value specifies the new value for that parameter.
 	int mallopt( int option, int value ) {
 		choose( option ) {
 		  case M_TOP_PAD:
-			if ( setHeapExpand( value ) ) return 1;
+			heapExpand = ceiling( value, pageSize ); return 1;
 		  case M_MMAP_THRESHOLD:
 			if ( setMmapStart( value ) ) return 1;
+			break;
 		} // switch
 		return 0;										// error, unsupported
Index: libcfa/src/iostream.cfa
===================================================================
--- libcfa/src/iostream.cfa	(revision 6ec07e5f9016c92f73237be92c20ecdd0fba5795)
+++ libcfa/src/iostream.cfa	(revision af7acb9c31520a9956bf84e617377d94a46b8baa)
@@ -10,6 +10,6 @@
 // Created On       : Wed May 27 17:56:53 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Apr 30 10:50:31 2020
-// Update Count     : 1001
+// Last Modified On : Sat May  2 18:30:25 2020
+// Update Count     : 1017
 //
 
@@ -583,27 +583,20 @@
 					if ( fmt.flags.pc && fmt.pc > 64 ) fmt.pc -= 64; else { fmt.flags.pc = false; fmt.pc = 0; } \
 					if ( fmt.flags.left ) { \
-						fmt2.wd = fmt.wd; \
-						if ( fmt2.wd <= 64 ) { \
-							fmt2.wd = 64; \
-						} else { \
-							if ( fmt.pc > 0 ) { \
-								fmt2.wd -= fmt.pc; \
-							} else { \
-								fmt2.wd = fmt.wd - high1( msig ); \
-							} /* if */ \
-							if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; \
-							if ( fmt2.wd < 0 ) fmt2.wd = 0; \
-							fmt2.flags.left = true; \
-							fmt.wd = 0; \
-						} /* if */ \
-						printf( "left %d %d %x\n", f.wd, f.pc, f.all ); \
-						printf( "left %d %d %x\n", fmt.wd, fmt.pc, fmt.all ); \
+						fmt.flags.left = false; \
+						fmt.wd = 0; \
+						/* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
+						fmt2.flags.left = true;	\
+						int msigd = high1( msig ); \
+						fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
+						if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0b base specifier */ \
+						if ( (int)fmt2.wd < 64 ) fmt2.wd = 64; /* cast deals with negative value */ \
+						fmt2.flags.pc = true; fmt2.pc = 64; \
 					} else { \
 						if ( fmt.wd > 64 ) fmt.wd -= 64; \
-						/* if ( ! fmt.flags.nobsdp && fmt.pc < fmt.wd ) fmt.wd -= 1; */ \
+						else fmt.wd = 1; \
+						/* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
 						fmt2.wd = 64; \
-						/* printf( "R %llo %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt2.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
 					} /* if */ \
-					/* printf( "\nC %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
+					/* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
 					(ostype &)(os | fmt | "" | fmt2); \
 				} else if ( f.base == 'o' ) { \
@@ -614,18 +607,16 @@
 						fmt.flags.left = false; \
 						fmt.wd = 0; \
-						/* if ( fmt2.val > 1 && fmt.flags.pc && fmt.pc > 0 ) fmt.pc -= 1; */ \
 						/* printf( "L %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
 						(ostype &)(os | fmt | "" | fmt2); \
 						sepOff( os ); \
 						fmt2.flags.left = true;	\
-						int msigd = ceiling( high1( fmt.val ), 3 ) + 1; \
-						fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd) - 1; \
+						int msigd = ceiling( high1( fmt.val ), 3 ); \
+						fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
+						if ( ! fmt.flags.nobsdp ) fmt2.wd -= 1; /* compensate for 0 base specifier */ \
 						if ( (int)fmt2.wd < 21 ) fmt2.wd = 21; /* cast deals with negative value */ \
 						fmt2.flags.pc = true; fmt2.pc = 21; \
 					} else { \
 						if ( fmt.wd > 22 ) fmt.wd -= 22; \
-						/* compensate for leading 0 */ \
-						/*if ( ! fmt.flags.nobsdp && fmt.pc < fmt.wd ) fmt.wd -= 1;*/ \
-						fmt2.wd = 1; \
+						else fmt.wd = 1; \
 						/* printf( "R %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
 						(ostype &)(os | fmt | "" | fmt2); \
@@ -636,26 +627,23 @@
 					/* printf( "\nC %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
 					(ostype &)(os | fmt2); \
-				} else { \
-					if ( fmt.flags.pc && fmt.pc > 16 ) fmt.pc -= 16; \
+				} else { /* f.base == 'x'  | f.base == 'X' */ \
+					if ( fmt.flags.pc && fmt.pc > 16 ) fmt.pc -= 16; else { fmt.flags.pc = false; fmt.pc = 0; } \
 					if ( fmt.flags.left ) { \
-						fmt2.wd = fmt.wd; \
-						if ( fmt2.wd <= 16 ) { \
-							fmt2.wd = 16; \
-						} else { \
-							if ( fmt.pc > 0 ) { \
-								fmt2.wd -= fmt.pc; \
-							} else { \
-								fmt2.wd = fmt.wd - ceiling2( high1( msig ), 4 ) / 4; \
-							} /* if */ \
-							if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; \
-							if ( fmt2.wd < 0 ) fmt2.wd = 0; \
-							fmt2.flags.left = true; \
-							fmt.wd = 0; \
-						} /* if */ \
+						fmt.flags.left = false; \
+						fmt.wd = 0; \
+						/* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
+						fmt2.flags.left = true;	\
+						int msigd = high1( msig ); \
+						fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
+						if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0x base specifier */ \
+						if ( (int)fmt2.wd < 16 ) fmt2.wd = 16; /* cast deals with negative value */ \
+						fmt2.flags.pc = true; fmt2.pc = 16; \
 					} else { \
+						if ( fmt.wd > 16 ) fmt.wd -= 16; \
+						else fmt.wd = 1; \
+						/* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
 						fmt2.wd = 16; \
-						if ( fmt.wd > 16 ) fmt.wd -= 16; \
 					} /* if */ \
-					fmt2.pc = 16; fmt2.flags.pc = true; \
+					/* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
 					(ostype &)(os | fmt | "" | fmt2); \
 				} /* if */ \
Index: libcfa/src/startup.cfa
===================================================================
--- libcfa/src/startup.cfa	(revision 6ec07e5f9016c92f73237be92c20ecdd0fba5795)
+++ libcfa/src/startup.cfa	(revision af7acb9c31520a9956bf84e617377d94a46b8baa)
@@ -14,5 +14,6 @@
 //
 
-#include <time.h>										// tzset
+#include <time.h>	         // tzset
+#include <locale.h>        // setlocale
 #include "startup.hfa"
 
@@ -21,4 +22,5 @@
     void __cfaabi_appready_startup( void ) {
 		tzset();										// initialize time global variables
+		setlocale(LC_NUMERIC, "");
 		#ifdef __CFA_DEBUG__
 		extern void heapAppStart();
