Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 3c039b0fd26ae5a91be2186806d965e4c3bb3975)
+++ libcfa/src/concurrency/io.cfa	(revision 61dd73dbc9ed2199345b4a1fa560ffa30aca0634)
@@ -86,15 +86,113 @@
 	#endif
 
-	#if defined(__CFA_IO_POLLING_USER__)
-		void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
-			this.ring = &cltr.io;
-			this.waiting = true;
-			(this.thrd){ "Fast I/O Poller", cltr };
-		}
-		void ^?{}( __io_poller_fast & mutex this );
-      	void main( __io_poller_fast & this );
-      	static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
-		void ^?{}( __io_poller_fast & mutex this ) {}
-	#endif
+	// Fast poller user-thread
+	// Not using the "thread" keyword because we want to control
+	// more carefully when to start/stop it
+	struct __io_poller_fast {
+		struct __io_data * ring;
+		bool waiting;
+		$thread thrd;
+	};
+
+	void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
+		this.ring = cltr.io;
+		this.waiting = true;
+		(this.thrd){ "Fast I/O Poller", cltr };
+	}
+	void ^?{}( __io_poller_fast & mutex this );
+	void main( __io_poller_fast & this );
+	static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
+	void ^?{}( __io_poller_fast & mutex this ) {}
+
+	struct __submition_data {
+		// Head and tail of the ring (associated with array)
+		volatile uint32_t * head;
+		volatile uint32_t * tail;
+
+		// The actual kernel ring which uses head/tail
+		// indexes into the sqes arrays
+		uint32_t * array;
+
+		// number of entries and mask to go with it
+		const uint32_t * num;
+		const uint32_t * mask;
+
+		// Submission ring flags: kernel reports state here (e.g. IORING_SQ_NEED_WAKEUP) — confirm against io_uring docs
+		uint32_t * flags;
+
+		// number of invalid sqes the kernel dropped instead of submitting
+		uint32_t * dropped;
+
+		// Like head/tail but not seen by the kernel
+		volatile uint32_t alloc;
+		volatile uint32_t ready;
+
+		__spinlock_t lock;
+
+		// A buffer of sqes (not the actual ring)
+		struct io_uring_sqe * sqes;
+
+		// The location and size of the mmaped area
+		void * ring_ptr;
+		size_t ring_sz;
+
+		// Statistics
+		#if !defined(__CFA_NO_STATISTICS__)
+			struct {
+				struct {
+					unsigned long long int val;
+					unsigned long long int cnt;
+				} submit_avg;
+			} stats;
+		#endif
+	};
+
+	struct __completion_data {
+		// Head and tail of the ring
+		volatile uint32_t * head;
+		volatile uint32_t * tail;
+
+		// number of entries and mask to go with it
+		const uint32_t * mask;
+		const uint32_t * num;
+
+		// number of cqes lost because the completion ring overflowed
+		uint32_t * overflow;
+
+		// the kernel ring
+		struct io_uring_cqe * cqes;
+
+		// The location and size of the mmaped area
+		void * ring_ptr;
+		size_t ring_sz;
+
+		// Statistics
+		#if !defined(__CFA_NO_STATISTICS__)
+			struct {
+				struct {
+					unsigned long long int val;
+					unsigned long long int slow_cnt;
+					unsigned long long int fast_cnt;
+				} completed_avg;
+			} stats;
+		#endif
+	};
+
+	struct __io_data {
+		struct __submition_data submit_q;
+		struct __completion_data completion_q;
+		uint32_t flags;
+		int fd;
+		semaphore submit;
+		volatile bool done;
+		struct {
+			struct {
+				void * stack;
+				pthread_t kthrd;
+			} slow;
+			__io_poller_fast fast;
+			__bin_sem_t sem;
+		} poller;
+	};
 
 //=============================================================================================
@@ -102,4 +200,6 @@
 //=============================================================================================
 	void __kernel_io_startup( cluster & this, bool main_cluster ) {
+		this.io = malloc();
+
 		// Step 1 : call to setup
 		struct io_uring_params params;
@@ -114,7 +214,7 @@
 
 		// Step 2 : mmap result
-		memset(&this.io, 0, sizeof(struct io_ring));
-		struct io_uring_sq & sq = this.io.submit_q;
-		struct io_uring_cq & cq = this.io.completion_q;
+		memset( this.io, 0, sizeof(struct __io_data) );
+		struct __submition_data  & sq = this.io->submit_q;
+		struct __completion_data & cq = this.io->completion_q;
 
 		// calculate the right ring size
@@ -194,16 +294,16 @@
 
 		// Update the global ring info
-		this.io.flags = params.flags;
-		this.io.fd    = fd;
-		this.io.done  = false;
-		(this.io.submit){ min(*sq.num, *cq.num) };
+		this.io->flags = params.flags;
+		this.io->fd    = fd;
+		this.io->done  = false;
+		(this.io->submit){ min(*sq.num, *cq.num) };
 
 		// Initialize statistics
 		#if !defined(__CFA_NO_STATISTICS__)
-			this.io.submit_q.stats.submit_avg.val = 0;
-			this.io.submit_q.stats.submit_avg.cnt = 0;
-			this.io.completion_q.stats.completed_avg.val = 0;
-			this.io.completion_q.stats.completed_avg.slow_cnt = 0;
-			this.io.completion_q.stats.completed_avg.fast_cnt = 0;
+			this.io->submit_q.stats.submit_avg.val = 0;
+			this.io->submit_q.stats.submit_avg.cnt = 0;
+			this.io->completion_q.stats.completed_avg.val = 0;
+			this.io->completion_q.stats.completed_avg.slow_cnt = 0;
+			this.io->completion_q.stats.completed_avg.fast_cnt = 0;
 		#endif
 
@@ -214,13 +314,11 @@
 
 	void __kernel_io_finish_start( cluster & this ) {
-		#if defined(__CFA_IO_POLLING_USER__)
-			__cfadbg_print_safe(io_core, "Kernel I/O : Creating fast poller for cluter %p\n", &this);
-			(this.io.poller.fast){ this };
-			__thrd_start( this.io.poller.fast, main );
-		#endif
+		__cfadbg_print_safe(io_core, "Kernel I/O : Creating fast poller for cluster %p\n", &this);
+		(this.io->poller.fast){ this };
+		__thrd_start( this.io->poller.fast, main );
 
 		// Create the poller thread
 		__cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluter %p\n", &this);
-		this.io.poller.slow.stack = __create_pthread( &this.io.poller.slow.kthrd, __io_poller_slow, &this );
+		this.io->poller.slow.stack = __create_pthread( &this.io->poller.slow.kthrd, __io_poller_slow, &this );
 	}
 
@@ -228,29 +326,27 @@
 		__cfadbg_print_safe(io_core, "Kernel I/O : Stopping pollers for cluster\n", &this);
 		// Notify the poller thread of the shutdown
-		__atomic_store_n(&this.io.done, true, __ATOMIC_SEQ_CST);
+		__atomic_store_n(&this.io->done, true, __ATOMIC_SEQ_CST);
 
 		// Stop the IO Poller
 		sigval val = { 1 };
-		pthread_sigqueue( this.io.poller.slow.kthrd, SIGUSR1, val );
+		pthread_sigqueue( this.io->poller.slow.kthrd, SIGUSR1, val );
+		post( this.io->poller.sem );
+
+		// Wait for the poller thread to finish
+		pthread_join( this.io->poller.slow.kthrd, 0p );
+		free( this.io->poller.slow.stack );
+
+		__cfadbg_print_safe(io_core, "Kernel I/O : Slow poller stopped for cluster\n", &this);
+
 		#if defined(__CFA_IO_POLLING_USER__)
-			post( this.io.poller.sem );
-		#endif
-
-		// Wait for the poller thread to finish
-		pthread_join( this.io.poller.slow.kthrd, 0p );
-		free( this.io.poller.slow.stack );
-
-		__cfadbg_print_safe(io_core, "Kernel I/O : Slow poller stopped for cluster\n", &this);
-
-		#if defined(__CFA_IO_POLLING_USER__)
-			verify( this.io.poller.fast.waiting );
-			verify( this.io.poller.fast.thrd.state == Blocked );
-
-			this.io.poller.fast.thrd.curr_cluster = mainCluster;
+			verify( this.io->poller.fast.waiting );
+			verify( this.io->poller.fast.thrd.state == Blocked );
+
+			this.io->poller.fast.thrd.curr_cluster = mainCluster;
 
 			// unpark the fast io_poller
-			unpark( &this.io.poller.fast.thrd __cfaabi_dbg_ctx2 );
-
-			^(this.io.poller.fast){};
+			unpark( &this.io->poller.fast.thrd __cfaabi_dbg_ctx2 );
+
+			^(this.io->poller.fast){};
 
 			__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller stopped for cluster\n", &this);
@@ -266,22 +362,24 @@
 		#if !defined(__CFA_NO_STATISTICS__)
 			if(this.print_stats) {
-				__cfaabi_bits_print_safe( STDERR_FILENO,
-					"----- I/O uRing Stats -----\n"
-					"- total submit calls  : %llu\n"
-					"- avg submit          : %lf\n"
-					"- total wait calls    : %llu (%llu slow, %llu fast)\n"
-					"- avg completion/wait : %lf\n",
-					this.io.submit_q.stats.submit_avg.cnt,
-					((double)this.io.submit_q.stats.submit_avg.val) / this.io.submit_q.stats.submit_avg.cnt,
-					this.io.completion_q.stats.completed_avg.slow_cnt + this.io.completion_q.stats.completed_avg.fast_cnt,
-					this.io.completion_q.stats.completed_avg.slow_cnt, this.io.completion_q.stats.completed_avg.fast_cnt,
-					((double)this.io.completion_q.stats.completed_avg.val) / (this.io.completion_q.stats.completed_avg.slow_cnt + this.io.completion_q.stats.completed_avg.fast_cnt)
-				);
+				with(this.io->submit_q.stats, this.io->completion_q.stats) {
+					__cfaabi_bits_print_safe( STDERR_FILENO,
+						"----- I/O uRing Stats -----\n"
+						"- total submit calls  : %llu\n"
+						"- avg submit          : %lf\n"
+						"- total wait calls    : %llu (%llu slow, %llu fast)\n"
+						"- avg completion/wait : %lf\n",
+						submit_avg.cnt,
+						((double)submit_avg.val) / submit_avg.cnt,
+						completed_avg.slow_cnt + completed_avg.fast_cnt,
+						completed_avg.slow_cnt,  completed_avg.fast_cnt,
+						((double)completed_avg.val) / (completed_avg.slow_cnt + completed_avg.fast_cnt)
+					);
+				}
 			}
 		#endif
 
 		// Shutdown the io rings
-		struct io_uring_sq & sq = this.io.submit_q;
-		struct io_uring_cq & cq = this.io.completion_q;
+		struct __submition_data  & sq = this.io->submit_q;
+		struct __completion_data & cq = this.io->completion_q;
 
 		// unmap the submit queue entries
@@ -297,5 +395,7 @@
 
 		// close the file descriptor
-		close(this.io.fd);
+		close(this.io->fd);
+
+		free( this.io );
 	}
 
@@ -310,5 +410,5 @@
 	// Process a single completion message from the io_uring
 	// This is NOT thread-safe
-	static int __drain_io( struct io_ring & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
+	static int __drain_io( struct __io_data & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
 		int ret = syscall( __NR_io_uring_enter, ring.fd, 0, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
 		if( ret < 0 ) {
@@ -358,5 +458,5 @@
 	static void * __io_poller_slow( void * arg ) {
 		cluster * cltr = (cluster *)arg;
-		struct io_ring & ring = cltr->io;
+		struct __io_data & ring = *cltr->io;
 
 		sigset_t mask;
@@ -411,39 +511,37 @@
 	}
 
-	#if defined(__CFA_IO_POLLING_USER__)
-		void main( __io_poller_fast & this ) {
-			// Start parked
-			park( __cfaabi_dbg_ctx );
-
-			__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p ready\n", &this.ring);
-
-			// Then loop until we need to start
-			while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
-				// Drain the io
-				this.waiting = false;
-				int count = __drain_io( *this.ring, 0p, 0, false );
-
-				// Update statistics
-				#if !defined(__CFA_NO_STATISTICS__)
-					this.ring->completion_q.stats.completed_avg.val += count;
-					this.ring->completion_q.stats.completed_avg.fast_cnt += 1;
-				#endif
-
-				this.waiting = true;
-				if(0 > count) {
-					// If we got something, just yield and check again
-					yield();
-				}
-				else {
-					// We didn't get anything baton pass to the slow poller
-					__cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
-					post( this.ring->poller.sem );
-					park( __cfaabi_dbg_ctx );
-				}
+	void main( __io_poller_fast & this ) {
+		// Start parked
+		park( __cfaabi_dbg_ctx );
+
+		__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p ready\n", &this.ring);
+
+		// Then loop until we need to start
+		while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
+			// Drain the io
+			this.waiting = false;
+			int count = __drain_io( *this.ring, 0p, 0, false );
+
+			// Update statistics
+			#if !defined(__CFA_NO_STATISTICS__)
+				this.ring->completion_q.stats.completed_avg.val += count;
+				this.ring->completion_q.stats.completed_avg.fast_cnt += 1;
+			#endif
+
+			this.waiting = true;
+			if(0 > count) {
+				// If we got something, just yield and check again
+				yield();
 			}
-
-			__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring);
-		}
-	#endif
+			else {
+				// We didn't get anything baton pass to the slow poller
+				__cfadbg_print_safe(io_core, "Kernel I/O : Moving ring %p to slow poller\n", &this.ring);
+				post( this.ring->poller.sem );
+				park( __cfaabi_dbg_ctx );
+			}
+		}
+
+		__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring);
+	}
 
 //=============================================================================================
@@ -475,5 +573,5 @@
 //
 
-	static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct io_ring & ring ) {
+	static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring ) {
 		// Wait for a spot to be available
 		P(ring.submit);
@@ -493,5 +591,5 @@
 	}
 
-	static inline void __submit( struct io_ring & ring, uint32_t idx ) {
+	static inline void __submit( struct __io_data & ring, uint32_t idx ) {
 		// get mutual exclusion
 		lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
@@ -554,5 +652,5 @@
 
 	#define __submit_prelude \
-		struct io_ring & ring = active_cluster()->io; \
+		struct __io_data & ring = *active_cluster()->io; \
 		struct io_uring_sqe * sqe; \
 		uint32_t idx; \
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 3c039b0fd26ae5a91be2186806d965e4c3bb3975)
+++ libcfa/src/concurrency/kernel.hfa	(revision 61dd73dbc9ed2199345b4a1fa560ffa30aca0634)
@@ -114,106 +114,5 @@
 //-----------------------------------------------------------------------------
 // I/O
-#if defined(HAVE_LINUX_IO_URING_H)
-struct io_uring_sq {
-	// Head and tail of the ring (associated with array)
-	volatile uint32_t * head;
-	volatile uint32_t * tail;
-
-	// The actual kernel ring which uses head/tail
-	// indexes into the sqes arrays
-	uint32_t * array;
-
-	// number of entries and mask to go with it
-	const uint32_t * num;
-	const uint32_t * mask;
-
-	// Submission flags (Not sure what for)
-	uint32_t * flags;
-
-	// number of sqes not submitted (whatever that means)
-	uint32_t * dropped;
-
-	// Like head/tail but not seen by the kernel
-	volatile uint32_t alloc;
-	volatile uint32_t ready;
-
-	__spinlock_t lock;
-
-	// A buffer of sqes (not the actual ring)
-	struct io_uring_sqe * sqes;
-
-	// The location and size of the mmaped area
-	void * ring_ptr;
-	size_t ring_sz;
-
-	// Statistics
-	#if !defined(__CFA_NO_STATISTICS__)
-		struct {
-			struct {
-				unsigned long long int val;
-				unsigned long long int cnt;
-			} submit_avg;
-		} stats;
-	#endif
-};
-
-struct io_uring_cq {
-	// Head and tail of the ring
-	volatile uint32_t * head;
-	volatile uint32_t * tail;
-
-	// number of entries and mask to go with it
-	const uint32_t * mask;
-	const uint32_t * num;
-
-	// number of cqes not submitted (whatever that means)
-	uint32_t * overflow;
-
-	// the kernel ring
-	struct io_uring_cqe * cqes;
-
-	// The location and size of the mmaped area
-	void * ring_ptr;
-	size_t ring_sz;
-
-	// Statistics
-	#if !defined(__CFA_NO_STATISTICS__)
-		struct {
-			struct {
-				unsigned long long int val;
-				unsigned long long int slow_cnt;
-				unsigned long long int fast_cnt;
-			} completed_avg;
-		} stats;
-	#endif
-};
-
-#if defined(__CFA_IO_POLLING_USER__)
-	struct __io_poller_fast {
-		struct io_ring * ring;
-		bool waiting;
-		$thread thrd;
-	};
-#endif
-
-struct io_ring {
-	struct io_uring_sq submit_q;
-	struct io_uring_cq completion_q;
-	uint32_t flags;
-	int fd;
-	semaphore submit;
-	volatile bool done;
-	struct {
-		struct {
-			void * stack;
-			pthread_t kthrd;
-		} slow;
-		#if defined(__CFA_IO_POLLING_USER__)
-			__io_poller_fast fast;
-			__bin_sem_t sem;
-		#endif
-	} poller;
-};
-#endif
+struct __io_data;
 
 //-----------------------------------------------------------------------------
@@ -249,7 +148,5 @@
 	} node;
 
-	#if defined(HAVE_LINUX_IO_URING_H)
-		struct io_ring io;
-	#endif
+	struct __io_data * io;
 
 	#if !defined(__CFA_NO_STATISTICS__)
