Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 6e33a2decadaea8f234b8538b1ff71447e2234ea)
+++ libcfa/src/concurrency/io.cfa	(revision 05cfa4db51185682042d80b5c991924da251e96a)
@@ -141,6 +141,7 @@
 			struct {
 				struct {
-					unsigned long long int val;
-					unsigned long long int cnt;
+					volatile unsigned long long int val;
+					volatile unsigned long long int cnt;
+					volatile unsigned long long int block;
 				} submit_avg;
 			} stats;
@@ -303,6 +304,7 @@
 		// Initialize statistics
 		#if !defined(__CFA_NO_STATISTICS__)
-			this.io->submit_q.stats.submit_avg.val = 0;
-			this.io->submit_q.stats.submit_avg.cnt = 0;
+			this.io->submit_q.stats.submit_avg.val   = 0;
+			this.io->submit_q.stats.submit_avg.cnt   = 0;
+			this.io->submit_q.stats.submit_avg.block = 0;
 			this.io->completion_q.stats.completed_avg.val = 0;
 			this.io->completion_q.stats.completed_avg.slow_cnt = 0;
@@ -344,11 +346,47 @@
 
 		if( this.io->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
-			verify( this.io->poller.fast.waiting );
-			verify( this.io->poller.fast.thrd.state == Blocked );
-
-			this.io->poller.fast.thrd.curr_cluster = mainCluster;
+			with( this.io->poller.fast ) {
+				/* paranoid */ verify( waiting ); // The thread shouldn't be in a system call
+				/* paranoid */ verify( this.procs.head == 0p || &this == mainCluster );
+				/* paranoid */ verify( this.idles.head == 0p || &this == mainCluster );
+
+				// We need to adjust the clean-up based on where the thread is
+				if( thrd.preempted != __NO_PREEMPTION ) {
+
+					// This is the tricky case
+					// The thread was preempted and now it is on the ready queue
+					/* paranoid */ verify( thrd.state == Active );           // The thread better be in this state
+					/* paranoid */ verify( thrd.next == 1p );                // The thread should be the last on the list
+					/* paranoid */ verify( this.ready_queue.head == &thrd ); // The thread should be the only thing on the list
+
+					// Remove the thread from the ready queue of this cluster
+					this.ready_queue.head = 1p;
+					thrd.next = 0p;
+
+					// Fixup the thread state
+					thrd.state = Blocked;
+					thrd.preempted = __NO_PREEMPTION;
+
+					// Pretend like the thread was blocked all along
+				}
+				// !!! This is not an else-if: the preempted case above sets the state to Blocked and must fall through to here !!!
+				if( thrd.state == Blocked ) {
+
+					// This is the "easy case"
+					// The thread is parked and can easily be moved to the active cluster
+					verify( thrd.curr_cluster != active_cluster() || thrd.curr_cluster == mainCluster );
+					thrd.curr_cluster = active_cluster();
 
 			// unpark the fast io_poller
-			unpark( &this.io->poller.fast.thrd __cfaabi_dbg_ctx2 );
+					unpark( &thrd __cfaabi_dbg_ctx2 );
+				}
+				else {
+
+					// The thread is neither on the ready queue (preempted) nor parked (Blocked)
+					// There is no known safe way to clean up from this state, so abort
+					abort("Fast poller thread is in unexpected state, cannot clean-up correctly\n");
+				}
+
+			}
 
 			^(this.io->poller.fast){};
@@ -369,10 +407,12 @@
 					__cfaabi_bits_print_safe( STDERR_FILENO,
 						"----- I/O uRing Stats -----\n"
-						"- total submit calls  : %llu\n"
-						"- avg submit          : %lf\n"
-						"- total wait calls    : %llu (%llu slow, %llu fast)\n"
-						"- avg completion/wait : %lf\n",
+						"- total submit calls  : %'llu\n"
+						"- avg submit          : %'.2lf\n"
+						"- pre-submit block %%  : %'.2lf\n"
+						"- total wait calls    : %'llu (%'llu slow, %'llu fast)\n"
+						"- avg completion/wait : %'.2lf\n",
 						submit_avg.cnt,
 						((double)submit_avg.val) / submit_avg.cnt,
+						(100.0 * submit_avg.block) / submit_avg.cnt,
 						completed_avg.slow_cnt + completed_avg.fast_cnt,
 						completed_avg.slow_cnt,  completed_avg.fast_cnt,
@@ -584,5 +624,8 @@
 	static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring ) {
 		// Wait for a spot to be available
-		P(ring.submit);
+		__attribute__((unused)) bool blocked = P(ring.submit);
+		#if !defined(__CFA_NO_STATISTICS__)
+			__atomic_fetch_add( &ring.submit_q.stats.submit_avg.block, blocked ? 1ul64 : 0ul64, __ATOMIC_RELAXED );
+		#endif
 
 		// Allocate the sqe
