Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 35fd2c49fe696052e822079cc8b22822c31f163f)
+++ libcfa/src/concurrency/io.cfa	(revision 561dd26d132b85ddcfe4d7d84e17ae985d20af77)
@@ -134,4 +134,5 @@
 		int ret = 0;
 		if( need_sys_to_submit || need_sys_to_complete ) {
+			__cfadbg_print_safe(io_core, "Kernel I/O : IO_URING enter %d %u %u\n", ring.fd, to_submit, flags);
 			ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, 0, flags, (sigset_t *)0p, _NSIG / 8);
 			if( ret < 0 ) {
@@ -230,6 +231,8 @@
 		__cfadbg_print_safe(io_core, "Kernel I/O : IO poller %p for ring %p ready\n", &this, &this.ring);
 
-		int reset = 0;
+		const int reset_cnt = 5;
+		int reset = reset_cnt;
 		// Then loop until we need to start
+		LOOP:
 		while(!__atomic_load_n(&this.done, __ATOMIC_SEQ_CST)) {
 			// Drain the io
@@ -239,5 +242,5 @@
 				[count, again] = __drain_io( *this.ring );
 
-				if(!again) reset++;
+				if(!again) reset--;
 
 				// Update statistics
@@ -249,19 +252,29 @@
 
 			// If we got something, just yield and check again
-			if(reset < 5) {
+			if(reset > 1) {
 				yield();
-			}
-			// We didn't get anything baton pass to the slow poller
-			else {
+				continue LOOP;
+			}
+
+			// We alread failed to find events a few time.
+			if(reset == 1) {
+				// Rearm the context so it can block
+				// but don't block right away
+				// we need to retry one last time in case
+				// something completed *just now*
+				__ioctx_prepare_block( this, ev );
+				continue LOOP;
+			}
+
 				__STATS__( false,
 					io.complete_q.blocks += 1;
 				)
 				__cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %p\n", &this.self);
-				reset = 0;
 
 				// block this thread
-				__ioctx_prepare_block( this, ev );
 				wait( this.sem );
-			}
+
+			// restore counter
+			reset = reset_cnt;
 		}
 
@@ -319,4 +332,5 @@
 					)
 
+					__cfadbg_print_safe( io, "Kernel I/O : allocated [%p, %u] for %p (%p)\n", sqe, idx, active_thread(), (void*)data );
 
 					// Success return the data
@@ -376,4 +390,6 @@
 
 	void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1))) {
+		__cfadbg_print_safe( io, "Kernel I/O : submitting %u for %p\n", idx, active_thread() );
+
 		__io_data & ring = *ctx->thrd.ring;
 		// Get now the data we definetely need
@@ -443,5 +459,7 @@
 				unlock(ring.submit_q.submit_lock);
 			#endif
-			if( ret < 0 ) return;
+			if( ret < 0 ) {
+				return;
+			}
 
 			// Release the consumed SQEs
@@ -454,4 +472,6 @@
 				io.submit_q.submit_avg.cnt += 1;
 			)
+
+			__cfadbg_print_safe( io, "Kernel I/O : submitted %u (among %u) for %p\n", idx, ret, active_thread() );
 		}
 		else {
Index: libcfa/src/concurrency/io/call.cfa.in
===================================================================
--- libcfa/src/concurrency/io/call.cfa.in	(revision 35fd2c49fe696052e822079cc8b22822c31f163f)
+++ libcfa/src/concurrency/io/call.cfa.in	(revision 561dd26d132b85ddcfe4d7d84e17ae985d20af77)
@@ -464,4 +464,37 @@
 
 print("""
+//-----------------------------------------------------------------------------
+bool cancel(io_cancellation & this) {
+	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_ASYNC_CANCEL)
+		return false;
+	#else
+		io_future_t future;
+
+		io_context * context = __get_io_context();
+
+		__u8 sflags = 0;
+		struct __io_data & ring = *context->thrd.ring;
+
+		__u32 idx;
+		struct io_uring_sqe * sqe;
+		[sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future );
+
+		sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
+		sqe->opcode = IORING_OP_ASYNC_CANCEL;
+		sqe->flags = sflags;
+		sqe->addr = this.target;
+
+		verify( sqe->user_data == (__u64)(uintptr_t)&future );
+		__submit( context, idx );
+
+		wait(future);
+
+		if( future.result == 0 ) return true; // Entry found
+		if( future.result == -EALREADY) return true; // Entry found but in progress
+		if( future.result == -ENOENT ) return false; // Entry not found
+		return false;
+	#endif
+}
+
 //-----------------------------------------------------------------------------
 // Check if a function is has asynchronous
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 35fd2c49fe696052e822079cc8b22822c31f163f)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 561dd26d132b85ddcfe4d7d84e17ae985d20af77)
@@ -169,6 +169,10 @@
 		// Main loop
 		while( iopoll.run ) {
+			__cfadbg_print_safe(io_core, "Kernel I/O - epoll : waiting on io_uring contexts\n");
+
 			// Wait for events
 			int nfds = epoll_pwait( iopoll.epollfd, events, 10, -1, &mask );
+
+			__cfadbg_print_safe(io_core, "Kernel I/O - epoll : %d io contexts events, waking up\n", nfds);
 
 			// Check if an error occured
@@ -181,5 +185,5 @@
 				$io_ctx_thread * io_ctx = ($io_ctx_thread *)(uintptr_t)events[i].data.u64;
 				/* paranoid */ verify( io_ctx );
-				__cfadbg_print_safe(io_core, "Kernel I/O : Unparking io poller %p\n", io_ctx);
+				__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Unparking io poller %p\n", io_ctx);
 				#if !defined( __CFA_NO_STATISTICS__ )
 					__cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats;
@@ -233,4 +237,9 @@
 		$thread & thrd = this.thrd.self;
 		if( cluster_context ) {
+			// We are about to do weird things with the threads
+			// we don't need interrupts to complicate everything
+			disable_interrupts();
+
+			// Get cluster info
 			cluster & cltr = *thrd.curr_cluster;
 			/* paranoid */ verify( cltr.idles.total == 0 || &cltr == mainCluster );
@@ -239,13 +248,15 @@
 			// We need to adjust the clean-up based on where the thread is
 			if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) {
+				// This is the tricky case
+				// The thread was preempted or ready to run and now it is on the ready queue
+				// but the cluster is shutting down, so there aren't any processors to run the ready queue
+				// the solution is to steal the thread from the ready-queue and pretend it was blocked all along
 
 				ready_schedule_lock();
-
-					// This is the tricky case
-					// The thread was preempted and now it is on the ready queue
+					// The thread should on the list
+					/* paranoid */ verify( thrd.link.next != 0p );
+
+					// Remove the thread from the ready queue of this cluster
 					// The thread should be the last on the list
-					/* paranoid */ verify( thrd.link.next != 0p );
-
-					// Remove the thread from the ready queue of this cluster
 					__attribute__((unused)) bool removed = remove_head( &cltr, &thrd );
 					/* paranoid */ verify( removed );
@@ -263,6 +274,6 @@
 			}
 			// !!! This is not an else if !!!
+			// Ok, now the thread is blocked (whether we cheated to get here or not)
 			if( thrd.state == Blocked ) {
-
 				// This is the "easy case"
 				// The thread is parked and can easily be moved to active cluster
@@ -274,9 +285,11 @@
 			}
 			else {
-
 				// The thread is in a weird state
 				// I don't know what to do here
 				abort("io_context poller thread is in unexpected state, cannot clean-up correctly\n");
 			}
+
+			// The weird thread kidnapping stuff is over, restore interrupts.
+			enable_interrupts( __cfaabi_dbg_ctx );
 		} else {
 			post( this.thrd.sem );
@@ -463,4 +476,5 @@
 
 	void __ioctx_prepare_block($io_ctx_thread & ctx, struct epoll_event & ev) {
+		__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %p\n", &ctx);
 		int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_MOD, ctx.ring->fd, &ev);
 		if (ret < 0) {
