Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 2377ca2ae5f29b9a9faf225df848aa347d834fb6)
+++ libcfa/src/concurrency/io.cfa	(revision 18f7858ea013d58a6e3cd0b6bfc61801c6f018c1)
@@ -94,10 +94,40 @@
 	extern void __kernel_unpark( thread$ * thrd, unpark_hint );
 
-	static bool __cfa_do_drain( $io_context * ctx, cluster * cltr ) {
+	// Issue one io_uring_enter syscall on ctx.fd for ctx.sq.to_submit entries; min_comp and flags are passed through as-is.
+	// EAGAIN/EINTR/EBUSY: count the error and return early, leaving to_submit and the pending flag untouched. Other errno: abort.
+	static void ioring_syscsll( struct $io_context & ctx, unsigned int min_comp, unsigned int flags ) {
+		__STATS__( true, io.calls.flush++; )
+		int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, flags, (sigset_t *)0p, _NSIG / 8);
+		if( ret < 0 ) {
+			switch((int)errno) {
+			case EAGAIN:
+			case EINTR:
+			case EBUSY:
+				// Update statistics, then give up; this function returns void, so the failure is only visible in the stats
+				__STATS__( false, io.calls.errors.busy ++; )
+				return;
+			default:
+				abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+			}
+		}
+
+		__cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd);
+		__STATS__( true, io.calls.submitted += ret; )
+		/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+		/* paranoid */ verify( ctx.sq.to_submit >= ret );
+		ctx.sq.to_submit -= ret;
+		/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+
+		// Release the consumed SQEs
+		__release_sqes( ctx );
+
+		/* paranoid */ verify( ! __preemption_enabled() );
+
+		__atomic_store_n(&ctx.proc->io.pending, false, __ATOMIC_RELAXED);
+	}
+
+	static bool try_acquire( $io_context * ctx ) __attribute__((nonnull(1))) {
 		/* paranoid */ verify( ! __preemption_enabled() );
 		/* paranoid */ verify( ready_schedule_islocked() );
-		/* paranoid */ verify( ctx );
-
-		const __u32 mask = *ctx->cq.mask;
 
 
@@ -115,4 +145,13 @@
 		}
 
+		return true;
+	}
+
+	static bool __cfa_do_drain( $io_context * ctx, cluster * cltr ) __attribute__((nonnull(1, 2))) {
+		/* paranoid */ verify( ! __preemption_enabled() );
+		/* paranoid */ verify( ready_schedule_islocked() );
+		/* paranoid */ verify( ctx->cq.lock == true );
+
+		const __u32 mask = *ctx->cq.mask;
 		unsigned long long ts_prev = ctx->cq.ts;
 
@@ -155,4 +194,6 @@
 		bool local = false;
 		bool remote = false;
+
+		ready_schedule_lock();
 
 		cluster * const cltr = proc->cltr;
@@ -186,12 +227,16 @@
 				const unsigned target = proc->io.target;
 				/* paranoid */ verify( io.tscs[target].tv != MAX );
-				if(target < ctxs_count) {
+				HELP: if(target < ctxs_count) {
 					const unsigned long long cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io);
 					const unsigned long long age = moving_average(ctsc, io.tscs[target].tv, io.tscs[target].ma);
 					// __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no");
-					if(age > cutoff) {
-						remote = __cfa_do_drain( io.data[target], cltr );
-						if(remote) __STATS__( false, io.calls.helped++; )
-					}
+					if(age <= cutoff) break HELP;
+
+					if(!try_acquire(io.data[target])) break HELP;
+
+					if(!__cfa_do_drain( io.data[target], cltr )) break HELP;
+
+					remote = true;
+					__STATS__( false, io.calls.helped++; )
 				}
 				proc->io.target = MAX;
@@ -201,13 +246,17 @@
 
 		// Drain the local queue
-		local = __cfa_do_drain( proc->io.ctx, cltr );
+		if(try_acquire( proc->io.ctx )) {
+			local = __cfa_do_drain( proc->io.ctx, cltr );
+		}
 
 		/* paranoid */ verify( ready_schedule_islocked() );
 		/* paranoid */ verify( ! __preemption_enabled() );
 		/* paranoid */ verify( active_processor() == proc );
+
+		ready_schedule_unlock();
 		return local || remote;
 	}
 
-	bool __cfa_io_flush( processor * proc, int min_comp ) {
+	bool __cfa_io_flush( processor * proc ) {
 		/* paranoid */ verify( ! __preemption_enabled() );
 		/* paranoid */ verify( proc );
@@ -219,42 +268,10 @@
 		__ioarbiter_flush( ctx );
 
-		if(ctx.sq.to_submit != 0 || min_comp > 0) {
-
-			__STATS__( true, io.calls.flush++; )
-			int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8);
-			if( ret < 0 ) {
-				switch((int)errno) {
-				case EAGAIN:
-				case EINTR:
-				case EBUSY:
-					// Update statistics
-					__STATS__( false, io.calls.errors.busy ++; )
-					return false;
-				default:
-					abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
-				}
-			}
-
-			__cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd);
-			__STATS__( true, io.calls.submitted += ret; )
-			/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
-			/* paranoid */ verify( ctx.sq.to_submit >= ret );
-
-			ctx.sq.to_submit -= ret;
-
-			/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
-
-			// Release the consumed SQEs
-			__release_sqes( ctx );
-
-			/* paranoid */ verify( ! __preemption_enabled() );
-
-			__atomic_store_n(&ctx.proc->io.pending, false, __ATOMIC_RELAXED);
-		}
-
-		ready_schedule_lock();
-		bool ret = __cfa_io_drain( proc );
-		ready_schedule_unlock();
-		return ret;
+		if(ctx.sq.to_submit != 0) {
+			ioring_syscsll(ctx, 0, 0);
+
+		}
+
+		return __cfa_io_drain( proc );
 	}
 
@@ -389,9 +406,9 @@
 		if(sq.to_submit > 30) {
 			__tls_stats()->io.flush.full++;
-			__cfa_io_flush( ctx->proc, 0 );
+			__cfa_io_flush( ctx->proc );
 		}
 		if(!lazy) {
 			__tls_stats()->io.flush.eager++;
-			__cfa_io_flush( ctx->proc, 0 );
+			__cfa_io_flush( ctx->proc );
 		}
 	}
@@ -656,4 +673,26 @@
 			return true;
 		}
+
+		// Idle path: make sure a read of idle_wctx.evfd is pending, then enter io_uring asking for at least one completion.
+		// Takes cq.lock up front because __cfa_do_drain verifies it is held. NOTE(review): presumably the drain releases it - confirm.
+		void __cfa_io_idle( processor * proc ) {
+			iovec iov;
+			__atomic_acquire( &proc->io.ctx->cq.lock );
+			with( proc->idle_wctx ) {
+				// Do we already have a pending read
+				if(available(*ftr)) {
+					// There is no pending read, we need to add one
+					reset(*ftr);
+					iov.iov_base = rdbuf;
+					iov.iov_len  = sizeof(eventfd_t);
+					__kernel_read(proc, *ftr, iov, evfd );
+				}
+			}
+			__ioarbiter_flush( *proc->io.ctx );
+			ioring_syscsll( *proc->io.ctx, 1, IORING_ENTER_GETEVENTS );
+			ready_schedule_lock(); // __cfa_do_drain verifies that the ready-schedule lock is held
+			__cfa_do_drain( proc->io.ctx, proc->cltr );
+			ready_schedule_unlock();
+		}
 	#endif
 #endif
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 2377ca2ae5f29b9a9faf225df848aa347d834fb6)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 18f7858ea013d58a6e3cd0b6bfc61801c6f018c1)
@@ -32,5 +32,7 @@
 
 	void __cfa_io_start( processor * proc ) {}
-	bool __cfa_io_flush( processor * proc, int ) { return false; }
+	bool __cfa_io_flush( processor * proc ) { return false; }
+	bool __cfa_io_drain( processor * proc ) __attribute__((nonnull (1))) { return false; } // no-op stub like its neighbours: nothing drained
+	void __cfa_io_idle ( processor * ) __attribute__((nonnull (1))) {}                      // no-op stub like its neighbours
 	void __cfa_io_stop ( processor * proc ) {}
 
@@ -215,5 +217,5 @@
 
 		// completion queue
-		cq.lock      = 0;
+		cq.lock      = false;
 		cq.id        = MAX;
 		cq.ts        = rdtscl();
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 2377ca2ae5f29b9a9faf225df848aa347d834fb6)
+++ libcfa/src/concurrency/kernel.cfa	(revision 18f7858ea013d58a6e3cd0b6bfc61801c6f018c1)
@@ -132,11 +132,11 @@
 static void __wake_one(cluster * cltr);
 
-static void idle_sleep(processor * proc, io_future_t & future, iovec & iov);
+static void idle_sleep(processor * proc);
 static bool mark_idle (__cluster_proc_list & idles, processor & proc);
 static void mark_awake(__cluster_proc_list & idles, processor & proc);
 
 extern bool __cfa_io_drain( processor * proc ) __attribute__((nonnull (1)));
-extern bool __cfa_io_flush( processor *, int min_comp );
-static inline bool __maybe_io_drain( processor * );
+extern bool __cfa_io_flush( processor * ) __attribute__((nonnull (1)));
+extern void __cfa_io_idle( processor * ) __attribute__((nonnull (1)));
 
 #if defined(CFA_WITH_IO_URING_IDLE)
@@ -168,5 +168,4 @@
 	// mark it as already fulfilled so we know if there is a pending request or not
 	this->idle_wctx.ftr->self.ptr = 1p;
-	iovec idle_iovec = { this->idle_wctx.rdbuf, sizeof(eventfd_t) };
 
 	__cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
@@ -193,5 +192,5 @@
 		for() {
 			// Check if there is pending io
-			__maybe_io_drain( this );
+			__cfa_io_drain( this );
 
 			// Try to get the next thread
@@ -199,6 +198,10 @@
 
 			if( !readyThread ) {
+				// there is no point in holding submissions if we are idle
 				__IO_STATS__(true, io.flush.idle++; )
-				__cfa_io_flush( this, 0 );
+				__cfa_io_flush( this );
+
+				// drain again in case something showed up
+				__cfa_io_drain( this );
 
 				readyThread = __next_thread( this->cltr );
@@ -206,11 +209,14 @@
 
 			if( !readyThread ) for(5) {
+				readyThread = __next_thread_slow( this->cltr );
+
+				if( readyThread ) break;
+
+				// It's unlikely we still have I/O to submit, but the arbiter may have some queued, so flush anyway
 				__IO_STATS__(true, io.flush.idle++; )
-
-				readyThread = __next_thread_slow( this->cltr );
-
-				if( readyThread ) break;
-
-				__cfa_io_flush( this, 0 );
+				__cfa_io_flush( this );
+
+				// drain again in case something showed up
+				__cfa_io_drain( this );
 			}
 
@@ -235,5 +241,5 @@
 				}
 
-				idle_sleep( this, *this->idle_wctx.ftr, idle_iovec );
+				idle_sleep( this );
 
 				// We were woken up, remove self from idle
@@ -257,5 +263,5 @@
 			if(__atomic_load_n(&this->io.pending, __ATOMIC_RELAXED) && !__atomic_load_n(&this->io.dirty, __ATOMIC_RELAXED)) {
 				__IO_STATS__(true, io.flush.dirty++; )
-				__cfa_io_flush( this, 0 );
+				__cfa_io_flush( this );
 			}
 		}
@@ -683,5 +689,5 @@
 }
 
-static void idle_sleep(processor * this, io_future_t & future, iovec & iov) {
+static void idle_sleep(processor * this) {
 	/* paranoid */ verify( this->idle_wctx.evfd != 1 );
 	/* paranoid */ verify( this->idle_wctx.evfd != 2 );
@@ -735,13 +741,5 @@
 		#endif
 	#else
-		// Do we already have a pending read
-		if(available(future)) {
-			// There is no pending read, we need to add one
-			reset(future);
-
-			__kernel_read(this, future, iov, this->idle_wctx.evfd );
-		}
-
-		__cfa_io_flush( this, 1 );
+		__cfa_io_idle( this );
 	#endif
 }
@@ -831,21 +829,5 @@
 #endif
 
-static inline bool __maybe_io_drain( processor * proc ) {
-	/* paranoid */ verify( proc );
-	bool ret = false;
-	#if defined(CFA_HAVE_LINUX_IO_URING_H)
-		__cfadbg_print_safe(runtime_core, "Kernel : core %p checking io for ring %d\n", proc, proc->io.ctx->fd);
-
-		// Check if we should drain the queue
-		$io_context * ctx = proc->io.ctx;
-		unsigned head = *ctx->cq.head;
-		unsigned tail = *ctx->cq.tail;
-		if(head == tail) return false;
-		ready_schedule_lock();
-		ret = __cfa_io_drain( proc );
-		ready_schedule_unlock();
-	#endif
-	return ret;
-}
+
 
 //-----------------------------------------------------------------------------