Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 78a580d0f8719cd1f71563bdcdef7a9930002650)
+++ libcfa/src/concurrency/io.cfa	(revision 4479890a8b920bd26343ba78b81c5b6c26ae3ccf)
@@ -94,5 +94,5 @@
 	extern void __kernel_unpark( thread$ * thrd, unpark_hint );
 
-	bool __cfa_io_drain( $io_context * ctx, cluster * cltr ) {
+	static bool __cfa_do_drain( $io_context * ctx, cluster * cltr ) {
 		/* paranoid */ verify( ! __preemption_enabled() );
 		/* paranoid */ verify( ready_schedule_islocked() );
@@ -142,4 +142,60 @@
 
 		return true;
+	}
+
+	// Drain this processor's io context. On alternating calls, either pick a help target or try
+	// to drain that target when its age exceeds the cutoff. Returns true if either drain did.
+	bool __cfa_io_drain( processor * proc ) {
+		bool remote = false;
+
+		cluster * const cltr = proc->cltr;
+		$io_context * const ctx = proc->io.ctx;
+		/* paranoid */ verify( cltr );
+		/* paranoid */ verify( ctx );
+
+		with(cltr->sched) {
+			const size_t ctxs_count = io.count;
+
+			/* paranoid */ verify( ready_schedule_islocked() );
+			/* paranoid */ verify( ! __preemption_enabled() );
+			/* paranoid */ verify( active_processor() == proc );
+			/* paranoid */ verify( __shard_factor.io > 0 );
+			/* paranoid */ verify( ctxs_count > 0 );
+			/* paranoid */ verify( ctx->cq.id < ctxs_count );
+
+			const unsigned this_cache = cache_id(cltr, ctx->cq.id / __shard_factor.io);
+			const unsigned long long ctsc = rdtscl();
+
+			if(proc->io.target == MAX) {
+				uint64_t chaos = __tls_rand();
+				unsigned ext = chaos & 0xff;
+				unsigned other  = (chaos >> 8) % (ctxs_count);
+				// keep the candidate if its cache id matches ours, otherwise only 3 times out of 256
+				if(ext < 3 || __atomic_load_n(&caches[other / __shard_factor.io].id, __ATOMIC_RELAXED) == this_cache) {
+					proc->io.target = other;
+				}
+			}
+			else {
+				const unsigned target = proc->io.target;
+				if(target < ctxs_count) { // bounds check must precede any io.tscs[target] access
+					/* paranoid */ verify( io.tscs[target].tv != MAX );
+					const unsigned long long cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io);
+					const unsigned long long age = moving_average(ctsc, io.tscs[target].tv, io.tscs[target].ma);
+					// help only when the target's age (from moving_average) exceeds the calc_cutoff value
+					if(age > cutoff) {
+						remote = __cfa_do_drain( io.data[target], cltr );
+					}
+				}
+				proc->io.target = MAX;
+			}
+		}
+
+		// Drain the local queue
+		const bool local = __cfa_do_drain( ctx, cltr );
+
+		/* paranoid */ verify( ready_schedule_islocked() );
+		/* paranoid */ verify( ! __preemption_enabled() );
+		/* paranoid */ verify( active_processor() == proc );
+		return local || remote;
 	}
 
@@ -189,5 +245,5 @@
 
 		ready_schedule_lock();
-		bool ret = __cfa_io_drain( &ctx, cltr );
+		bool ret = __cfa_io_drain( proc );
 		ready_schedule_unlock();
 		return ret;
Index: libcfa/src/concurrency/io/types.hfa
===================================================================
--- libcfa/src/concurrency/io/types.hfa	(revision 78a580d0f8719cd1f71563bdcdef7a9930002650)
+++ libcfa/src/concurrency/io/types.hfa	(revision 4479890a8b920bd26343ba78b81c5b6c26ae3ccf)
@@ -135,4 +135,8 @@
 	};
 
+	static inline unsigned long long ts($io_context *& this) { // accessor: yields the ts field of this context's cq member
+		return this->cq.ts; // NOTE(review): presumably consumed by generic timestamp helpers such as calc_cutoff — confirm
+	}
+
 	struct __pending_alloc {
 		inline __outstanding_io;
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 78a580d0f8719cd1f71563bdcdef7a9930002650)
+++ libcfa/src/concurrency/kernel.cfa	(revision 4479890a8b920bd26343ba78b81c5b6c26ae3ccf)
@@ -136,5 +136,5 @@
 static void mark_awake(__cluster_proc_list & idles, processor & proc);
 
-extern bool __cfa_io_drain( $io_context *, cluster * cltr );
+extern bool __cfa_io_drain( processor * proc ) __attribute__((nonnull (1))); // defined in io.cfa; proc must not be null
 extern bool __cfa_io_flush( processor *, int min_comp );
 static inline bool __maybe_io_drain( processor * );
@@ -829,4 +829,5 @@
 
 static inline bool __maybe_io_drain( processor * proc ) {
+	/* paranoid */ verify( proc );
 	bool ret = false;
 	#if defined(CFA_HAVE_LINUX_IO_URING_H)
@@ -839,5 +840,5 @@
 		if(head == tail) return false;
 		ready_schedule_lock();
-		ret = __cfa_io_drain( ctx, proc->cltr );
+		ret = __cfa_io_drain( proc );
 		ready_schedule_unlock();
 	#endif
Index: libcfa/src/concurrency/kernel/cluster.hfa
===================================================================
--- libcfa/src/concurrency/kernel/cluster.hfa	(revision 78a580d0f8719cd1f71563bdcdef7a9930002650)
+++ libcfa/src/concurrency/kernel/cluster.hfa	(revision 4479890a8b920bd26343ba78b81c5b6c26ae3ccf)
@@ -49,5 +49,5 @@
 static inline unsigned long long calc_cutoff(
 	const unsigned long long ctsc,
-	const processor * proc,
+	unsigned procid,
 	size_t count,
 	Data_t * data,
@@ -55,5 +55,5 @@
 	const unsigned shard_factor
 ) {
-	unsigned start = proc->rdq.id;
+	unsigned start = procid;
 	unsigned long long max = 0;
 	for(i; shard_factor) {
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision 78a580d0f8719cd1f71563bdcdef7a9930002650)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision 4479890a8b920bd26343ba78b81c5b6c26ae3ccf)
@@ -139,5 +139,5 @@
 		/* paranoid */ verify( readyQ.tscs[target].tv != MAX );
 		if(target < lanes_count) {
-			const unsigned long long cutoff = calc_cutoff(ctsc, proc, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq);
+			const unsigned long long cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq);
 			const unsigned long long age = moving_average(ctsc, readyQ.tscs[target].tv, readyQ.tscs[target].ma);
 			__cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no");