Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 040334e1377f9d717d5f1b39e87eaec92fdeaa88)
+++ libcfa/src/concurrency/kernel.cfa	(revision 0fba0d413eab5b9cc9de04051c9757efa5372792)
@@ -42,7 +42,7 @@
 
 #if !defined(__CFA_NO_STATISTICS__)
-	#define __STATS( ...) __VA_ARGS__
+	#define __STATS_DEF( ...) __VA_ARGS__
 #else
-	#define __STATS( ...)
+	#define __STATS_DEF( ...)
 #endif
 
@@ -122,4 +122,5 @@
 static thread$ * __next_thread(cluster * this);
 static thread$ * __next_thread_slow(cluster * this);
+static thread$ * __next_thread_search(cluster * this);
 static inline bool __must_unpark( thread$ * thrd ) __attribute((nonnull(1)));
 static void __run_thread(processor * this, thread$ * dst);
@@ -194,8 +195,18 @@
 
 			if( !readyThread ) {
-				__tls_stats()->io.flush.idle++;
+				__IO_STATS__(true, io.flush.idle++; )
 				__cfa_io_flush( this, 0 );
 
+				readyThread = __next_thread( this->cltr );
+			}
+
+			if( !readyThread ) for(5) { // still nothing to run: up to 5 rounds of slow pop, flushing I/O after each miss
+				__IO_STATS__(true, io.flush.idle++; )
+
 				readyThread = __next_thread_slow( this->cltr );
+
+				if( readyThread ) break;
+
+				__cfa_io_flush( this, 0 );
 			}
 
@@ -209,12 +220,10 @@
 
 				// Confirm the ready-queue is empty
-				readyThread = __next_thread_slow( this->cltr );
+				readyThread = __next_thread_search( this->cltr );
 				if( readyThread ) {
 					// A thread was found, cancel the halt
 					mark_awake(this->cltr->procs, * this);
 
-					#if !defined(__CFA_NO_STATISTICS__)
-						__tls_stats()->ready.sleep.cancels++;
-					#endif
+					__STATS__(true, ready.sleep.cancels++; )
 
 					// continue the mai loop
@@ -243,5 +252,5 @@
 
 			if(this->io.pending && !this->io.dirty) {
-				__tls_stats()->io.flush.dirty++;
+				__IO_STATS__(true, io.flush.dirty++; )
 				__cfa_io_flush( this, 0 );
 			}
@@ -356,7 +365,5 @@
 				break RUNNING;
 			case TICKET_UNBLOCK:
-				#if !defined(__CFA_NO_STATISTICS__)
-					__tls_stats()->ready.threads.threads++;
-				#endif
+				__STATS__(true, ready.threads.threads++; )
 				// This is case 2, the racy case, someone tried to run this thread before it finished blocking
 				// In this case, just run it again.
@@ -373,7 +380,5 @@
 	__cfadbg_print_safe(runtime_core, "Kernel : core %p finished running thread %p\n", this, thrd_dst);
 
-	#if !defined(__CFA_NO_STATISTICS__)
-		__tls_stats()->ready.threads.threads--;
-	#endif
+	__STATS__(true, ready.threads.threads--; )
 
 	/* paranoid */ verify( ! __preemption_enabled() );
@@ -386,5 +391,5 @@
 	thread$ * thrd_src = kernelTLS().this_thread;
 
-	__STATS( thrd_src->last_proc = kernelTLS().this_processor; )
+	__STATS_DEF( thrd_src->last_proc = kernelTLS().this_processor; )
 
 	// Run the thread on this processor
@@ -438,5 +443,5 @@
 	// Dereference the thread now because once we push it, there is not guaranteed it's still valid.
 	struct cluster * cl = thrd->curr_cluster;
-	__STATS(bool outside = hint == UNPARK_LOCAL && thrd->last_proc && thrd->last_proc != kernelTLS().this_processor; )
+	__STATS_DEF(bool outside = hint == UNPARK_LOCAL && thrd->last_proc && thrd->last_proc != kernelTLS().this_processor; )
 
 	// push the thread to the cluster ready-queue
@@ -489,12 +494,17 @@
 
 	ready_schedule_lock();
-		thread$ * thrd;
-		for(25) {
-			thrd = pop_slow( this );
-			if(thrd) goto RET;
-		}
-		thrd = pop_search( this );
-
-		RET:
+		thread$ * thrd = pop_slow( this );
+	ready_schedule_unlock();
+
+	/* paranoid */ verify( ! __preemption_enabled() );
+	return thrd;
+}
+
+// KERNEL ONLY: requires preemption disabled; runs pop_search() on the cluster under the ready-schedule lock
+static inline thread$ * __next_thread_search(cluster * this) with( *this ) {
+	/* paranoid */ verify( ! __preemption_enabled() );
+
+	ready_schedule_lock();
+		thread$ * thrd = pop_search( this );
 	ready_schedule_unlock();
 
@@ -738,7 +748,5 @@
 
 static bool mark_idle(__cluster_proc_list & this, processor & proc) {
-	#if !defined(__CFA_NO_STATISTICS__)
-		__tls_stats()->ready.sleep.halts++;
-	#endif
+	__STATS__(true, ready.sleep.halts++; )
 
 	proc.idle_wctx.fd = 0;
Index: libcfa/src/concurrency/kernel/fwd.hfa
===================================================================
--- libcfa/src/concurrency/kernel/fwd.hfa	(revision 040334e1377f9d717d5f1b39e87eaec92fdeaa88)
+++ libcfa/src/concurrency/kernel/fwd.hfa	(revision 0fba0d413eab5b9cc9de04051c9757efa5372792)
@@ -396,6 +396,18 @@
 				if( !(in_kernel) ) enable_interrupts(); \
 			}
+			#if defined(CFA_HAVE_LINUX_IO_URING_H) // I/O statistics are only recorded when the io_uring header is available
+				#define __IO_STATS__(in_kernel, ...) { \
+					if( !(in_kernel) ) disable_interrupts(); \
+					with( *__tls_stats() ) { \
+						__VA_ARGS__ \
+					} \
+					if( !(in_kernel) ) enable_interrupts(); \
+				}
+			#else // no io_uring header: I/O statistics updates expand to nothing
+				#define __IO_STATS__(in_kernel, ...)
+			#endif // CFA_HAVE_LINUX_IO_URING_H
 		#else
 			#define __STATS__(in_kernel, ...)
+			#define __IO_STATS__(in_kernel, ...)
 		#endif
 	}
