Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 224916bc85e53eee248fe77dbc9865040f49982b)
+++ libcfa/src/concurrency/kernel.cfa	(revision 7cf3b1d8c069acc5052d1134b294cb0be126a478)
@@ -205,8 +205,4 @@
 				// Don't block if we are done
 				if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
-
-				#if !defined(__CFA_NO_STATISTICS__)
-					__tls_stats()->ready.sleep.halts++;
-				#endif
 
 				// Push self to idle stack
@@ -732,30 +728,46 @@
 // Wake a thread from the front if there are any
 static void __wake_one(cluster * this) {
+	eventfd_t val;
+
 	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verify( ready_schedule_islocked() );
 
 	// Check if there is a sleeping processor
-	// int fd = __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST);
-	int fd = 0;
-	if( __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST) != 0 ) {
-		fd = __atomic_exchange_n(&this->procs.fd, 0, __ATOMIC_RELAXED);
-	}
-
-	// If no one is sleeping, we are done
-	if( fd == 0 ) return;
-
-	// We found a processor, wake it up
-	eventfd_t val;
-	val = 1;
-	eventfd_write( fd, val );
-
-	#if !defined(__CFA_NO_STATISTICS__)
-		if( kernelTLS().this_stats ) {
-			__tls_stats()->ready.sleep.wakes++;
-		}
-		else {
-			__atomic_fetch_add(&this->stats->ready.sleep.wakes, 1, __ATOMIC_RELAXED);
-		}
-	#endif
+	struct __fd_waitctx * fdp = __atomic_load_n(&this->procs.fdw, __ATOMIC_SEQ_CST);
+
+	// If no one is sleeping: we are done
+	if( fdp == 0p ) return;
+
+	int fd = 1;
+	if( __atomic_load_n(&fdp->fd, __ATOMIC_SEQ_CST) != 1 ) {
+		fd = __atomic_exchange_n(&fdp->fd, 1, __ATOMIC_RELAXED);
+	}
+
+	switch(fd) {
+	case 0:
+		// If the processor isn't ready to sleep then the exchange will already wake it up
+		#if !defined(__CFA_NO_STATISTICS__)
+			if( kernelTLS().this_stats ) { __tls_stats()->ready.sleep.early++;
+			} else { __atomic_fetch_add(&this->stats->ready.sleep.early, 1, __ATOMIC_RELAXED); }
+		#endif
+		break;
+	case 1:
+		// If someone else already said they will wake them: we are done
+		#if !defined(__CFA_NO_STATISTICS__)
+			if( kernelTLS().this_stats ) { __tls_stats()->ready.sleep.seen++;
+			} else { __atomic_fetch_add(&this->stats->ready.sleep.seen, 1, __ATOMIC_RELAXED); }
+		#endif
+		break;
+	default:
+		// If the processor was ready to sleep, we need to wake it up with an actual write
+		val = 1;
+		eventfd_write( fd, val );
+
+		#if !defined(__CFA_NO_STATISTICS__)
+			if( kernelTLS().this_stats ) { __tls_stats()->ready.sleep.wakes++;
+			} else { __atomic_fetch_add(&this->stats->ready.sleep.wakes, 1, __ATOMIC_RELAXED); }
+		#endif
+		break;
+	}
 
 	/* paranoid */ verify( ready_schedule_islocked() );
@@ -770,4 +782,6 @@
 
 	__cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
+
+	this->idle_wctx.fd = 1;
 
 	eventfd_t val;
@@ -779,4 +793,19 @@
 
 static void idle_sleep(processor * this, io_future_t & future, iovec & iov) {
+	// Tell everyone we are ready to go to sleep
+	for() {
+		int expected = this->idle_wctx.fd;
+
+		// Someone already told us to wake-up! No time for a nap.
+		if(expected == 1) { return; }
+
+		// Try to mark that we are going to sleep
+		if(__atomic_compare_exchange_n(&this->idle_wctx.fd, &expected, this->idle_fd, false,  __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) {
+			// Everyone agreed, taking a nap
+			break;
+		}
+	}
+
+
 	#if !defined(CFA_WITH_IO_URING_IDLE)
 		#if !defined(__CFA_NO_STATISTICS__)
@@ -825,4 +854,10 @@
 
 static bool mark_idle(__cluster_proc_list & this, processor & proc) {
+	#if !defined(__CFA_NO_STATISTICS__)
+		__tls_stats()->ready.sleep.halts++;
+	#endif
+
+	proc.idle_wctx.fd = 0;
+
 	/* paranoid */ verify( ! __preemption_enabled() );
 	if(!try_lock( this )) return false;
@@ -832,5 +867,5 @@
 		insert_first(this.idles, proc);
 
-		__atomic_store_n(&this.fd, proc.idle_fd, __ATOMIC_SEQ_CST);
+		__atomic_store_n(&this.fdw, &proc.idle_wctx, __ATOMIC_SEQ_CST);
 	unlock( this );
 	/* paranoid */ verify( ! __preemption_enabled() );
@@ -848,7 +883,7 @@
 
 		{
-			int fd = 0;
-			if(!this.idles`isEmpty) fd = this.idles`first.idle_fd;
-			__atomic_store_n(&this.fd, fd, __ATOMIC_SEQ_CST);
+			struct __fd_waitctx * wctx = 0;
+			if(!this.idles`isEmpty) wctx = &this.idles`first.idle_wctx;
+			__atomic_store_n(&this.fdw, wctx, __ATOMIC_SEQ_CST);
 		}
 
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 224916bc85e53eee248fe77dbc9865040f49982b)
+++ libcfa/src/concurrency/kernel.hfa	(revision 7cf3b1d8c069acc5052d1134b294cb0be126a478)
@@ -53,4 +53,9 @@
 coroutine processorCtx_t {
 	struct processor * proc;
+};
+
+
+struct __fd_waitctx {
+	volatile int fd;
 };
 
@@ -101,4 +106,7 @@
 	int idle_fd;
 
+	// Idle waitctx
+	struct __fd_waitctx idle_wctx;
+
 	// Termination synchronisation (user semaphore)
 	oneshot terminated;
@@ -207,5 +215,5 @@
 
 	// FD to use to wake a processor
-	volatile int fd;
+	struct __fd_waitctx * volatile fdw;
 
 	// Total number of processors
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 224916bc85e53eee248fe77dbc9865040f49982b)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 7cf3b1d8c069acc5052d1134b294cb0be126a478)
@@ -537,4 +537,11 @@
 	}
 
+	this.idle_wctx.fd = 0;
+
+	// File descriptors 0 and 1 are assumed to be reserved for standard input and output,
+	// so idle_wctx.fd uses them as sentinels (0: not ready to sleep, 1: wake-up already requested).
+	/* paranoid */ verify( this.idle_fd != 0 );
+	/* paranoid */ verify( this.idle_fd != 1 );
+
 	#if !defined(__CFA_NO_STATISTICS__)
 		print_stats = 0;
@@ -590,5 +597,5 @@
 // Cluster
 static void ?{}(__cluster_proc_list & this) {
-	this.fd    = 0;
+	this.fdw   = 0p;
 	this.idle  = 0;
 	this.total = 0;
Index: libcfa/src/concurrency/stats.cfa
===================================================================
--- libcfa/src/concurrency/stats.cfa	(revision 224916bc85e53eee248fe77dbc9865040f49982b)
+++ libcfa/src/concurrency/stats.cfa	(revision 7cf3b1d8c069acc5052d1134b294cb0be126a478)
@@ -31,5 +31,7 @@
 		stats->ready.sleep.halts   = 0;
 		stats->ready.sleep.cancels = 0;
+		stats->ready.sleep.early   = 0;
 		stats->ready.sleep.wakes   = 0;
+		stats->ready.sleep.seen    = 0;
 		stats->ready.sleep.exits   = 0;
 
@@ -91,5 +93,7 @@
 		tally_one( &cltr->ready.sleep.halts       , &proc->ready.sleep.halts        );
 		tally_one( &cltr->ready.sleep.cancels     , &proc->ready.sleep.cancels      );
+		tally_one( &cltr->ready.sleep.early       , &proc->ready.sleep.early        );
 		tally_one( &cltr->ready.sleep.wakes       , &proc->ready.sleep.wakes        );
+		tally_one( &cltr->ready.sleep.seen        , &proc->ready.sleep.seen         );
 		tally_one( &cltr->ready.sleep.exits       , &proc->ready.sleep.exits        );
 
@@ -153,5 +157,7 @@
 			     | " (" | eng3(ready.pop.search.attempt) | " try)";
 
-			sstr | "- Idle Slp : " | eng3(ready.sleep.halts) | "halt," | eng3(ready.sleep.cancels) | "cancel," | eng3(ready.sleep.wakes) | "wake," | eng3(ready.sleep.exits) | "exit";
+			sstr | "- Idle Slp : " | eng3(ready.sleep.halts) | "halt," | eng3(ready.sleep.cancels) | "cancel,"
+			     | eng3(ready.sleep.wakes + ready.sleep.early) | '(' | eng3(ready.sleep.early) | ',' | eng3(ready.sleep.seen) | ')' | " wake(early, seen),"
+			     | eng3(ready.sleep.exits) | "exit";
 			sstr | nl;
 		}
Index: libcfa/src/concurrency/stats.hfa
===================================================================
--- libcfa/src/concurrency/stats.hfa	(revision 224916bc85e53eee248fe77dbc9865040f49982b)
+++ libcfa/src/concurrency/stats.hfa	(revision 7cf3b1d8c069acc5052d1134b294cb0be126a478)
@@ -69,5 +69,7 @@
 			volatile uint64_t halts;
 			volatile uint64_t cancels;
+			volatile uint64_t early;
 			volatile uint64_t wakes;
+			volatile uint64_t seen;
 			volatile uint64_t exits;
 		} sleep;
