Index: libcfa/src/bits/containers.hfa
===================================================================
--- libcfa/src/bits/containers.hfa	(revision f0ce5f46bf7a712637429882bcf81e19c2741855)
+++ libcfa/src/bits/containers.hfa	(revision bb2e05e9c3217d0e578d8a1fc236a3d3915670b9)
@@ -274,4 +274,9 @@
 			return this.head != 0;
 		}
+
+		void move_to_front( __dllist(T) & src, __dllist(T) & dst, T & node ) {
+			remove    (src, node);
+			push_front(dst, node);
+		}
 	}
 	#undef next
Index: libcfa/src/bits/locks.hfa
===================================================================
--- libcfa/src/bits/locks.hfa	(revision f0ce5f46bf7a712637429882bcf81e19c2741855)
+++ libcfa/src/bits/locks.hfa	(revision bb2e05e9c3217d0e578d8a1fc236a3d3915670b9)
@@ -139,7 +139,5 @@
 	}
 
-	static inline void post(__bin_sem_t & this) with( this ) {
-		verify(__cfaabi_dbg_in_kernel());
-
+	static inline bool post(__bin_sem_t & this) with( this ) {
 		pthread_mutex_lock(&lock);
 			bool needs_signal = !signaled;
@@ -147,6 +145,7 @@
 		pthread_mutex_unlock(&lock);
 
-		if (needs_signal)
-			pthread_cond_signal(&cond);
+		if (needs_signal) pthread_cond_signal(&cond);
+
+		return needs_signal;
 	}
 #endif
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision f0ce5f46bf7a712637429882bcf81e19c2741855)
+++ libcfa/src/concurrency/kernel.cfa	(revision bb2e05e9c3217d0e578d8a1fc236a3d3915670b9)
@@ -114,4 +114,12 @@
 
 //-----------------------------------------------------------------------------
+// Kernel Scheduling logic
+static $thread * __next_thread(cluster * this);
+static void __run_thread(processor * this, $thread * dst);
+static $thread * __halt(processor * this);
+static bool __wake_one(cluster * cltr, bool was_empty);
+static bool __wake_proc(processor *);
+
+//-----------------------------------------------------------------------------
 // Kernel storage
 KERNEL_STORAGE(cluster,		mainCluster);
@@ -220,5 +228,5 @@
 	runner.proc = &this;
 
-	idleLock{};
+	idle{};
 
 	__cfaabi_dbg_print_safe("Kernel : Starting core %p\n", &this);
@@ -234,5 +242,5 @@
 
 		__atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED);
-		wake( &this );
+		__wake_proc( &this );
 
 		P( terminated );
@@ -264,8 +272,4 @@
 // Kernel Scheduling logic
 //=============================================================================================
-static $thread * __next_thread(cluster * this);
-static void __run_thread(processor * this, $thread * dst);
-static void __halt(processor * this);
-
 //Main of the processor contexts
 void main(processorCtx_t & runner) {
@@ -289,19 +293,23 @@
 		$thread * readyThread = 0p;
 		for( unsigned int spin_count = 0; ! __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST); spin_count++ ) {
+			// Try to get the next thread
 			readyThread = __next_thread( this->cltr );
 
-			if(readyThread) {
+			// If no ready thread
+			if( readyThread == 0p ) {
+				// Block until a thread is ready
+				readyThread = __halt(this);
+			}
+
+			// Check if we actually found a thread
+			if( readyThread ) {
 				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-				/* paranoid */ verifyf( readyThread->state == Blocked || readyThread->state == Start || readyThread->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", readyThread->state, readyThread->preempted);
+				/* paranoid */ verifyf( readyThread->state == Ready || readyThread->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", readyThread->state, readyThread->preempted);
 				/* paranoid */ verifyf( readyThread->next == 0p, "Expected null got %p", readyThread->next );
 
+				// We found a thread run it
 				__run_thread(this, readyThread);
 
 				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-
-				spin_count = 0;
-			} else {
-				// spin(this, &spin_count);
-				__halt(this);
 			}
 		}
@@ -312,8 +320,12 @@
 	unregister(this->cltr, this);
 
-	bool signalled = V( this->terminated );
-	if(signalled)
+	V( this->terminated );
 
 	__cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
+
+	// HACK : the coroutine context switch expects this_thread to be set
+	// and it makes sense for it to be set in all other cases except here
+	// fake it
+	if( this == mainProcessor ) kernelTLS.this_thread = mainThread;
 }
 
@@ -338,7 +350,7 @@
 		if(unlikely(thrd_dst->preempted)) {
 			thrd_dst->preempted = __NO_PREEMPTION;
-			verify(thrd_dst->state == Active || thrd_dst->state == Rerun);
+			verify(thrd_dst->state == Active  || thrd_dst->state == Rerun);
 		} else {
-			verify(thrd_dst->state == Start || thrd_dst->state == Primed || thrd_dst->state == Blocked);
+			verify(thrd_dst->state == Blocked || thrd_dst->state == Ready); // Ready means scheduled normally, blocked means rerun
 			thrd_dst->state = Active;
 		}
@@ -521,6 +533,6 @@
 	dst->starter = dst->starter ? dst->starter : &src->self_cor;
 
-	// set state of current coroutine to inactive
-	src->state = src->state == Halted ? Halted : Blocked;
+	// make sure the current state is still correct
+	/* paranoid */ verify(src->state == Ready);
 
 	// context switch to specified coroutine
@@ -531,6 +543,6 @@
 	mainThread->curr_cor = &mainThread->self_cor;
 
-	// set state of new coroutine to active
-	src->state = Active;
+	// make sure the current state has been updated
+	/* paranoid */ verify(src->state == Active);
 
 	verify( ! kernelTLS.preemption_state.enabled );
@@ -570,14 +582,5 @@
 	unlock( ready_queue_lock );
 
-	if(was_empty) {
-		lock      (proc_list_lock __cfaabi_dbg_ctx2);
-		if(idles) {
-			wake_fast(idles.head);
-		}
-		unlock    (proc_list_lock);
-	}
-	else if( struct processor * idle = idles.head ) {
-		wake_fast(idle);
-	}
+	__wake_one(thrd->curr_cluster, was_empty);
 
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
@@ -768,7 +771,7 @@
 	__cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n");
 
-	verify( TL_GET( preemption_state.enabled ) );
+	/* paranoid */ verify( TL_GET( preemption_state.enabled ) );
 	disable_interrupts();
-	verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 
 	// SKULLDUGGERY: Notify the mainProcessor it needs to terminates.
@@ -800,28 +803,68 @@
 
 //=============================================================================================
-// Kernel Quiescing
+// Kernel Idle Sleep
 //=============================================================================================
-static void __halt(processor * this) with( *this ) {
-	// verify( ! __atomic_load_n(&do_terminate, __ATOMIC_SEQ_CST) );
-
+static $thread * __halt(processor * this) with( *this ) {
+	if( do_terminate ) return 0p;
+
+	// First, lock the cluster idle
+	lock( cltr->idle_lock __cfaabi_dbg_ctx2 );
+
+	// Check if we can find a thread
+	if( $thread * found = __next_thread( cltr ) ) {
+		unlock( cltr->idle_lock );
+		return found;
+	}
+
+	// Move this processor from the active list to the idle list
+	move_to_front(cltr->procs, cltr->idles, *this);
+
+	// Unlock the idle lock so we don't go to sleep with a lock
+	unlock    (cltr->idle_lock);
+
+	// We are ready to sleep
+	__cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);
+	wait( idle );
+
+	// We have woken up
+	__cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);
+
+	// Get ourself off the idle list
 	with( *cltr ) {
-		lock      (proc_list_lock __cfaabi_dbg_ctx2);
-		remove    (procs, *this);
-		push_front(idles, *this);
-		unlock    (proc_list_lock);
-	}
-
-	__cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);
-
-	wait( idleLock );
-
-	__cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);
-
-	with( *cltr ) {
-		lock      (proc_list_lock __cfaabi_dbg_ctx2);
-		remove    (idles, *this);
-		push_front(procs, *this);
-		unlock    (proc_list_lock);
-	}
+		lock  (idle_lock __cfaabi_dbg_ctx2);
+		move_to_front(idles, procs, *this);
+		unlock(idle_lock);
+	}
+
+	// Don't check the ready queue again, we may not be in a position to run a thread
+	return 0p;
+}
+
+// Wake one processor from the front of the idle list, if there are any
+static bool __wake_one(cluster * this, __attribute__((unused)) bool force) {
+	// if we don't want to force check if we know it's false
+	if( !this->idles.head && !force ) return false;
+
+	// First, lock the cluster idle
+	lock( this->idle_lock __cfaabi_dbg_ctx2 );
+
+	// Check if there is someone to wake up
+	if( !this->idles.head ) {
+		// Nope unlock and return false
+		unlock( this->idle_lock );
+		return false;
+	}
+
+	// Wake them up
+	post( this->idles.head->idle );
+
+	// Unlock and return true
+	unlock( this->idle_lock );
+	return true;
+}
+
+// Unconditionally wake a processor
+static bool __wake_proc(processor * this) {
+	return post( this->idle );
 }
 
@@ -967,15 +1010,15 @@
 
 void doregister( cluster * cltr, processor * proc ) {
-	lock      (cltr->proc_list_lock __cfaabi_dbg_ctx2);
+	lock      (cltr->idle_lock __cfaabi_dbg_ctx2);
 	cltr->nprocessors += 1;
 	push_front(cltr->procs, *proc);
-	unlock    (cltr->proc_list_lock);
+	unlock    (cltr->idle_lock);
 }
 
 void unregister( cluster * cltr, processor * proc ) {
-	lock  (cltr->proc_list_lock __cfaabi_dbg_ctx2);
+	lock  (cltr->idle_lock __cfaabi_dbg_ctx2);
 	remove(cltr->procs, *proc );
 	cltr->nprocessors -= 1;
-	unlock(cltr->proc_list_lock);
+	unlock(cltr->idle_lock);
 }
 
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision f0ce5f46bf7a712637429882bcf81e19c2741855)
+++ libcfa/src/concurrency/kernel.hfa	(revision bb2e05e9c3217d0e578d8a1fc236a3d3915670b9)
@@ -76,6 +76,6 @@
 	bool pending_preemption;
 
-	// Idle lock
-	__bin_sem_t idleLock;
+	// Idle lock (kernel semaphore)
+	__bin_sem_t idle;
 
 	// Termination
@@ -83,5 +83,5 @@
 	volatile bool do_terminate;
 
-	// Termination synchronisation
+	// Termination synchronisation (user semaphore)
 	semaphore terminated;
 
@@ -126,5 +126,5 @@
 
 	// List of processors
-	__spinlock_t proc_list_lock;
+	__spinlock_t idle_lock;
 	__dllist_t(struct processor) procs;
 	__dllist_t(struct processor) idles;
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision f0ce5f46bf7a712637429882bcf81e19c2741855)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision bb2e05e9c3217d0e578d8a1fc236a3d3915670b9)
@@ -42,14 +42,5 @@
 void * __create_pthread( pthread_t *, void * (*)(void *), void * );
 
-static inline void wake_fast(processor * this) {
-	__cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
-	post( this->idleLock );
-}
 
-static inline void wake(processor * this) {
-	disable_interrupts();
-	wake_fast(this);
-	enable_interrupts( __cfaabi_dbg_ctx );
-}
 
 struct event_kernel_t {
