Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 04b5cef29b60d0ea2bc49962adba72c424a3e55e)
+++ libcfa/src/concurrency/kernel.cfa	(revision 64a7146c54101e2d816b7c6fd793f072222b5599)
@@ -118,8 +118,9 @@
 // Kernel Scheduling logic
 static $thread * __next_thread(cluster * this);
+static bool __has_next_thread(cluster * this);
 static void __run_thread(processor * this, $thread * dst);
-static $thread * __halt(processor * this);
-static bool __wake_one(cluster * cltr);
 static bool __wake_proc(processor *);
+static bool __wake_one(struct __processor_id_t * id, cluster * cltr);
+static void __halt(processor * this);
 
 //-----------------------------------------------------------------------------
@@ -276,6 +277,4 @@
 	#endif
 
-	procs{ __get };
-	idles{ __get };
 	threads{ __get };
 
@@ -315,5 +314,12 @@
 	if(this != mainProcessor) {
 		this->id = doregister((__processor_id_t*)this);
-		ready_queue_grow( this->cltr );
+		// Lock the RWlock so no-one pushes/pops while we are changing the queue
+		uint_fast32_t last_size = ready_mutate_lock();
+
+			// Adjust the ready queue size
+			ready_queue_grow( this->cltr );
+
+		// Unlock the RWlock
+		ready_mutate_unlock( last_size );
 	}
 
@@ -330,10 +336,4 @@
 			// Try to get the next thread
 			readyThread = __next_thread( this->cltr );
-
-			// If no ready thread
-			if( readyThread == 0p ) {
-				// Block until a thread is ready
-				readyThread = __halt(this);
-			}
 
 			// Check if we actually found a thread
@@ -349,4 +349,8 @@
 				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 			}
+			else {
+				// Block until a thread is ready
+				__halt(this);
+			}
 		}
 
@@ -360,5 +364,27 @@
 	// unregister the processor unless it's the main thread which is handled in the boot sequence
 	if(this != mainProcessor) {
-		ready_queue_shrink( this->cltr );
+		// Lock the RWlock so no-one pushes/pops while we are changing the queue
+		uint_fast32_t last_size = ready_mutate_lock();
+
+			// Adjust the ready queue size
+			ready_queue_shrink( this->cltr );
+
+			// Make sure we aren't on the idle queue
+			unsafe_remove( this->cltr->idles, this );
+			Link(processor) * link = &this->cltr->idles.stack;
+			for() {
+				processor * next = link->top;
+				if( next == this ) {
+					link->top = getNext(this)->top;
+					break;
+				}
+				if( next == 0p ) break;
+				link = getNext(next);
+			}
+
+		// Unlock the RWlock
+		ready_mutate_unlock( last_size );
+
+		// Finally we don't need the read_lock any more
 		unregister((__processor_id_t*)this);
 	}
@@ -646,5 +672,5 @@
 		push( thrd->curr_cluster, thrd );
 
-		__wake_one(thrd->curr_cluster);
+		__wake_one(id, thrd->curr_cluster);
 	ready_schedule_unlock( id );
 
@@ -662,4 +688,16 @@
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 	return head;
+}
+
+// KERNEL ONLY
+static bool __has_next_thread(cluster * this) with( *this ) {
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+
+	ready_schedule_lock  ( (__processor_id_t*)kernelTLS.this_processor );
+		bool not_empty = query( this );
+	ready_schedule_unlock( (__processor_id_t*)kernelTLS.this_processor );
+
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	return not_empty;
 }
 
@@ -900,76 +938,48 @@
 // Kernel Idle Sleep
 //=============================================================================================
-static $thread * __halt(processor * this) with( *this ) {
-	// if( do_terminate ) return 0p;
-
-	// // First, lock the cluster idle
-	// lock( cltr->idle_lock __cfaabi_dbg_ctx2 );
-
-	// // Check if we can find a thread
-	// if( $thread * found = __next_thread( cltr ) ) {
-	// 	unlock( cltr->idle_lock );
-	// 	return found;
-	// }
-
-	// // Move this processor from the active list to the idle list
-	// move_to_front(cltr->procs, cltr->idles, *this);
-
-	// // Unlock the idle lock so we don't go to sleep with a lock
-	// unlock    (cltr->idle_lock);
-
-	// // We are ready to sleep
-	// __cfadbg_print_safe(runtime_core, "Kernel : Processor %p ready to sleep\n", this);
-	// wait( idle );
-
-	// // We have woken up
-	// __cfadbg_print_safe(runtime_core, "Kernel : Processor %p woke up and ready to run\n", this);
-
-	// // Get ourself off the idle list
-	// with( *cltr ) {
-	// 	lock  (idle_lock __cfaabi_dbg_ctx2);
-	// 	move_to_front(idles, procs, *this);
-	// 	unlock(idle_lock);
-	// }
-
-	// Don't check the ready queue again, we may not be in a position to run a thread
-	return 0p;
-}
-
 // Wake a thread from the front if there are any
-static bool __wake_one(cluster * this) {
-	// // First, lock the cluster idle
-	// lock( this->idle_lock __cfaabi_dbg_ctx2 );
-
-	// // Check if there is someone to wake up
-	// if( !this->idles.head ) {
-	// 	// Nope unlock and return false
-	// 	unlock( this->idle_lock );
-	// 	return false;
-	// }
-
-	// // Wake them up
-	// __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this->idles.head);
-	// /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	// post( this->idles.head->idle );
-
-	// // Unlock and return true
-	// unlock( this->idle_lock );
-	// return true;
-
-	return false;
+static bool __wake_one(struct __processor_id_t * id, cluster * this) {
+	/* paranoid */ verify( ready_schedule_islocked( id ) );
+
+	// Check if there is a sleeping processor
+	processor * p = pop(this->idles);
+
+	// If no one is sleeping, we are done
+	if( 0p == p ) return false;
+
+	// We found a processor, wake it up
+	post( p->idle );
+
+	return true;
 }
 
 // Unconditionnaly wake a thread
 static bool __wake_proc(processor * this) {
-	// __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
-
-	// disable_interrupts();
-	// 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	// 	bool ret = post( this->idle );
-	// enable_interrupts( __cfaabi_dbg_ctx );
-
-	// return ret;
-
-	return false;
+	__cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
+
+	disable_interrupts();
+		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+		bool ret = post( this->idle );
+	enable_interrupts( __cfaabi_dbg_ctx );
+
+	return ret;
+}
+
+static void __halt(processor * this) with( *this ) {
+	if( do_terminate ) return;
+
+	// Push self onto the cluster's idle stack so that __wake_one can pop and post us
+	push(cltr->idles, *this);
+
+	// Make sure we don't miss a thread: re-check the ready queue now that we are listed as idle
+	if( __has_next_thread(cltr) ) {
+		// A thread is ready, make sure an idle processor (possibly ourselves) is woken up
+		struct __processor_id_t *id = (struct __processor_id_t *) this;
+		ready_schedule_lock  ( id );
+			__wake_one( id, cltr );
+		ready_schedule_unlock( id );
+	}
+	// Block on 'idle' until it is posted (see __wake_one / __wake_proc)
+	wait( idle );
 }
 
@@ -1131,15 +1141,15 @@
 
 void doregister( cluster * cltr, processor * proc ) {
-	lock      (cltr->idle_lock __cfaabi_dbg_ctx2);
-	cltr->nprocessors += 1;
-	push_front(cltr->procs, *proc);
-	unlock    (cltr->idle_lock);
+	// lock      (cltr->idle_lock __cfaabi_dbg_ctx2);
+	// cltr->nprocessors += 1;
+	// push_front(cltr->procs, *proc);
+	// unlock    (cltr->idle_lock);
 }
 
 void unregister( cluster * cltr, processor * proc ) {
-	lock  (cltr->idle_lock __cfaabi_dbg_ctx2);
-	remove(cltr->procs, *proc );
-	cltr->nprocessors -= 1;
-	unlock(cltr->idle_lock);
+	// lock  (cltr->idle_lock __cfaabi_dbg_ctx2);
+	// remove(cltr->procs, *proc );
+	// cltr->nprocessors -= 1;
+	// unlock(cltr->idle_lock);
 }
 
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 04b5cef29b60d0ea2bc49962adba72c424a3e55e)
+++ libcfa/src/concurrency/kernel.hfa	(revision 64a7146c54101e2d816b7c6fd793f072222b5599)
@@ -23,4 +23,6 @@
 #include "coroutine.hfa"
 
+#include "containers/stackLockFree.hfa"
+
 extern "C" {
 #include <pthread.h>
@@ -101,8 +103,5 @@
 
 	// Link lists fields
-	struct __dbg_node_cltr {
-		processor * next;
-		processor * prev;
-	} node;
+	Link(processor) link;
 
 #ifdef __CFA_DEBUG__
@@ -119,5 +118,5 @@
 static inline void  ?{}(processor & this, const char name[]) { this{name, *mainCluster }; }
 
-static inline [processor *&, processor *& ] __get( processor & this ) __attribute__((const)) { return this.node.[next, prev]; }
+static inline Link(processor) * getNext( processor * this ) { return &this->link; }
 
 //-----------------------------------------------------------------------------
@@ -185,8 +184,6 @@
 	Duration preemption_rate;
 
-	// List of processors
-	__spinlock_t idle_lock;
-	__dllist_t(struct processor) procs;
-	__dllist_t(struct processor) idles;
+	// List of idle processors
+	StackLF(processor) idles;
 	unsigned int nprocessors;
 
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision 04b5cef29b60d0ea2bc49962adba72c424a3e55e)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 64a7146c54101e2d816b7c6fd793f072222b5599)
@@ -112,7 +112,18 @@
 // while not generic it only relies on a opaque pointer
 struct __attribute__((aligned(128))) __scheduler_lock_id_t {
+	// Spin lock used as the underlying lock
+	volatile bool lock;
+
+	// Handle pointing to the proc owning this cell
+	// Used for allocating cells and debugging
 	__processor_id_t * volatile handle;
-	volatile bool lock;
-};
+
+	#ifdef __CFA_WITH_VERIFY__
+		// Debug, check if this is owned for reading
+		bool owned;
+	#endif
+};
+
+static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
 
 // Lock-Free registering/unregistering of threads
@@ -198,4 +209,9 @@
 	__atomic_acquire( &data[iproc].lock );
 	/*paranoid*/ verify(data[iproc].lock);
+
+	#ifdef __CFA_WITH_VERIFY__
+		// Debug, check if this is owned for reading
+		data[iproc].owned = true;
+	#endif
 }
 
@@ -205,6 +221,21 @@
 	/*paranoid*/ verify(iproc < ready);
 	/*paranoid*/ verify(data[iproc].lock);
+	/*paranoid*/ verify(data[iproc].owned);
+	#ifdef __CFA_WITH_VERIFY__
+		// Debug, check if this is owned for reading
+		data[iproc].owned = false;
+	#endif
 	__atomic_unlock(&data[iproc].lock);
 }
+
+#ifdef __CFA_WITH_VERIFY__
+	static inline bool ready_schedule_islocked( struct __processor_id_t * proc) {
+		return __scheduler_lock->data[proc->id].owned;
+	}
+
+	static inline bool ready_mutate_islocked() {
+		return __scheduler_lock->lock;
+	}
+#endif
 
 //-----------------------------------------------------------------------
@@ -217,4 +248,9 @@
 //=======================================================================
 // Ready-Queue API
+//-----------------------------------------------------------------------
+// query whether the ready queue of a cluster has any ready thread
+// returns true if the ready queue is not empty
+__attribute__((hot)) bool query(struct cluster * cltr);
+
 //-----------------------------------------------------------------------
 // push thread onto a ready queue for a cluster
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision 04b5cef29b60d0ea2bc49962adba72c424a3e55e)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision 64a7146c54101e2d816b7c6fd793f072222b5599)
@@ -81,4 +81,7 @@
 	this.handle = proc;
 	this.lock   = false;
+	#ifdef __CFA_WITH_VERIFY__
+		this.owned  = false;
+	#endif
 }
 
@@ -97,5 +100,5 @@
 			&& __atomic_compare_exchange_n( &data[i].handle, &null, proc, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
 			/*paranoid*/ verify(i < ready);
-			/*paranoid*/ verify(__alignof__(data[i]) == cache_line_size);
+			/*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
 			/*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
 			return i;
@@ -562,4 +565,9 @@
 
 //-----------------------------------------------------------------------
+__attribute__((hot)) bool query(struct cluster * cltr) {
+	return query(cltr->ready_queue.snzi);
+}
+
+//-----------------------------------------------------------------------
 __attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
 	__cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
@@ -761,7 +769,5 @@
 // Grow the ready queue
 void ready_queue_grow  (struct cluster * cltr) {
-	// Lock the RWlock so no-one pushes/pops while we are changing the queue
-	uint_fast32_t last_size = ready_mutate_lock();
-
+	/* paranoid */ verify( ready_mutate_islocked() );
 	__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
 
@@ -808,13 +814,10 @@
 	__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n");
 
-	// Unlock the RWlock
-	ready_mutate_unlock( last_size );
+	/* paranoid */ verify( ready_mutate_islocked() );
 }
 
 // Shrink the ready queue
 void ready_queue_shrink(struct cluster * cltr) {
-	// Lock the RWlock so no-one pushes/pops while we are changing the queue
-	uint_fast32_t last_size = ready_mutate_lock();
-
+	/* paranoid */ verify( ready_mutate_islocked() );
 	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
 
@@ -889,6 +892,4 @@
 
 	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n");
-
-	// Unlock the RWlock
-	ready_mutate_unlock( last_size );
-}
+	/* paranoid */ verify( ready_mutate_islocked() );
+}
