Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 2f5ea69b6db7b5cd41a83b98373f3b70e83aeddb)
+++ libcfa/src/concurrency/kernel.cfa	(revision 780a61412318cddfec5ff2b3146c9767ee2388b4)
@@ -113,7 +113,7 @@
 static void __wake_one(cluster * cltr);
 
-static void push  (__cluster_idles & idles, processor & proc);
-static void remove(__cluster_idles & idles, processor & proc);
-static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );
+static void mark_idle (__cluster_proc_list & idles, processor & proc);
+static void mark_awake(__cluster_proc_list & idles, processor & proc);
+static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list idles );
 
 extern void __cfa_io_start( processor * );
@@ -189,5 +189,5 @@
 
 				// Push self to idle stack
-				push(this->cltr->idles, * this);
+				mark_idle(this->cltr->procs, * this);
 
 				// Confirm the ready-queue is empty
@@ -195,5 +195,5 @@
 				if( readyThread ) {
 					// A thread was found, cancel the halt
-					remove(this->cltr->idles, * this);
+					mark_awake(this->cltr->procs, * this);
 
 					#if !defined(__CFA_NO_STATISTICS__)
@@ -225,5 +225,5 @@
 
 				// We were woken up, remove self from idle
-				remove(this->cltr->idles, * this);
+				mark_awake(this->cltr->procs, * this);
 
 				// DON'T just proceed, start looking again
@@ -617,5 +617,5 @@
 	unsigned idle;
 	unsigned total;
-	[idle, total, p] = query(this->idles);
+	[idle, total, p] = query_idles(this->procs);
 
 	// If no one is sleeping, we are done
@@ -654,27 +654,30 @@
 }
 
-static void push  (__cluster_idles & this, processor & proc) {
+static void mark_idle(__cluster_proc_list & this, processor & proc) {
 	/* paranoid */ verify( ! __preemption_enabled() );
 	lock( this );
 		this.idle++;
 		/* paranoid */ verify( this.idle <= this.total );
-
-		insert_first(this.list, proc);
+		remove(proc);
+		insert_first(this.idles, proc);
 	unlock( this );
 	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
-static void remove(__cluster_idles & this, processor & proc) {
+static void mark_awake(__cluster_proc_list & this, processor & proc) {
 	/* paranoid */ verify( ! __preemption_enabled() );
 	lock( this );
 		this.idle--;
 		/* paranoid */ verify( this.idle >= 0 );
-
 		remove(proc);
+		insert_last(this.actives, proc);
 	unlock( this );
 	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
-static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) {
+static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list this ) {
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( ready_schedule_islocked() );
+
 	for() {
 		uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);
@@ -682,5 +685,5 @@
 		unsigned idle    = this.idle;
 		unsigned total   = this.total;
-		processor * proc = &this.list`first;
+		processor * proc = &this.idles`first;
 		// Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it
 		asm volatile("": : :"memory");
@@ -688,4 +691,7 @@
 		return [idle, total, proc];
 	}
+
+	/* paranoid */ verify( ready_schedule_islocked() );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 2f5ea69b6db7b5cd41a83b98373f3b70e83aeddb)
+++ libcfa/src/concurrency/kernel.hfa	(revision 780a61412318cddfec5ff2b3146c9767ee2388b4)
@@ -180,5 +180,5 @@
 
 // Idle Sleep
-struct __cluster_idles {
+struct __cluster_proc_list {
 	// Spin lock protecting the queue
 	volatile uint64_t lock;
@@ -191,5 +191,8 @@
 
 	// List of idle processors
-	dlist(processor, processor) list;
+	dlist(processor, processor) idles;
+
+	// List of active processors
+	dlist(processor, processor) actives;
 };
 
@@ -207,5 +210,5 @@
 
 	// List of idle processors
-	__cluster_idles idles;
+	__cluster_proc_list procs;
 
 	// List of threads
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 2f5ea69b6db7b5cd41a83b98373f3b70e83aeddb)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 780a61412318cddfec5ff2b3146c9767ee2388b4)
@@ -469,4 +469,5 @@
 	this.name = name;
 	this.cltr = &_cltr;
+	this.cltr_id = -1u;
 	do_terminate = false;
 	preemption_alarm = 0p;
@@ -489,39 +490,30 @@
 	#endif
 
-	lock( this.cltr->idles );
-		int target = this.cltr->idles.total += 1u;
-	unlock( this.cltr->idles );
-
-	id = doregister((__processor_id_t*)&this);
-
+	// Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
+	uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);
+		this.cltr->procs.total += 1u;
+		insert_last(this.cltr->procs.actives, this);
+
+		// Adjust the ready queue size
+		ready_queue_grow( cltr );
+
+	// Unlock the RWlock
+	ready_mutate_unlock( last_size );
+
+	__cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
+}
+
+// Not a ctor, it just preps the destruction but should not destroy members
+static void deinit(processor & this) {
 	// Lock the RWlock so no-one pushes/pops while we are changing the queue
 	uint_fast32_t last_size = ready_mutate_lock();
+		this.cltr->procs.total -= 1u;
+		remove(this);
 
 		// Adjust the ready queue size
-		this.cltr_id = ready_queue_grow( cltr, target );
-
-	// Unlock the RWlock
-	ready_mutate_unlock( last_size );
-
-	__cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
-}
-
-// Not a ctor, it just preps the destruction but should not destroy members
-static void deinit(processor & this) {
-	lock( this.cltr->idles );
-		int target = this.cltr->idles.total -= 1u;
-	unlock( this.cltr->idles );
-
-	// Lock the RWlock so no-one pushes/pops while we are changing the queue
-	uint_fast32_t last_size = ready_mutate_lock();
-
-		// Adjust the ready queue size
-		ready_queue_shrink( this.cltr, target );
-
-	// Unlock the RWlock
-	ready_mutate_unlock( last_size );
-
-	// Finally we don't need the read_lock any more
-	unregister((__processor_id_t*)&this);
+		ready_queue_shrink( this.cltr );
+
+	// Unlock the RWlock and unregister: we don't need the read_lock any more
+	ready_mutate_unregister((__processor_id_t*)&this, last_size );
 
 	close(this.idle);
@@ -566,9 +558,8 @@
 //-----------------------------------------------------------------------------
 // Cluster
-static void ?{}(__cluster_idles & this) {
+static void ?{}(__cluster_proc_list & this) {
 	this.lock  = 0;
 	this.idle  = 0;
 	this.total = 0;
-	(this.list){};
 }
 
@@ -596,5 +587,5 @@
 
 		// Adjust the ready queue size
-		ready_queue_grow( &this, 0 );
+		ready_queue_grow( &this );
 
 	// Unlock the RWlock
@@ -611,5 +602,5 @@
 
 		// Adjust the ready queue size
-		ready_queue_shrink( &this, 0 );
+		ready_queue_shrink( &this );
 
 	// Unlock the RWlock
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision 2f5ea69b6db7b5cd41a83b98373f3b70e83aeddb)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 780a61412318cddfec5ff2b3146c9767ee2388b4)
@@ -83,46 +83,10 @@
 // Cluster lock API
 //=======================================================================
-// Cells use by the reader writer lock
-// while not generic it only relies on a opaque pointer
-struct __attribute__((aligned(128))) __scheduler_lock_id_t {
-	// Spin lock used as the underlying lock
-	volatile bool lock;
-
-	// Handle pointing to the proc owning this cell
-	// Used for allocating cells and debugging
-	__processor_id_t * volatile handle;
-
-	#ifdef __CFA_WITH_VERIFY__
-		// Debug, check if this is owned for reading
-		bool owned;
-	#endif
-};
-
-static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
-
 // Lock-Free registering/unregistering of threads
 // Register a processor to a given cluster and get its unique id in return
-unsigned doregister( struct __processor_id_t * proc );
+void register_proc_id( struct __processor_id_t * );
 
 // Unregister a processor from a given cluster using its id, getting back the original pointer
-void     unregister( struct __processor_id_t * proc );
-
-//-----------------------------------------------------------------------
-// Cluster idle lock/unlock
-static inline void lock(__cluster_idles & this) {
-	for() {
-		uint64_t l = this.lock;
-		if(
-			(0 == (l % 2))
-			&& __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
-		) return;
-		Pause();
-	}
-}
-
-static inline void unlock(__cluster_idles & this) {
-	/* paranoid */ verify( 1 == (this.lock % 2) );
-	__atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
-}
+void unregister_proc_id( struct __processor_id_t * proc );
 
 //=======================================================================
@@ -152,4 +116,22 @@
 	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
 }
+
+// Cells use by the reader writer lock
+// while not generic it only relies on a opaque pointer
+struct __attribute__((aligned(128))) __scheduler_lock_id_t {
+	// Spin lock used as the underlying lock
+	volatile bool lock;
+
+	// Handle pointing to the proc owning this cell
+	// Used for allocating cells and debugging
+	__processor_id_t * volatile handle;
+
+	#ifdef __CFA_WITH_VERIFY__
+		// Debug, check if this is owned for reading
+		bool owned;
+	#endif
+};
+
+static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
 
 //-----------------------------------------------------------------------
@@ -247,4 +229,56 @@
 void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
 
+//-----------------------------------------------------------------------
+// Lock-Free registering/unregistering of threads
+// Register a processor to a given cluster and get its unique id in return
+// For convenience, also acquires the lock
+static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
+	register_proc_id( proc );
+	return ready_mutate_lock();
+}
+
+// Unregister a processor from a given cluster using its id, getting back the original pointer
+// assumes the lock is acquired
+static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
+	ready_mutate_unlock( last_s );
+	unregister_proc_id( proc );
+}
+
+//-----------------------------------------------------------------------
+// Cluster idle lock/unlock
+static inline void lock(__cluster_proc_list & this) {
+	/* paranoid */ verify( ! __preemption_enabled() );
+
+	// Start by locking the global RWlock so that we know no-one is
+	// adding/removing processors while we mess with the idle lock
+	ready_schedule_lock();
+
+	// Simple counting lock, acquired by incrementing the counter
+	// to an odd number
+	for() {
+		uint64_t l = this.lock;
+		if(
+			(0 == (l % 2))
+			&& __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+		) return;
+		Pause();
+	}
+
+	/* paranoid */ verify( ! __preemption_enabled() );
+}
+
+static inline void unlock(__cluster_proc_list & this) {
+	/* paranoid */ verify( ! __preemption_enabled() );
+
+	/* paranoid */ verify( 1 == (this.lock % 2) );
+	// Simple counting lock, release by incrementing to an even number
+	__atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
+
+	// Release the global lock, which we acquired when locking
+	ready_schedule_unlock();
+
+	/* paranoid */ verify( ! __preemption_enabled() );
+}
+
 //=======================================================================
 // Ready-Queue API
@@ -278,9 +312,9 @@
 //-----------------------------------------------------------------------
 // Increase the width of the ready queue (number of lanes) by 4
-unsigned ready_queue_grow  (struct cluster * cltr, int target);
+void ready_queue_grow  (struct cluster * cltr);
 
 //-----------------------------------------------------------------------
 // Decrease the width of the ready queue (number of lanes) by 4
-void ready_queue_shrink(struct cluster * cltr, int target);
+void ready_queue_shrink(struct cluster * cltr);
 
 
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision 2f5ea69b6db7b5cd41a83b98373f3b70e83aeddb)
+++ libcfa/src/concurrency/preemption.cfa	(revision 780a61412318cddfec5ff2b3146c9767ee2388b4)
@@ -712,5 +712,5 @@
 static void * alarm_loop( __attribute__((unused)) void * args ) {
 	__processor_id_t id;
-	id.id = doregister(&id);
+	register_proc_id(&id);
 	__cfaabi_tls.this_proc_id = &id;
 
@@ -773,5 +773,5 @@
 EXIT:
 	__cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
-	unregister(&id);
+	unregister_proc_id(&id);
 
 	return 0p;
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision 2f5ea69b6db7b5cd41a83b98373f3b70e83aeddb)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision 780a61412318cddfec5ff2b3146c9767ee2388b4)
@@ -94,5 +94,5 @@
 //=======================================================================
 // Lock-Free registering/unregistering of threads
-unsigned doregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
+void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
 	__cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);
 
@@ -108,5 +108,6 @@
 			/*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
 			/*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
-			return i;
+			proc->id = i;
+			return;
 		}
 	}
 	}
@@ -135,8 +135,8 @@
 	/*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size));
 	/*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0);
-	return n;
-}
-
-void unregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
+	proc->id = n;
+}
+
+void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
 	unsigned id = proc->id;
 	/*paranoid*/ verify(id < ready);
@@ -254,4 +254,5 @@
 	__attribute__((unused)) int preferred;
 	#if defined(BIAS)
+		/* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );
 		preferred =
 			//*
@@ -344,5 +345,5 @@
 	int preferred;
 	#if defined(BIAS)
-		// Don't bother trying locally too much
+		/* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
 		preferred = kernelTLS().this_processor->cltr_id;
 	#endif
@@ -541,8 +542,24 @@
 }
 
+static void assign_list(unsigned & value, const int inc, dlist(processor, processor) & list, unsigned count) {
+	processor * it = &list`first;
+	for(unsigned i = 0; i < count; i++) {
+		/* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
+		it->cltr_id = value;
+		value += inc;
+		it = &(*it)`next;
+	}
+}
+
+static void reassign_cltr_id(struct cluster * cltr, const int inc) {
+	unsigned preferred = 0;
+	assign_list(preferred, inc, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
+	assign_list(preferred, inc, cltr->procs.idles  , cltr->procs.idle );
+}
+
 // Grow the ready queue
-unsigned ready_queue_grow(struct cluster * cltr, int target) {
-	unsigned preferred;
+void ready_queue_grow(struct cluster * cltr) {
 	size_t ncount;
+	int target = cltr->procs.total;
 
 	/* paranoid */ verify( ready_mutate_islocked() );
@@ -562,8 +579,6 @@
 		if(target >= 2) {
 			ncount = target * 4;
-			preferred = ncount - 4;
 		} else {
 			ncount = 1;
-			preferred = 0;
 		}
 
@@ -595,4 +610,6 @@
 	}
 
+	reassign_cltr_id(cltr, 4);
+
 	// Make sure that everything is consistent
 	/* paranoid */ check( cltr->ready_queue );
@@ -601,9 +618,8 @@
 
 	/* paranoid */ verify( ready_mutate_islocked() );
-	return preferred;
 }
 
 // Shrink the ready queue
-void ready_queue_shrink(struct cluster * cltr, int target) {
+void ready_queue_shrink(struct cluster * cltr) {
 	/* paranoid */ verify( ready_mutate_islocked() );
 	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
@@ -611,4 +627,6 @@
 	// Make sure that everything is consistent
 	/* paranoid */ check( cltr->ready_queue );
+
+	int target = cltr->procs.total;
 
 	with( cltr->ready_queue ) {
@@ -679,4 +697,6 @@
 	}
 
+	reassign_cltr_id(cltr, 4);
+
 	// Make sure that everything is consistent
 	/* paranoid */ check( cltr->ready_queue );
