Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 54eb5ebd44d38f93d7a6ae88f064436d0953610d)
+++ libcfa/src/concurrency/kernel.cfa	(revision 320ec6fc95f0cd3eb0e069c2baf00faccaba5778)
@@ -242,5 +242,5 @@
 	#endif
 
-	__atomic_fetch_add( &cltr->nprocessors, 1u, __ATOMIC_SEQ_CST );
+	int target = __atomic_add_fetch( &cltr->nprocessors, 1u, __ATOMIC_SEQ_CST );
 
 	id = doregister((__processor_id_t*)&this);
@@ -250,5 +250,5 @@
 
 		// Adjust the ready queue size
-		ready_queue_grow( cltr );
+		ready_queue_grow( cltr, target );
 
 	// Unlock the RWlock
@@ -260,9 +260,12 @@
 // Not a ctor, it just preps the destruction but should not destroy members
 void deinit(processor & this) {
+
+	int target = __atomic_sub_fetch( &this.cltr->nprocessors, 1u, __ATOMIC_SEQ_CST );
+
 	// Lock the RWlock so no-one pushes/pops while we are changing the queue
 	uint_fast32_t last_size = ready_mutate_lock();
 
 		// Adjust the ready queue size
-		ready_queue_shrink( this.cltr );
+		ready_queue_shrink( this.cltr, target );
 
 		// Make sure we aren't on the idle queue
@@ -305,6 +308,4 @@
 
 	deinit( this );
-
-	__atomic_fetch_sub( &cltr->nprocessors, 1u, __ATOMIC_SEQ_CST );
 }
 
@@ -936,5 +937,4 @@
 
 		/* paranoid */ verify( this.do_terminate == true );
-		__atomic_fetch_sub( &cltr->nprocessors, 1u, __ATOMIC_SEQ_CST );
 		__cfaabi_dbg_print_safe("Kernel : destroyed main processor context %p\n", &runner);
 	}
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision 54eb5ebd44d38f93d7a6ae88f064436d0953610d)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 320ec6fc95f0cd3eb0e069c2baf00faccaba5778)
@@ -274,9 +274,9 @@
 //-----------------------------------------------------------------------
 // Increase the width of the ready queue (number of lanes) by 4
-void ready_queue_grow  (struct cluster * cltr);
+void ready_queue_grow  (struct cluster * cltr, int target);
 
 //-----------------------------------------------------------------------
 // Decrease the width of the ready queue (number of lanes) by 4
-void ready_queue_shrink(struct cluster * cltr);
+void ready_queue_shrink(struct cluster * cltr, int target);
 
 //-----------------------------------------------------------------------
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision 54eb5ebd44d38f93d7a6ae88f064436d0953610d)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision 320ec6fc95f0cd3eb0e069c2baf00faccaba5778)
@@ -37,5 +37,5 @@
 #endif
 
-#define BIAS 64
+#define BIAS 16
 
 // returns the maximum number of processors the RWLock support
@@ -500,5 +500,5 @@
 
 // Grow the ready queue
-void ready_queue_grow  (struct cluster * cltr) {
+void ready_queue_grow  (struct cluster * cltr, int target) {
 	/* paranoid */ verify( ready_mutate_islocked() );
 	__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
@@ -515,4 +515,5 @@
 		// increase count
 		ncount += 4;
+		/* paranoid */ verify( ncount == target * 4 || target < 2 );
 
 		// Allocate new array (uses realloc and memcpies the data)
@@ -550,5 +551,5 @@
 
 // Shrink the ready queue
-void ready_queue_shrink(struct cluster * cltr) {
+void ready_queue_shrink(struct cluster * cltr, int target) {
 	/* paranoid */ verify( ready_mutate_islocked() );
 	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
@@ -566,5 +567,6 @@
 		// reduce the actual count so push doesn't use the old queues
 		lanes.count -= 4;
-		verify(ocount > lanes.count);
+		/* paranoid */ verify( ocount > lanes.count );
+		/* paranoid */ verify( lanes.count == target * 4 || target < 2 );
 
 		// for printing count the number of displaced threads
Index: libcfa/src/concurrency/snzi.hfa
===================================================================
--- libcfa/src/concurrency/snzi.hfa	(revision 54eb5ebd44d38f93d7a6ae88f064436d0953610d)
+++ libcfa/src/concurrency/snzi.hfa	(revision 320ec6fc95f0cd3eb0e069c2baf00faccaba5778)
@@ -148,4 +148,5 @@
 
 static inline void arrive( __snzi_t & this, int idx) {
+	idx >>= 2;
 	idx &= this.mask;
 	arrive( this.nodes[idx] );
@@ -153,4 +154,5 @@
 
 static inline void depart( __snzi_t & this, int idx) {
+	idx >>= 2;
 	idx &= this.mask;
 	depart( this.nodes[idx] );
