Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 37ba6628ac22d4f11d3fb42a2c573168ac19bd03)
+++ libcfa/src/concurrency/kernel.cfa	(revision 04b5cef29b60d0ea2bc49962adba72c424a3e55e)
@@ -342,4 +342,5 @@
 				/* paranoid */ verifyf( readyThread->state == Ready || readyThread->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", readyThread->state, readyThread->preempted);
 				/* paranoid */ verifyf( readyThread->link.next == 0p, "Expected null got %p", readyThread->link.next );
+				__builtin_prefetch( readyThread->context.SP );
 
 				// We found a thread run it
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision 37ba6628ac22d4f11d3fb42a2c573168ac19bd03)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision 04b5cef29b60d0ea2bc49962adba72c424a3e55e)
@@ -24,4 +24,6 @@
 #include "math.hfa"
 
+#include <unistd.h>
+
 static const size_t cache_line_size = 64;
 
@@ -31,4 +33,6 @@
 	#define __CFA_MAX_PROCESSORS__ 1024
 #endif
+
+#define BIAS 64
 
 // returns the maximum number of processors the RWLock support
@@ -568,5 +572,23 @@
 	do {
 		// Pick the index of a lane
-		i = __tls_rand() % lanes.count;
+		#if defined(BIAS)
+			unsigned r = __tls_rand();
+			unsigned rlow  = r % BIAS;
+			unsigned rhigh = r / BIAS;
+			if(0 != (rlow % BIAS) && kernelTLS.this_processor) {
+				// (BIAS - 1) out of BIAS chances
+				// Use preferred queues
+				i = (kernelTLS.this_processor->id * 4) + (rhigh % 4);
+			}
+			else {
+				// 1 out of BIAS chances
+				// Use all queues
+				i = rhigh;
+			}
+		#else
+			i = __tls_rand();
+		#endif
+
+		i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
 
 		#if !defined(__CFA_NO_STATISTICS__)
@@ -666,6 +688,22 @@
 	while( query(snzi) ) {
 		// Pick two lists at random
-		int i = __tls_rand() % __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
-		int j = __tls_rand() % __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
+		#if defined(BIAS)
+			unsigned i = __tls_rand();
+			unsigned j = __tls_rand();
+
+			if(0 == (i % BIAS)) {
+				i = i / BIAS;
+			}
+			else {
+				i = ((kernelTLS.this_processor->id * 4) + ((i / BIAS) % 4));
+				j = ((kernelTLS.this_processor->id * 4) + ((j / BIAS) % 4));
+			}
+		#else
+			unsigned i = __tls_rand();
+			unsigned j = __tls_rand();
+		#endif
+
+		i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
+ 		j %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
 
 		// try popping from the 2 picked lists
