Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision eeb9f9f094667c8208ca6e79e9de4c9e6e60e836)
+++ libcfa/src/concurrency/invoke.h	(revision d3ba775d8ac5416c9c4015ef24dca70b09023946)
@@ -154,4 +154,8 @@
 		struct __stack_context_t context;
 
+		// Linked-list fields
+		// intrusive link field for threads
+		struct __thread_desc_link link;
+
 		// current execution status for coroutine
 		// Possible values are:
@@ -168,7 +172,6 @@
 		struct cluster * curr_cluster;
 
-		// Link lists fields
-		// instrusive link field for threads
-		struct __thread_desc_link link;
+		// preferred ready-queue
+		unsigned preferred;
 
 		// coroutine body used to store context
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision eeb9f9f094667c8208ca6e79e9de4c9e6e60e836)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision d3ba775d8ac5416c9c4015ef24dca70b09023946)
@@ -462,5 +462,5 @@
 	link.next = 0p;
 	link.ts   = 0;
-	link.preferred = -1u;
+	preferred = -1u;
 	last_proc = 0p;
 	#if defined( __CFA_WITH_VERIFY__ )
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision eeb9f9f094667c8208ca6e79e9de4c9e6e60e836)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision d3ba775d8ac5416c9c4015ef24dca70b09023946)
@@ -17,5 +17,4 @@
 // #define __CFA_DEBUG_PRINT_READY_QUEUE__
 
-// #define USE_MPSC
 
 #define USE_RELAXED_FIFO
@@ -274,19 +273,12 @@
 			#endif
 
-		#if defined(USE_MPSC)
-			// mpsc always succeeds
-		} while( false );
-		#else
 			// If we can't lock it retry
 		} while( !__atomic_try_acquire( &lanes.data[i].lock ) );
-		#endif
 
 		// Actually push it
 		push(lanes.data[i], thrd);
 
-		#if !defined(USE_MPSC)
 			// Unlock and return
 			__atomic_unlock( &lanes.data[i].lock );
-		#endif
 
 		// Mark the current index in the tls rng instance as having an item
@@ -347,10 +339,15 @@
 		__cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
 
+		// #define USE_PREFERRED
+		#if !defined(USE_PREFERRED)
 		const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
 		/* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count );
-
-		// write timestamp
-		#if !defined(USE_NEW_SUBQUEUE)
-			thrd->link.ts = rdtscl();
+		#else
+			unsigned preferred = thrd->preferred;
+			const bool external = (!kernelTLS().this_processor) || preferred == -1u || thrd->curr_cluster != cltr;
+			/* paranoid */ verifyf(external || preferred < lanes.count, "Invalid preferred queue %u for %u lanes", preferred, lanes.count );
+
+			unsigned r = preferred % READYQ_SHARD_FACTOR;
+			const unsigned start = preferred - r;
 		#endif
 
@@ -367,25 +364,20 @@
 			}
 			else {
+				#if !defined(USE_PREFERRED)
 				processor * proc = kernelTLS().this_processor;
 				unsigned r = proc->rdq.its++;
 				i =  proc->rdq.id + (r % READYQ_SHARD_FACTOR);
+		#else
+					i = start + (r++ % READYQ_SHARD_FACTOR);
+				#endif
 			}
-
-
-		#if defined(USE_MPSC)
-			// mpsc always succeeds
-		} while( false );
-		#else
 			// If we can't lock it retry
 		} while( !__atomic_try_acquire( &lanes.data[i].lock ) );
-		#endif
 
 		// Actually push it
 		push(lanes.data[i], thrd);
 
-		#if !defined(USE_MPSC)
 			// Unlock and return
 			__atomic_unlock( &lanes.data[i].lock );
-		#endif
 
 		#if !defined(__CFA_NO_STATISTICS__)
@@ -491,4 +483,6 @@
 		lanes.tscs[w].tv = thrd->link.ts;
 	#endif
+
+	thrd->preferred = w;
 
 	// return the popped thread
@@ -518,5 +512,5 @@
 // Check that all the intrusive queues in the data structure are still consistent
 static void check( __ready_queue_t & q ) with (q) {
-	#if defined(__CFA_WITH_VERIFY__) && !defined(USE_MPSC)
+	#if defined(__CFA_WITH_VERIFY__)
 		{
 			for( idx ; lanes.count ) {
@@ -553,10 +547,8 @@
 // fixes the list so that the pointers back to anchors aren't left dangling
 static inline void fix(__intrusive_lane_t & ll) {
-	#if !defined(USE_MPSC)
 			if(is_empty(ll)) {
 				verify(ll.anchor.next == 0p);
 				ll.prev = mock_head(ll);
 			}
-	#endif
 }
 
Index: libcfa/src/concurrency/ready_subqueue.hfa
===================================================================
--- libcfa/src/concurrency/ready_subqueue.hfa	(revision eeb9f9f094667c8208ca6e79e9de4c9e6e60e836)
+++ libcfa/src/concurrency/ready_subqueue.hfa	(revision d3ba775d8ac5416c9c4015ef24dca70b09023946)
@@ -31,4 +31,5 @@
 
 	// We add a boat-load of assertions here because the anchor code is very fragile
+	/* paranoid */ verify( offsetof( $thread, link ) == offsetof(__intrusive_lane_t, anchor) );
 	/* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.anchor) );
 	/* paranoid */ verify( &mock_head(this)->link.next == &this.anchor.next );
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision eeb9f9f094667c8208ca6e79e9de4c9e6e60e836)
+++ libcfa/src/concurrency/thread.cfa	(revision d3ba775d8ac5416c9c4015ef24dca70b09023946)
@@ -39,5 +39,5 @@
 	link.next = 0p;
 	link.ts   = 0;
-	link.preferred = -1u;
+	preferred = -1u;
 	last_proc = 0p;
 	#if defined( __CFA_WITH_VERIFY__ )
