Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision fc59df78713084cc115bdff89aa7588fe40bf955)
+++ libcfa/src/concurrency/invoke.h	(revision 89eff25a464816e7b42a749b35250bba33bd53e8)
@@ -148,4 +148,5 @@
 		struct $thread * prev;
 		volatile unsigned long long ts;
+		unsigned preferred;
 	};
 
@@ -199,4 +200,6 @@
 		} node;
 
+		struct processor * last_proc;
+
 		#if defined( __CFA_WITH_VERIFY__ )
 			void * canary;
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision fc59df78713084cc115bdff89aa7588fe40bf955)
+++ libcfa/src/concurrency/kernel.cfa	(revision 89eff25a464816e7b42a749b35250bba33bd53e8)
@@ -34,4 +34,9 @@
 #include "invoke.h"
 
+#if !defined(__CFA_NO_STATISTICS__) // __CFA_NO_STATISTICS__ not defined: __STATS( x ) expands to x
+	#define __STATS( ...) __VA_ARGS__
+#else // __CFA_NO_STATISTICS__ defined: __STATS( x ) expands to nothing
+	#define __STATS( ...)
+#endif // !defined(__CFA_NO_STATISTICS__)
 
 //-----------------------------------------------------------------------------
@@ -153,7 +158,5 @@
 		preemption_scope scope = { this };
 
-		#if !defined(__CFA_NO_STATISTICS__)
-			unsigned long long last_tally = rdtscl();
-		#endif
+		__STATS( unsigned long long last_tally = rdtscl(); )
 
 		// if we need to run some special setup, now is the time to do it.
@@ -253,4 +256,117 @@
 				__cfa_io_flush( this );
 			}
+
+		// 	SEARCH: {
+		// 		/* paranoid */ verify( ! __preemption_enabled() );
+		// 		/* paranoid */ verify( kernelTLS().this_proc_id );
+
+		// 		// First, lock the scheduler since we are searching for a thread
+
+		// 		// Try to get the next thread
+		// 		ready_schedule_lock();
+		// 		readyThread = pop_fast( this->cltr );
+		// 		ready_schedule_unlock();
+		// 		if(readyThread) {  break SEARCH; }
+
+		// 		// If we can't find a thread, might as well flush any outstanding I/O
+		// 		if(this->io.pending) { __cfa_io_flush( this ); }
+
+		// 		// Spin a little on I/O, just in case
+		// 		for(25) {
+		// 			__maybe_io_drain( this );
+		// 			ready_schedule_lock();
+		// 			readyThread = pop_fast( this->cltr );
+		// 			ready_schedule_unlock();
+		// 			if(readyThread) {  break SEARCH; }
+		// 		}
+
+		// 		// no luck, try stealing a few times
+		// 		for(25) {
+		// 			if( __maybe_io_drain( this ) ) {
+		// 				ready_schedule_lock();
+		// 				readyThread = pop_fast( this->cltr );
+		// 			} else {
+		// 				ready_schedule_lock();
+		// 				readyThread = pop_slow( this->cltr );
+		// 			}
+		// 			ready_schedule_unlock();
+		// 			if(readyThread) {  break SEARCH; }
+		// 		}
+
+		// 		// still no luck, search for a thread
+		// 		ready_schedule_lock();
+		// 		readyThread = pop_search( this->cltr );
+		// 		ready_schedule_unlock();
+		// 		if(readyThread) { break SEARCH; }
+
+		// 		// Don't block if we are done
+		// 		if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
+
+		// 		__STATS( __tls_stats()->ready.sleep.halts++; )
+
+		// 		// Push self to idle stack
+		// 		mark_idle(this->cltr->procs, * this);
+
+		// 		// Confirm the ready-queue is empty
+		// 		__maybe_io_drain( this );
+		// 		ready_schedule_lock();
+		// 		readyThread = pop_search( this->cltr );
+		// 		ready_schedule_unlock();
+
+		// 		if( readyThread ) {
+		// 			// A thread was found, cancel the halt
+		// 			mark_awake(this->cltr->procs, * this);
+
+		// 			__STATS( __tls_stats()->ready.sleep.cancels++; )
+
+		// 			// continue the main loop
+		// 			break SEARCH;
+		// 		}
+
+		// 		__STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl()); )
+		// 		__cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle);
+
+		// 		// __disable_interrupts_hard();
+		// 		eventfd_t val;
+		// 		eventfd_read( this->idle, &val );
+		// 		// __enable_interrupts_hard();
+
+		// 		__STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl()); )
+
+		// 		// We were woken up, remove self from idle
+		// 		mark_awake(this->cltr->procs, * this);
+
+		// 		// DON'T just proceed, start looking again
+		// 		continue MAIN_LOOP;
+		// 	}
+
+		// RUN_THREAD:
+		// 	/* paranoid */ verify( kernelTLS().this_proc_id );
+		// 	/* paranoid */ verify( ! __preemption_enabled() );
+		// 	/* paranoid */ verify( readyThread );
+
+		// 	// Reset io dirty bit
+		// 	this->io.dirty = false;
+
+		// 	// We found a thread, run it
+		// 	__run_thread(this, readyThread);
+
+		// 	// Are we done?
+		// 	if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
+
+		// 	#if !defined(__CFA_NO_STATISTICS__)
+		// 		unsigned long long curr = rdtscl();
+		// 		if(curr > (last_tally + 500000000)) {
+		// 			__tally_stats(this->cltr->stats, __cfaabi_tls.this_stats);
+		// 			last_tally = curr;
+		// 		}
+		// 	#endif
+
+		// 	if(this->io.pending && !this->io.dirty) {
+		// 		__cfa_io_flush( this );
+		// 	}
+
+		// 	// Check if there is pending io
+		// 	__maybe_io_drain( this );
 		}
 
@@ -389,7 +505,5 @@
 	$thread * thrd_src = kernelTLS().this_thread;
 
-	#if !defined(__CFA_NO_STATISTICS__)
-		struct processor * last_proc = kernelTLS().this_processor;
-	#endif
+	__STATS( thrd_src->last_proc = kernelTLS().this_processor; )
 
 	// Run the thread on this processor
@@ -410,5 +524,6 @@
 
 	#if !defined(__CFA_NO_STATISTICS__)
-		if(last_proc != kernelTLS().this_processor) {
+		/* paranoid */ verify( thrd_src->last_proc != 0p );
+		if(thrd_src->last_proc != kernelTLS().this_processor) {
 			__tls_stats()->ready.threads.migration++;
 		}
@@ -443,4 +558,5 @@
 	// Dereference the thread now because once we push it, there is not guaranteed it's still valid.
 	struct cluster * cl = thrd->curr_cluster;
+	__STATS(bool outside = thrd->last_proc && thrd->last_proc != kernelTLS().this_processor; )
 
 	ready_schedule_lock();
@@ -457,8 +573,12 @@
 		if( kernelTLS().this_stats ) {
 			__tls_stats()->ready.threads.threads++;
+			if(outside) {
+				__tls_stats()->ready.threads.extunpark++;
+			}
 			__push_stat( __tls_stats(), __tls_stats()->ready.threads.threads, false, "Processor", kernelTLS().this_processor );
 		}
 		else {
 			__atomic_fetch_add(&cl->stats->ready.threads.threads, 1, __ATOMIC_RELAXED);
+			__atomic_fetch_add(&cl->stats->ready.threads.extunpark, 1, __ATOMIC_RELAXED);
 			__push_stat( cl->stats, cl->stats->ready.threads.threads, true, "Cluster", cl );
 		}
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision fc59df78713084cc115bdff89aa7588fe40bf955)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 89eff25a464816e7b42a749b35250bba33bd53e8)
@@ -447,4 +447,6 @@
 	link.next = 0p;
 	link.prev = 0p;
+	link.preferred = -1u;
+	last_proc = 0p;
 	#if defined( __CFA_WITH_VERIFY__ )
 		canary = 0x0D15EA5E0D15EA5Ep;
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision fc59df78713084cc115bdff89aa7588fe40bf955)
+++ libcfa/src/concurrency/thread.cfa	(revision 89eff25a464816e7b42a749b35250bba33bd53e8)
@@ -39,4 +39,6 @@
 	link.next = 0p;
 	link.prev = 0p;
+	link.preferred = -1u;
+	last_proc = 0p;
 	#if defined( __CFA_WITH_VERIFY__ )
 		canary = 0x0D15EA5E0D15EA5Ep;
