Index: libcfa/src/bits/containers.hfa
===================================================================
--- libcfa/src/bits/containers.hfa	(revision 9f575eacc1e359b231c369fde8ace4f941121f26)
+++ libcfa/src/bits/containers.hfa	(revision 3381ed7a703d0c964d6e94bffab7c58f0cdf0d17)
@@ -146,24 +146,31 @@
 	static inline forall( dtype T | is_node(T) ) {
 		void ?{}( __queue(T) & this ) with( this ) {
-			head{ 0p };
+			head{ 1p };
 			tail{ &head };
+			verify(*tail == 1p);
 		}
 
 		void append( __queue(T) & this, T * val ) with( this ) {
 			verify(tail != 0p);
+			verify(*tail == 1p);
 			*tail = val;
 			tail = &get_next( *val );
+			*tail = 1p;
 		}
 
 		T * pop_head( __queue(T) & this ) {
+			verify(*this.tail == 1p);
 			T * head = this.head;
-			if( head ) {
+			if( head != 1p ) {
 				this.head = get_next( *head );
-				if( !get_next( *head ) ) {
+				if( get_next( *head ) == 1p ) {
 					this.tail = &this.head;
 				}
 				get_next( *head ) = 0p;
-			}
-			return head;
+				verify(*this.tail == 1p);
+				return head;
+			}
+			verify(*this.tail == 1p);
+			return 0p;
 		}
 
@@ -180,6 +187,6 @@
 			get_next( *val ) = 0p;
 
-			verify( (head == 0p) == (&head == tail) );
-			verify( *tail == 0p );
+			verify( (head == 1p) == (&head == tail) );
+			verify( *tail == 1p );
 			return val;
 		}
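
The hunk above replaces the queue's 0p end-of-list marker with a 1p sentinel, so a node's next field now encodes three states: 0p (not in any queue), 1p (queued, last element), or a pointer to its successor. A minimal usage sketch under those invariants; the node type and its get_next are made up for illustration, while __queue, append, and pop_head are the operations above:

	struct node { struct node * next; };
	static inline struct node *& get_next( struct node & this ) { return this.next; }

	static void demo( void ) {
		__queue(node) q;            // constructor: head = 1p, tail = &head
		node n = { 0p };            // next == 0p means "not queued"
		append( q, &n );            // n.next becomes the 1p sentinel
		node * h = pop_head( q );   // returns &n and resets n.next to 0p
		node * e = pop_head( q );   // empty queue: returns 0p, never the sentinel
	}
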
Index: libcfa/src/bits/locks.hfa
===================================================================
--- libcfa/src/bits/locks.hfa	(revision 9f575eacc1e359b231c369fde8ace4f941121f26)
+++ libcfa/src/bits/locks.hfa	(revision 3381ed7a703d0c964d6e94bffab7c58f0cdf0d17)
@@ -60,6 +60,4 @@
 	}
 
-	extern void yield( unsigned int );
-
 	static inline void ?{}( __spinlock_t & this ) {
 		this.lock = 0;
@@ -68,8 +66,10 @@
 	// Lock the spinlock, return false if already acquired
 	static inline bool try_lock  ( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) {
+		disable_interrupts();
 		bool result = (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0);
 		if( result ) {
-			disable_interrupts();
 			__cfaabi_dbg_record( this, caller );
+		} else {
+			enable_interrupts_noPoll();
 		}
 		return result;
@@ -83,4 +83,5 @@
 		#endif
 
+		disable_interrupts();
 		for ( unsigned int i = 1;; i += 1 ) {
 			if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break;
@@ -98,11 +99,10 @@
 			#endif
 		}
-		disable_interrupts();
 		__cfaabi_dbg_record( this, caller );
 	}
 
 	static inline void unlock( __spinlock_t & this ) {
+		__atomic_clear( &this.lock, __ATOMIC_RELEASE );
 		enable_interrupts_noPoll();
-		__atomic_clear( &this.lock, __ATOMIC_RELEASE );
 	}
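
The net effect of these moves is that interrupts are disabled before the lock word is touched and re-enabled only after it is cleared, so a thread can never be preempted while holding a spinlock; the failed try_lock path re-enables them immediately. A hedged sketch of the resulting critical-section shape, where the counter and its lock are invented and lock/unlock are the routines above:

	static __spinlock_t counter_lock;
	static volatile int counter = 0;

	static void bump( void ) {
		lock( counter_lock __cfaabi_dbg_ctx2 );  // disables interrupts, then spins for the lock
		counter += 1;                            // no preemption can land in here
		unlock( counter_lock );                  // clears the lock word, then re-enables interrupts
	}
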
 
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision 9f575eacc1e359b231c369fde8ace4f941121f26)
+++ libcfa/src/concurrency/invoke.h	(revision 3381ed7a703d0c964d6e94bffab7c58f0cdf0d17)
@@ -93,4 +93,6 @@
 
 	enum coroutine_state { Halted, Start, Primed, Inactive, Active, Rerun, Reschedule };
+	enum __Preemption_Reason { __NO_PREEMPTION, __ALARM_PREEMPTION, __POLL_PREEMPTION };
+	enum __Owner_Reason { __NO_OWNER, __ENTER_FREE, __ENTER_ACCEPT, __ENTER_DTOR_FREE, __ENTER_DTOR_ACCEPT, __ENTER_SIGNAL_BLOCK, __WAITFOR, __LEAVE, __LEAVE_THREAD, __WAIT };
 
 	struct coroutine_desc {
@@ -134,4 +136,6 @@
 		struct thread_desc * owner;
 
+		enum __Owner_Reason owner_reason;
+
 		// queue of threads that are blocked waiting for the monitor
 		__queue_t(struct thread_desc) entry_queue;
@@ -165,5 +169,5 @@
 		// current execution status for coroutine
 		volatile int state;
-		int preempted;
+		enum __Preemption_Reason preempted;
 
 		//SKULLDUGGERY: errno is not saved in the thread data structure because returnToKernel appears to be the only function that requires saving and restoring it
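
With preempted widened from an int flag to an enum, the kernel records why a thread was forced off the processor, not just that it was. A small hypothetical helper showing the intended discrimination; only the field and the enumerators above are real:

	static inline bool was_preempted( struct thread_desc * thrd ) {
		// __NO_PREEMPTION marks a voluntary park; __ALARM_PREEMPTION and
		// __POLL_PREEMPTION record the two flavours of forced yield.
		return thrd->preempted != __NO_PREEMPTION;
	}
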
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 9f575eacc1e359b231c369fde8ace4f941121f26)
+++ libcfa/src/concurrency/kernel.cfa	(revision 3381ed7a703d0c964d6e94bffab7c58f0cdf0d17)
@@ -258,6 +258,6 @@
 // Kernel Scheduling logic
 //=============================================================================================
+static thread_desc * nextThread(cluster * this);
 static void runThread(processor * this, thread_desc * dst);
-static void finishRunning(processor * this);
 static void halt(processor * this);
 
@@ -286,17 +286,16 @@
 
 			if(readyThread) {
-				verify( ! kernelTLS.preemption_state.enabled );
+				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+				/* paranoid */ verifyf( readyThread->state == Inactive || readyThread->state == Start || readyThread->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", readyThread->state, readyThread->preempted);
+				/* paranoid */ verifyf( readyThread->next == 0p, "Expected null got %p", readyThread->next );
 
 				runThread(this, readyThread);
 
-				verify( ! kernelTLS.preemption_state.enabled );
-
-				//Some actions need to be taken from the kernel
-				finishRunning(this);
+				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 
 				spin_count = 0;
 			} else {
 				// spin(this, &spin_count);
-				halt(this);
+				// halt(this);
 			}
 		}
@@ -332,11 +331,14 @@
 
 	// Actually run the thread
-	RUN:
-	{
+	RUNNING:  while(true) {
 		if(unlikely(thrd_dst->preempted)) {
-			thrd_dst->preempted = false;
+			thrd_dst->preempted = __NO_PREEMPTION;
+			verify(thrd_dst->state == Active || thrd_dst->state == Rerun || thrd_dst->state == Reschedule);
 		} else {
+			verify(thrd_dst->state == Start || thrd_dst->state == Primed || thrd_dst->state == Inactive);
 			thrd_dst->state = Active;
 		}
+
+		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 
 		// set context switch to the thread that the processor is executing
@@ -344,50 +346,48 @@
 		CtxSwitch( &proc_cor->context, &thrd_dst->context );
 		// when CtxSwitch returns we are back in the processor coroutine
-	}
-
-	// We just finished running a thread, there are a few things that could have happened.
-	// 1 - Regular case : the thread has blocked and now one has scheduled it yet.
-	// 2 - Racy case    : the thread has blocked but someone has already tried to schedule it.
-	// 3 - Polite Racy case : the thread has blocked, someone has already tried to schedule it, but the thread is nice and wants to go through the ready-queue any way
-	// 4 - Preempted
-	// In case 1, we may have won a race so we can't write to the state again.
-	// In case 2, we lost the race so we now own the thread.
-	// In case 3, we lost the race but can just reschedule the thread.
-
-	if(unlikely(thrd_dst->preempted)) {
-		// The thread was preempted, reschedule it and reset the flag
-		ScheduleThread( thrd_dst );
-
-		// Just before returning to the processor, set the processor coroutine to active
-		proc_cor->state = Active;
-		return;
-	}
-
-	// set state of processor coroutine to active and the thread to inactive
-	enum coroutine_state old_state = __atomic_exchange_n(&thrd_dst->state, Inactive, __ATOMIC_SEQ_CST);
-	switch(old_state) {
-		case Halted:
-			// The thread has halted, it should never be scheduled/run again, leave it back to Halted and move on
-			thrd_dst->state = Halted;
-			break;
-		case Active:
-			// This is case 1, the regular case, nothing more is needed
-			break;
-		case Rerun:
-			// This is case 2, the racy case, someone tried to run this thread before it finished blocking
-			// In this case, just run it again.
-			goto RUN;
-		case Reschedule:
-			// This is case 3, someone tried to run this before it finished blocking
-			// but it must go through the ready-queue
-			thrd_dst->state = Inactive;  /*restore invariant */
+
+		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+
+
+		// We just finished running a thread; there are a few things that could have happened.
+		// 1 - Regular case : the thread has blocked and no one has scheduled it yet.
+		// 2 - Racy case    : the thread has blocked but someone has already tried to schedule it.
+		// 3 - Polite Racy case : the thread has blocked, someone has already tried to schedule it, but the thread is nice and wants to go through the ready-queue anyway.
+		// 4 - Preempted
+		// In case 1, we may have won a race so we can't write to the state again.
+		// In case 2, we lost the race so we now own the thread.
+		// In case 3, we lost the race but can just reschedule the thread.
+
+		if(unlikely(thrd_dst->preempted != __NO_PREEMPTION)) {
+			// The thread was preempted, reschedule it and reset the flag
 			ScheduleThread( thrd_dst );
-			break;
-		case Inactive:
-		case Start:
-		case Primed:
-		default:
-			// This makes no sense, something is wrong abort
-			abort("Finished running a thread that was Inactive/Start/Primed %d\n", old_state);
+			break RUNNING;
+		}
+
+		// atomically set the thread to inactive and observe what happened while it ran
+		static_assert(sizeof(thrd_dst->state) == sizeof(int));
+		enum coroutine_state old_state = __atomic_exchange_n(&thrd_dst->state, Inactive, __ATOMIC_SEQ_CST);
+		switch(old_state) {
+			case Halted:
+				// The thread has halted; it should never be scheduled/run again, so set it back to Halted and move on
+				thrd_dst->state = Halted;
+				break RUNNING;
+			case Active:
+				// This is case 1, the regular case, nothing more is needed
+				break RUNNING;
+			case Rerun:
+				// This is case 2, the racy case, someone tried to run this thread before it finished blocking
+				// In this case, just run it again.
+				continue RUNNING;
+			case Reschedule:
+				// This is case 3, someone tried to run this before it finished blocking
+				// but it must go through the ready-queue
+				thrd_dst->state = Inactive;  /*restore invariant */
+				ScheduleThread( thrd_dst );
+				break RUNNING;
+			default:
+				// This makes no sense, something is wrong: abort
+				abort("Finished running a thread that was Inactive/Start/Primed %d\n", old_state);
+		}
 	}
 
@@ -398,5 +398,5 @@
 // KERNEL_ONLY
 static void returnToKernel() {
-	verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 	coroutine_desc * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
 	thread_desc * thrd_src = kernelTLS.this_thread;
@@ -416,39 +416,5 @@
 	}
 
-	verify( ! kernelTLS.preemption_state.enabled );
-}
-
-// KERNEL_ONLY
-// Once a thread has finished running, some of
-// its final actions must be executed from the kernel
-static void finishRunning(processor * this) with( this->finish ) {
-	verify( ! kernelTLS.preemption_state.enabled );
-	verify( action_code == No_Action );
-	choose( action_code ) {
-	case No_Action:
-		break;
-	case Release:
-		unlock( *lock );
-	case Schedule:
-		ScheduleThread( thrd );
-	case Release_Schedule:
-		unlock( *lock );
-		ScheduleThread( thrd );
-	case Release_Multi:
-		for(int i = 0; i < lock_count; i++) {
-			unlock( *locks[i] );
-		}
-	case Release_Multi_Schedule:
-		for(int i = 0; i < lock_count; i++) {
-			unlock( *locks[i] );
-		}
-		for(int i = 0; i < thrd_count; i++) {
-			ScheduleThread( thrds[i] );
-		}
-	case Callback:
-		callback();
-	default:
-		abort("KERNEL ERROR: Unexpected action to run after thread");
-	}
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 }
 
@@ -581,9 +547,13 @@
 //-----------------------------------------------------------------------------
 // Scheduler routines
-
 // KERNEL ONLY
 void ScheduleThread( thread_desc * thrd ) with( *thrd->curr_cluster ) {
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	/* paranoid */ verifyf( thrd->state == Inactive || thrd->state == Start || thrd->preempted, "state : %d, preempted %d\n", thrd->state, thrd->preempted);
+	/* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
+	/* paranoid */ if( thrd->state == Inactive || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
+	                  "Error inactive thread marked as preempted, state %d, preemption %d\n", thrd->state, thrd->preempted );
+	/* paranoid */ if( thrd->preempted != __NO_PREEMPTION ) assertf(thrd->state == Active || thrd->state == Rerun || thrd->state == Reschedule,
+	                  "Error preempted thread marked as not currently running, state %d, preemption %d\n", thrd->state, thrd->preempted );
+	/* paranoid */ #endif
 	/* paranoid */ verifyf( thrd->next == 0p, "Expected null got %p", thrd->next );
 
@@ -608,102 +578,93 @@
 
 // KERNEL ONLY
-thread_desc * nextThread(cluster * this) with( *this ) {
-	verify( ! kernelTLS.preemption_state.enabled );
+static thread_desc * nextThread(cluster * this) with( *this ) {
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+
 	lock( ready_queue_lock __cfaabi_dbg_ctx2 );
 	thread_desc * head = pop_head( ready_queue );
 	unlock( ready_queue_lock );
-	verify( ! kernelTLS.preemption_state.enabled );
+
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 	return head;
 }
 
-void BlockInternal() {
+void unpark( thread_desc * thrd, bool must_yield ) {
+	if( !thrd ) return;
+
+	enum coroutine_state new_state = must_yield ? Reschedule : Rerun;
+
 	disable_interrupts();
-	verify( ! kernelTLS.preemption_state.enabled );
+	static_assert(sizeof(thrd->state) == sizeof(int));
+	enum coroutine_state old_state = __atomic_exchange_n(&thrd->state, new_state, __ATOMIC_SEQ_CST);
+	switch(old_state) {
+		case Active:
+			// Wake won the race, the thread will reschedule/rerun itself
+			break;
+		case Inactive:
+			/* paranoid */ verify( thrd->preempted == __NO_PREEMPTION );
+
+			// Wake lost the race, the thread was already blocked: restore the state and schedule it
+			thrd->state = Inactive;
+			ScheduleThread( thrd );
+			break;
+		case Rerun:
+		case Reschedule:
+			abort("More than one thread attempted to schedule thread %p\n", thrd);
+			break;
+		case Halted:
+		case Start:
+		case Primed:
+		default:
+			// This makes no sense, something is wrong: abort
+			abort();
+	}
+	enable_interrupts( __cfaabi_dbg_ctx );
+}
+
+void park( void ) {
+	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
+	disable_interrupts();
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
+
 	returnToKernel();
-	verify( ! kernelTLS.preemption_state.enabled );
+
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
-void BlockInternal( __spinlock_t * lock ) {
+	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
+
+}
+
+// KERNEL ONLY
+void LeaveThread() {
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	returnToKernel();
+}
+
+// KERNEL ONLY
+bool force_yield( __Preemption_Reason reason ) {
+	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
 	disable_interrupts();
-	unlock( *lock );
-
-	verify( ! kernelTLS.preemption_state.enabled );
-	returnToKernel();
-	verify( ! kernelTLS.preemption_state.enabled );
-
-	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
-void BlockInternal( thread_desc * thrd ) {
-	disable_interrupts();
-	WakeThread( thrd, false );
-
-	verify( ! kernelTLS.preemption_state.enabled );
-	returnToKernel();
-	verify( ! kernelTLS.preemption_state.enabled );
-
-	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
-void BlockInternal( __spinlock_t * lock, thread_desc * thrd ) {
-	disable_interrupts();
-	unlock( *lock );
-	WakeThread( thrd, false );
-
-	verify( ! kernelTLS.preemption_state.enabled );
-	returnToKernel();
-	verify( ! kernelTLS.preemption_state.enabled );
-
-	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
-void BlockInternal(__spinlock_t * locks [], unsigned short count) {
-	disable_interrupts();
-	for(int i = 0; i < count; i++) {
-		unlock( *locks[i] );
-	}
-
-	verify( ! kernelTLS.preemption_state.enabled );
-	returnToKernel();
-	verify( ! kernelTLS.preemption_state.enabled );
-
-	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
-void BlockInternal(__spinlock_t * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
-	disable_interrupts();
-	for(int i = 0; i < lock_count; i++) {
-		unlock( *locks[i] );
-	}
-	for(int i = 0; i < thrd_count; i++) {
-		WakeThread( thrds[i], false );
-	}
-
-	verify( ! kernelTLS.preemption_state.enabled );
-	returnToKernel();
-	verify( ! kernelTLS.preemption_state.enabled );
-
-	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
-void BlockInternal(__finish_callback_fptr_t callback) {
-	disable_interrupts();
-	callback();
-
-	verify( ! kernelTLS.preemption_state.enabled );
-	returnToKernel();
-	verify( ! kernelTLS.preemption_state.enabled );
-
-	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
-// KERNEL ONLY
-void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
-	verify( ! kernelTLS.preemption_state.enabled );
-	unlock( *lock );
-	WakeThread( thrd, false );
-
-	returnToKernel();
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+
+	thread_desc * thrd = kernelTLS.this_thread;
+	/* paranoid */ verify(thrd->state == Active || thrd->state == Rerun || thrd->state == Reschedule);
+
+	// SKULLDUGGERY: It is possible that we are preempting this thread just before
+	// it was going to park itself. If that is the case and it is already using its
+	// intrusive fields, then we cannot use them to preempt it.
+	// In that case, abandon the preemption.
+	bool preempted = false;
+	if(thrd->next == 0p) {
+		preempted = true;
+		thrd->preempted = reason;
+		returnToKernel();
+	}
+
+	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	enable_interrupts_noPoll();
+	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
+
+	return preempted;
 }
 
@@ -939,5 +900,6 @@
 
 		// atomically release spin lock and block
-		BlockInternal( &lock );
+		unlock( lock );
+		park();
 	}
 	else {
@@ -958,5 +920,5 @@
 
 	// make new owner
-	WakeThread( thrd, false );
+	unpark( thrd );
 }
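
The heart of this rewrite is the park/unpark handshake: the parking side (in runThread) and the waking side (in unpark) each do an atomic exchange on the thread's state, so exactly one of them observes the other and takes responsibility for the wakeup. A hedged usage sketch of the resulting one-shot synchronization; the waiter variable is invented, park and unpark are the routines above:

	static thread_desc * volatile waiter = 0p;

	static void consumer( void ) {
		waiter = kernelTLS.this_thread;  // publish self before blocking
		park();                          // returns immediately if unpark already ran
	}

	static void producer( void ) {
		// Safe once 'waiter' is published: if the consumer has not parked yet,
		// the exchange marks it Rerun and it keeps running; if it has parked,
		// the exchange finds Inactive and the thread is rescheduled.
		unpark( waiter, false );
	}
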
 
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision 9f575eacc1e359b231c369fde8ace4f941121f26)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 3381ed7a703d0c964d6e94bffab7c58f0cdf0d17)
@@ -32,45 +32,9 @@
 
 void ScheduleThread( thread_desc * ) __attribute__((nonnull (1)));
-static inline void WakeThread( thread_desc * thrd, bool must_yield ) {
-	if( !thrd ) return;
-
-	enum coroutine_state new_state = must_yield ? Reschedule : Rerun;
-
-	disable_interrupts();
-	static_assert(sizeof(thrd->state) == sizeof(int));
-	enum coroutine_state old_state = (enum coroutine_state)__atomic_exchange_n((volatile int *)&thrd->state, (int)new_state, __ATOMIC_SEQ_CST);
-	switch(old_state) {
-		case Active:
-			// Wake won the race, the thread will reschedule/rerun itself
-			break;
-		case Inactive:
-			// Wake lost the race,
-			thrd->state = Inactive;
-			ScheduleThread( thrd );
-			break;
-		case Rerun:
-		case Reschedule:
-			abort("More than one thread attempted to schedule thread %p\n", thrd);
-			break;
-		case Halted:
-		case Start:
-		case Primed:
-		default:
-			// This makes no sense, something is wrong abort
-			abort();
-	}
-	enable_interrupts( __cfaabi_dbg_ctx );
-}
-thread_desc * nextThread(cluster * this);
 
 //Block current thread and release/wake-up the following resources
-void BlockInternal(void);
-void BlockInternal(__spinlock_t * lock);
-void BlockInternal(thread_desc * thrd);
-void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
-void BlockInternal(__spinlock_t * locks [], unsigned short count);
-void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
-void BlockInternal(__finish_callback_fptr_t callback);
-void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
+void LeaveThread();
+
+bool force_yield( enum __Preemption_Reason );
 
 //-----------------------------------------------------------------------------
Index: libcfa/src/concurrency/monitor.cfa
===================================================================
--- libcfa/src/concurrency/monitor.cfa	(revision 9f575eacc1e359b231c369fde8ace4f941121f26)
+++ libcfa/src/concurrency/monitor.cfa	(revision 3381ed7a703d0c964d6e94bffab7c58f0cdf0d17)
@@ -27,10 +27,10 @@
 //-----------------------------------------------------------------------------
 // Forward declarations
-static inline void set_owner ( monitor_desc * this, thread_desc * owner );
-static inline void set_owner ( monitor_desc * storage [], __lock_size_t count, thread_desc * owner );
+static inline void set_owner ( monitor_desc * this, thread_desc * owner, enum __Owner_Reason );
+static inline void set_owner ( monitor_desc * storage [], __lock_size_t count, thread_desc * owner, enum __Owner_Reason );
 static inline void set_mask  ( monitor_desc * storage [], __lock_size_t count, const __waitfor_mask_t & mask );
 static inline void reset_mask( monitor_desc * this );
 
-static inline thread_desc * next_thread( monitor_desc * this );
+static inline thread_desc * next_thread( monitor_desc * this, enum __Owner_Reason );
 static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & monitors );
 
@@ -94,5 +94,5 @@
 		if( !this->owner ) {
 			// No one has the monitor, just take it
-			set_owner( this, thrd );
+			set_owner( this, thrd, __ENTER_FREE );
 
 			__cfaabi_dbg_print_safe( "Kernel :  mon is free \n" );
@@ -106,5 +106,5 @@
 		else if( is_accepted( this, group) ) {
 			// Some one was waiting for us, enter
-			set_owner( this, thrd );
+			set_owner( this, thrd, __ENTER_ACCEPT );
 
 			// Reset mask
@@ -117,15 +117,21 @@
 
 			// Some one else has the monitor, wait in line for it
+			/* paranoid */ verify( thrd->next == 0p );
 			append( this->entry_queue, thrd );
-
-			BlockInternal( &this->lock );
+			/* paranoid */ verify( thrd->next == 1p );
+
+			unlock( this->lock );
+			park();
 
 			__cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
 
-			// BlockInternal will unlock spinlock, no need to unlock ourselves
+			/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
 			return;
 		}
 
 		__cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
+
+		/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verify( this->lock.lock );
 
 		// Release the lock and leave
@@ -147,5 +153,7 @@
 
 			// No one has the monitor, just take it
-			set_owner( this, thrd );
+			set_owner( this, thrd, __ENTER_DTOR_FREE );
+
+			verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
 
 			unlock( this->lock );
@@ -166,5 +174,5 @@
 			// Wake the thread that is waiting for this
 			__condition_criterion_t * urgent = pop( this->signal_stack );
-			verify( urgent );
+			/* paranoid */ verify( urgent );
 
 			// Reset mask
@@ -175,8 +183,15 @@
 
 			// Some one else has the monitor, wait for him to finish and then run
-			BlockInternal( &this->lock, urgent->owner->waiting_thread );
+			unlock( this->lock );
+
+			// Release the next thread
+			/* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+			unpark( urgent->owner->waiting_thread );
+
+			// Park current thread waiting
+			park();
 
 			// Some one was waiting for us, enter
-			set_owner( this, thrd );
+			/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
 		}
 		else {
@@ -187,8 +202,13 @@
 
 			// Some one else has the monitor, wait in line for it
+			/* paranoid */ verify( thrd->next == 0p );
 			append( this->entry_queue, thrd );
-			BlockInternal( &this->lock );
-
-			// BlockInternal will unlock spinlock, no need to unlock ourselves
+			/* paranoid */ verify( thrd->next == 1p );
+			unlock( this->lock );
+
+			// Park current thread waiting
+			park();
+
+			/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
 			return;
 		}
@@ -205,5 +225,5 @@
 		__cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
 
-		verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
 
 		// Leaving a recursion level, decrement the counter
@@ -219,5 +239,9 @@
 
 		// Get the next thread, will be null on low contention monitor
-		thread_desc * new_owner = next_thread( this );
+		thread_desc * new_owner = next_thread( this, __LEAVE );
+
+		// Check that the new owner is consistent with whom we wake up
+		// new_owner may be null even when the monitor has an owner, e.g., if that owner is still waiting for another monitor
+		/* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
 
 		// We can now let other threads in safely
@@ -225,5 +249,6 @@
 
 		//We need to wake-up the thread
-		WakeThread( new_owner, false );
+		/* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
+		unpark( new_owner );
 	}
 
@@ -254,5 +279,5 @@
 		thrd->self_cor.state = Halted;
 
-		verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
 
 		// Leaving a recursion level, decrement the counter
@@ -264,10 +289,17 @@
 
 		// Fetch the next thread, can be null
-		thread_desc * new_owner = next_thread( this );
+		thread_desc * new_owner = next_thread( this, __LEAVE_THREAD );
+
+		// Release the monitor lock
+		unlock( this->lock );
+
+		// Unpark the next owner if needed
+		/* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
+		unpark( new_owner );
 
-		// Leave the thread, this will unlock the spinlock
-		// Use leave thread instead of BlockInternal which is
-		// specialized for this case and supports null new_owner
-		LeaveThread( &this->lock, new_owner );
+		// Leave the thread; the monitor lock was already released above
+		// Use LeaveThread instead of park, which is
+		// specialized for this case
+		LeaveThread();
 
 		// Control flow should never reach here!
@@ -400,5 +432,7 @@
 	// Append the current wait operation to the ones already queued on the condition
 	// We don't need locks for that since conditions must always be waited on inside monitor mutual exclusion
+	/* paranoid */ verify( waiter.next == 0p );
 	append( this.blocked, &waiter );
+	/* paranoid */ verify( waiter.next == 1p );
 
 	// Lock all monitors (aggregates the locks as well)
@@ -415,10 +449,20 @@
 	// Remove any duplicate threads
 	for( __lock_size_t i = 0; i < count; i++) {
-		thread_desc * new_owner = next_thread( monitors[i] );
+		thread_desc * new_owner = next_thread( monitors[i], __WAIT );
 		insert_unique( threads, thread_count, new_owner );
 	}
 
+	// Unlock the locks, we don't need them anymore
+	for(int i = 0; i < count; i++) {
+		unlock( *locks[i] );
+	}
+
+	// Wake the threads
+	for(int i = 0; i < thread_count; i++) {
+		unpark( threads[i] );
+	}
+
 	// Everything is ready to go to sleep
-	BlockInternal( locks, count, threads, thread_count );
+	park();
 
 	// We are back, restore the owners and recursions
@@ -490,10 +534,17 @@
 	//Find the thread to run
 	thread_desc * signallee = pop_head( this.blocked )->waiting_thread;
-	set_owner( monitors, count, signallee );
+	/* paranoid */ verify( signallee->next == 0p );
+	set_owner( monitors, count, signallee, __ENTER_SIGNAL_BLOCK );
 
 	__cfaabi_dbg_print_buffer_decl( "Kernel : signal_block condition %p (s: %p)\n", &this, signallee );
 
+	// unlock all the monitors
+	unlock_all( locks, count );
+
+	// unpark the thread we signalled
+	unpark( signallee );
+
 	//Everything is ready to go to sleep
-	BlockInternal( locks, count, &signallee, 1 );
+	park();
 
 
@@ -590,8 +641,14 @@
 
 				// Set the owners to be the next thread
-				set_owner( monitors, count, next );
-
-				// Everything is ready to go to sleep
-				BlockInternal( locks, count, &next, 1 );
+				set_owner( monitors, count, next, __WAITFOR );
+
+				// unlock all the monitors
+				unlock_all( locks, count );
+
+				// unpark the thread we signalled
+				unpark( next );
+
+				//Everything is ready to go to sleep
+				park();
 
 				// We are back, restore the owners and recursions
@@ -631,6 +688,9 @@
 	}
 
+	// unlock all the monitors
+	unlock_all( locks, count );
+
 	//Everything is ready to go to sleep
-	BlockInternal( locks, count );
+	park();
 
 
@@ -649,9 +709,10 @@
 // Utilities
 
-static inline void set_owner( monitor_desc * this, thread_desc * owner ) {
-	// __cfaabi_dbg_print_safe( "Kernal :   Setting owner of %p to %p ( was %p)\n", this, owner, this->owner );
+static inline void set_owner( monitor_desc * this, thread_desc * owner, enum __Owner_Reason reason ) {
+	/* paranoid */ verify( this->lock.lock );
 
 	//Pass the monitor appropriately
 	this->owner = owner;
+	this->owner_reason = reason;
 
 	//We are passing the monitor to someone else, which means recursion level is not 0
@@ -659,10 +720,16 @@
 }
 
-static inline void set_owner( monitor_desc * monitors [], __lock_size_t count, thread_desc * owner ) {
-	monitors[0]->owner     = owner;
-	monitors[0]->recursion = 1;
+static inline void set_owner( monitor_desc * monitors [], __lock_size_t count, thread_desc * owner, enum __Owner_Reason reason ) {
+	/* paranoid */ verify ( monitors[0]->lock.lock );
+	/* paranoid */ verifyf( monitors[0]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[0]->owner, monitors[0]->recursion, monitors[0] );
+	monitors[0]->owner        = owner;
+	monitors[0]->owner_reason = reason;
+	monitors[0]->recursion    = 1;
 	for( __lock_size_t i = 1; i < count; i++ ) {
-		monitors[i]->owner     = owner;
-		monitors[i]->recursion = 0;
+		/* paranoid */ verify ( monitors[i]->lock.lock );
+		/* paranoid */ verifyf( monitors[i]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[i]->owner, monitors[i]->recursion, monitors[i] );
+		monitors[i]->owner        = owner;
+		monitors[i]->owner_reason = reason;
+		monitors[i]->recursion    = 0;
 	}
 }
@@ -680,5 +747,5 @@
 }
 
-static inline thread_desc * next_thread( monitor_desc * this ) {
+static inline thread_desc * next_thread( monitor_desc * this, enum __Owner_Reason reason ) {
 	//Check the signaller stack
 	__cfaabi_dbg_print_safe( "Kernel :  mon %p AS-stack top %p\n", this, this->signal_stack.top);
@@ -688,5 +755,6 @@
 		//regardless of if we are ready to baton pass,
 		//we need to set the monitor as in use
-		set_owner( this,  urgent->owner->waiting_thread );
+		/* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		set_owner( this,  urgent->owner->waiting_thread, reason );
 
 		return check_condition( urgent );
@@ -696,5 +764,7 @@
 	// Get the next thread in the entry_queue
 	thread_desc * new_owner = pop_head( this->entry_queue );
-	set_owner( this, new_owner );
+	/* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+	/* paranoid */ verify( !new_owner || new_owner->next == 0p );
+	set_owner( this, new_owner, reason );
 
 	return new_owner;
@@ -841,5 +911,5 @@
 	// For each thread in the entry-queue
 	for(	thread_desc ** thrd_it = &entry_queue.head;
-		*thrd_it;
+		*thrd_it != 1p;
 		thrd_it = &(*thrd_it)->next
 	) {
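
Every former BlockInternal( &lock ) call site above becomes the same three-step pattern: enqueue self while holding the monitor spinlock, release the lock, then park. The window between unlock and park is safe precisely because of the unpark handshake: an unpark arriving in that window marks the thread Rerun and the subsequent park returns immediately. The pattern in isolation, as a hedged sketch where the queue and lock parameters stand in for the monitor's entry_queue and lock:

	static void block_on( __queue_t(thread_desc) & q, __spinlock_t & q_lock ) {
		thread_desc * thrd = kernelTLS.this_thread;
		lock( q_lock __cfaabi_dbg_ctx2 );
		append( q, thrd );   // publish self while the lock is held
		unlock( q_lock );    // from here on, another thread may unpark( thrd )
		park();              // no lost wakeup: returns at once if unpark already ran
	}
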
Index: libcfa/src/concurrency/monitor.hfa
===================================================================
--- libcfa/src/concurrency/monitor.hfa	(revision 9f575eacc1e359b231c369fde8ace4f941121f26)
+++ libcfa/src/concurrency/monitor.hfa	(revision 3381ed7a703d0c964d6e94bffab7c58f0cdf0d17)
@@ -32,4 +32,5 @@
 	signal_stack{};
 	owner         = 0p;
+	owner_reason  = __NO_OWNER;
 	recursion     = 0;
 	mask.accepted = 0p;
@@ -133,5 +134,5 @@
               bool signal      ( condition & this );
               bool signal_block( condition & this );
-static inline bool is_empty    ( condition & this ) { return !this.blocked.head; }
+static inline bool is_empty    ( condition & this ) { return this.blocked.head == 1p; }
          uintptr_t front       ( condition & this );
 
Index: libcfa/src/concurrency/mutex.cfa
===================================================================
--- libcfa/src/concurrency/mutex.cfa	(revision 9f575eacc1e359b231c369fde8ace4f941121f26)
+++ libcfa/src/concurrency/mutex.cfa	(revision 3381ed7a703d0c964d6e94bffab7c58f0cdf0d17)
@@ -40,5 +40,6 @@
 	if( is_locked ) {
 		append( blocked_threads, kernelTLS.this_thread );
-		BlockInternal( &lock );
+		unlock( lock );
+		park();
 	}
 	else {
@@ -62,6 +63,6 @@
 	lock( this.lock __cfaabi_dbg_ctx2 );
 	this.is_locked = (this.blocked_threads != 0);
-	WakeThread(
-		pop_head( this.blocked_threads ), false
+	unpark(
+		pop_head( this.blocked_threads )
 	);
 	unlock( this.lock );
@@ -94,5 +95,6 @@
 	else {
 		append( blocked_threads, kernelTLS.this_thread );
-		BlockInternal( &lock );
+		unlock( lock );
+		park();
 	}
 }
@@ -121,5 +123,5 @@
 		owner = thrd;
 		recursion_count = (thrd ? 1 : 0);
-		WakeThread( thrd, false );
+		unpark( thrd );
 	}
 	unlock( lock );
@@ -138,6 +140,6 @@
 void notify_one(condition_variable & this) with(this) {
 	lock( lock __cfaabi_dbg_ctx2 );
-	WakeThread(
-		pop_head( this.blocked_threads ), false
+	unpark(
+		pop_head( this.blocked_threads )
 	);
 	unlock( lock );
@@ -147,6 +149,6 @@
 	lock( lock __cfaabi_dbg_ctx2 );
 	while(this.blocked_threads) {
-		WakeThread(
-			pop_head( this.blocked_threads ), false
+		unpark(
+			pop_head( this.blocked_threads )
 		);
 	}
@@ -157,5 +159,6 @@
 	lock( this.lock __cfaabi_dbg_ctx2 );
 	append( this.blocked_threads, kernelTLS.this_thread );
-	BlockInternal( &this.lock );
+	unlock( this.lock );
+	park();
 }
 
@@ -164,9 +167,7 @@
 	lock( this.lock __cfaabi_dbg_ctx2 );
 	append( this.blocked_threads, kernelTLS.this_thread );
-	void __unlock(void) {
-		unlock(l);
-		unlock(this.lock);
-	}
-	BlockInternal( __unlock );
+	unlock(l);
+	unlock(this.lock);
+	park();
 	lock(l);
 }
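
The two-lock wait above no longer needs the nested __unlock callback: both locks are released inline before park, again relying on the handshake to tolerate an unpark that slips in between. A hedged usage sketch; mutex_lock is assumed to be the basic lock type of this file, and the routines are the ones changed above:

	static void consumer( condition_variable & cond, mutex_lock & m ) {
		lock( m );
		wait( cond, m );   // enqueues, unlocks both locks, parks, then re-acquires m
		unlock( m );
	}

	static void producer( condition_variable & cond ) {
		notify_one( cond );   // unparks the head of cond's blocked_threads
	}
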
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision 9f575eacc1e359b231c369fde8ace4f941121f26)
+++ libcfa/src/concurrency/preemption.cfa	(revision 3381ed7a703d0c964d6e94bffab7c58f0cdf0d17)
@@ -187,5 +187,4 @@
 	void enable_interrupts( __cfaabi_dbg_ctx_param ) {
 		processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
-		thread_desc * thrd = kernelTLS.this_thread;	  // Cache the thread now since interrupts can start happening after the atomic store
 
 		with( kernelTLS.preemption_state ){
@@ -209,5 +208,5 @@
 				if( proc->pending_preemption ) {
 					proc->pending_preemption = false;
-					BlockInternal( thrd );
+					force_yield( __POLL_PREEMPTION );
 				}
 			}
@@ -394,6 +393,5 @@
 	// Preemption can occur here
 
-	kernelTLS.this_thread->preempted = true;
-	BlockInternal(); // Do the actual CtxSwitch
+	force_yield( __ALARM_PREEMPTION ); // Do the actual CtxSwitch
 }
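
Both preemption paths now funnel through force_yield, which may refuse: if the current thread's intrusive next field is already in use (the thread was about to park itself), the preemption is abandoned and force_yield returns false. A hedged sketch of the caller's contract; the wrapper is invented:

	static void maybe_preempt( void ) {
		// must be called with interrupts enabled, i.e. from outside the kernel
		bool preempted = force_yield( __ALARM_PREEMPTION );
		if( !preempted ) {
			// the thread was already parking itself; it will block on its
			// own shortly, so abandoning this preemption costs nothing
		}
	}
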
 
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision 9f575eacc1e359b231c369fde8ace4f941121f26)
+++ libcfa/src/concurrency/thread.cfa	(revision 3381ed7a703d0c964d6e94bffab7c58f0cdf0d17)
@@ -36,5 +36,5 @@
 	self_cor{ name, storage, storageSize };
 	state = Start;
-	preempted = false;
+	preempted = __NO_PREEMPTION;
 	curr_cor = &self_cor;
 	self_mon.owner = &this;
@@ -78,5 +78,4 @@
 void __thrd_start( T & this, void (*main_p)(T &) ) {
 	thread_desc * this_thrd = get_thread(this);
-	thread_desc * curr_thrd = TL_GET( this_thread );
 
 	disable_interrupts();
@@ -85,22 +84,7 @@
 	this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
 	verify( this_thrd->context.SP );
-	// CtxSwitch( &curr_thrd->context, &this_thrd->context );
 
 	ScheduleThread(this_thrd);
 	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
-void yield( void ) {
-	// Safety note : This could cause some false positives due to preemption
-      verify( TL_GET( preemption_state.enabled ) );
-	BlockInternal( TL_GET( this_thread ) );
-	// Safety note : This could cause some false positives due to preemption
-      verify( TL_GET( preemption_state.enabled ) );
-}
-
-void yield( unsigned times ) {
-	for( unsigned i = 0; i < times; i++ ) {
-		yield();
-	}
 }
 
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision 9f575eacc1e359b231c369fde8ace4f941121f26)
+++ libcfa/src/concurrency/thread.hfa	(revision 3381ed7a703d0c964d6e94bffab7c58f0cdf0d17)
@@ -88,8 +88,41 @@
 void ^?{}( scoped(T)& this );
 
-void yield();
-void yield( unsigned times );
+//-----------------------------------------------------------------------------
+// Thread getters
+static inline struct thread_desc * active_thread () { return TL_GET( this_thread ); }
 
-static inline struct thread_desc * active_thread () { return TL_GET( this_thread ); }
+//-----------------------------------------------------------------------------
+// Scheduler API
+
+//----------
+// Park thread: block until the corresponding call to unpark; does not block if unpark has already been called
+void park( void );
+
+//----------
+// Unpark a thread: if the thread is already blocked, schedule it;
+//                  if the thread is not yet blocked, signal that it should rerun immediately or reschedule itself
+void unpark( thread_desc * this, bool must_yield );
+
+static inline void unpark( thread_desc * this ) { unpark( this, false ); }
+
+forall( dtype T | is_thread(T) )
+static inline void unpark( T & this ) { if(!&this) return; unpark( get_thread( this ), false );}
+
+forall( dtype T | is_thread(T) )
+static inline void unpark( T & this, bool must_yield ) { if(!&this) return; unpark( get_thread( this ), must_yield );}
+
+//----------
+// Yield: force thread to block and be rescheduled
+static inline void yield() {
+	unpark( active_thread(), true );
+	park();
+}
+
+// Yield: yield N times
+static inline void yield( unsigned times ) {
+	for( times ) {
+		yield();
+	}
+}
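
A hedged user-level sketch tying the new API together: a thread type that parks itself and is released through the forall unpark overload. Worker and kick are invented; park, unpark, and yield are the routines declared above, and the sketch assumes the worker's main has already started running when kick is called:

	thread Worker {};   // hypothetical user thread type

	void main( Worker & this ) {
		park();   // block until someone unparks this worker
	}

	static void kick( Worker & w ) {
		unpark( w );   // forall overload: reaches the thread_desc via get_thread
		yield( 2 );    // politely give the worker a chance to run
	}
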
 
 // Local Variables: //
