Index: src/ControlStruct/ExceptTranslate.cc
===================================================================
--- src/ControlStruct/ExceptTranslate.cc	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/ControlStruct/ExceptTranslate.cc	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -10,6 +10,6 @@
 // Created On       : Wed Jun 14 16:49:00 2017
 // Last Modified By : Andrew Beach
-// Last Modified On : Tus Jul 11 16:33:00 2017
-// Update Count     : 2
+// Last Modified On : Wed Jul 12 15:07:00 2017
+// Update Count     : 3
 //
 
@@ -593,7 +593,5 @@
 
 		PassVisitor<ExceptionMutatorCore> translator;
-		for ( Declaration * decl : translationUnit ) {
-			decl->acceptMutator( translator );
-		}
+		mutateAll( translationUnit, translator );
 	}
 }
Index: src/benchmark/CorCtxSwitch.c
===================================================================
--- src/benchmark/CorCtxSwitch.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/benchmark/CorCtxSwitch.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -31,8 +31,4 @@
 
 	StartTime = Time();
-	// for ( volatile unsigned int i = 0; i < NoOfTimes; i += 1 ) {
-	// 	resume( this_coroutine() );
-	// 	// resume( &s );	
-	// }
 	resumer( &s, NoOfTimes );
 	EndTime = Time();
Index: src/benchmark/bench.h
===================================================================
--- src/benchmark/bench.h	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/benchmark/bench.h	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -26,2 +26,6 @@
 #define N 10000000
 #endif
+
+unsigned int default_preemption() {
+	return 0;
+}
Index: src/benchmark/csv-data.c
===================================================================
--- src/benchmark/csv-data.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/benchmark/csv-data.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -25,8 +25,4 @@
 }
 
-#ifndef N
-#define N 100000000
-#endif
-
 //-----------------------------------------------------------------------------
 // coroutine context switch
@@ -38,8 +34,4 @@
 
 	StartTime = Time();
-	// for ( volatile unsigned int i = 0; i < NoOfTimes; i += 1 ) {
-	// 	resume( this_coroutine() );
-	// 	// resume( &s );
-	// }
 	resumer( &s, NoOfTimes );
 	EndTime = Time();
@@ -104,5 +96,5 @@
 mon_t mon1;
 
-condition cond1a; 
+condition cond1a;
 condition cond1b;
 
@@ -152,5 +144,5 @@
 mon_t mon2;
 
-condition cond2a; 
+condition cond2a;
 condition cond2b;
 
Index: src/benchmark/interrupt_linux.c
===================================================================
--- src/benchmark/interrupt_linux.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
+++ src/benchmark/interrupt_linux.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -0,0 +1,35 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <signal.h>
+// Benchmark program: installs a SIGUSR1 handler, then signals the calling thread in a loop.
+#define __CFA_SIGCXT__ ucontext_t *
+#define __CFA_SIGPARMS__ __attribute__((unused)) int sig, __attribute__((unused)) siginfo_t *sfp, __attribute__((unused)) __CFA_SIGCXT__ cxt
+// SIGUSR1 handler: removes SIGUSR1 from the blocked-signal mask via sigprocmask; aborts if that call fails.
+void sigHandler( __CFA_SIGPARMS__ ) {
+	sigset_t mask;
+	sigemptyset( &mask );
+	sigaddset( &mask, SIGUSR1 );
+
+	if ( sigprocmask( SIG_UNBLOCK, &mask, NULL ) == -1 ) {
+		abort();
+	} // if
+}
+// Registers sigHandler for SIGUSR1 and sends that signal to this thread 50000000 times.
+int main() {
+
+	struct sigaction act;
+	// sigHandler declares a ucontext_t * third parameter, so it is cast to the sa_sigaction pointer type.
+	act.sa_sigaction = (void (*)(int, siginfo_t *, void *))sigHandler;
+	sigemptyset( &act.sa_mask );
+	sigaddset( &act.sa_mask, SIGUSR1 );
+	// SA_SIGINFO: use the three-argument sa_sigaction handler rather than sa_handler.
+	act.sa_flags = SA_SIGINFO;
+
+	if ( sigaction( SIGUSR1, &act, NULL ) == -1 ) {
+		abort();
+	} // if
+
+	for( int i = 0; i < 50000000ul; i++ ) {
+		pthread_kill( pthread_self(), SIGUSR1 );	// return value is not checked
+	}
+}
Index: src/libcfa/concurrency/alarm.c
===================================================================
--- src/libcfa/concurrency/alarm.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/alarm.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -16,7 +16,13 @@
 
 extern "C" {
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
 #include <time.h>
+#include <unistd.h>
 #include <sys/time.h>
 }
+
+#include "libhdr.h"
 
 #include "alarm.h"
@@ -31,8 +37,11 @@
 	timespec curr;
 	clock_gettime( CLOCK_REALTIME, &curr );
-	return ((__cfa_time_t)curr.tv_sec * TIMEGRAN) + curr.tv_nsec;
+	__cfa_time_t curr_time = ((__cfa_time_t)curr.tv_sec * TIMEGRAN) + curr.tv_nsec;
+	// LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO, "Kernel : current time is %lu\n", curr_time );
+	return curr_time;
 }
 
 void __kernel_set_timer( __cfa_time_t alarm ) {
+	LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO, "Kernel : set timer to %lu\n", (__cfa_time_t)alarm );
 	itimerval val;
 	val.it_value.tv_sec = alarm / TIMEGRAN;			// seconds
@@ -71,6 +80,15 @@
 }
 
+LIB_DEBUG_DO( bool validate( alarm_list_t * this ) {	// consistency check: is this->tail the list's terminating link?
+	alarm_node_t ** it = &this->head;
+	while( (*it) ) {	// follow next-links until the one that holds NULL
+		it = &(*it)->next;
+	}
+	// 'it' now addresses the NULL link ending the list (this->head itself when the list is empty)
+	return it == this->tail;
+})
+
 static inline void insert_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t p ) {
-	assert( !n->next );
+	verify( !n->next );
 	if( p == this->tail ) {
 		this->tail = &n->next;
@@ -80,4 +98,6 @@
 	}
 	*p = n;
+
+	verify( validate( this ) );
 }
 
@@ -89,4 +109,6 @@
 
 	insert_at( this, n, it );
+
+	verify( validate( this ) );
 }
 
@@ -100,4 +122,5 @@
 		head->next = NULL;
 	}
+	verify( validate( this ) );
 	return head;
 }
@@ -105,43 +128,60 @@
 static inline void remove_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t it ) {
 	verify( it );
-	verify( (*it)->next == n );
+	verify( (*it) == n );
 
-	(*it)->next = n->next;
+	(*it) = n->next;
 	if( !n-> next ) {
 		this->tail = it;
 	}
 	n->next = NULL;
+
+	verify( validate( this ) );
 }
 
 static inline void remove( alarm_list_t * this, alarm_node_t * n ) {
 	alarm_node_t ** it = &this->head;
-	while( (*it) && (*it)->next != n ) {
+	while( (*it) && (*it) != n ) {
 		it = &(*it)->next;
 	}
 
+	verify( validate( this ) );
+
 	if( *it ) { remove_at( this, n, it ); }
+
+	verify( validate( this ) );
 }
 
 void register_self( alarm_node_t * this ) {
 	disable_interrupts();
-	assert( !systemProcessor->pending_alarm );
-	lock( &systemProcessor->alarm_lock );
+	verify( !systemProcessor->pending_alarm );
+	lock( &systemProcessor->alarm_lock DEBUG_CTX2 );
 	{
+		verify( validate( &systemProcessor->alarms ) );
+		bool first = !systemProcessor->alarms.head;
+
 		insert( &systemProcessor->alarms, this );
 		if( systemProcessor->pending_alarm ) {
 			tick_preemption();
 		}
+		if( first ) {
+			__kernel_set_timer( systemProcessor->alarms.head->alarm - __kernel_get_time() );
+		}
 	}
 	unlock( &systemProcessor->alarm_lock );
 	this->set = true;
-	enable_interrupts();
+	enable_interrupts( DEBUG_CTX );
 }
 
 void unregister_self( alarm_node_t * this ) {
+	// LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO, "Kernel : unregister %p start\n", this );
 	disable_interrupts();
-	lock( &systemProcessor->alarm_lock );
-	remove( &systemProcessor->alarms, this );
+	lock( &systemProcessor->alarm_lock DEBUG_CTX2 );
+	{
+		verify( validate( &systemProcessor->alarms ) );
+		remove( &systemProcessor->alarms, this );
+	}
 	unlock( &systemProcessor->alarm_lock );
-	disable_interrupts();
+	enable_interrupts( DEBUG_CTX );
 	this->set = false;
+	// LIB_DEBUG_PRINT_BUFFER_LOCAL( STDERR_FILENO, "Kernel : unregister %p end\n", this );
 }
Index: src/libcfa/concurrency/coroutine
===================================================================
--- src/libcfa/concurrency/coroutine	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/coroutine	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -63,5 +63,5 @@
 
 // Get current coroutine
-coroutine_desc * this_coroutine(void);
+extern volatile thread_local coroutine_desc * this_coroutine;
 
 // Private wrappers for context switch and stack creation
@@ -71,5 +71,5 @@
 // Suspend implementation inlined for performance
 static inline void suspend() {
-	coroutine_desc * src = this_coroutine();		// optimization
+	coroutine_desc * src = this_coroutine;		// optimization
 
 	assertf( src->last != 0,
@@ -88,5 +88,5 @@
 forall(dtype T | is_coroutine(T))
 static inline void resume(T * cor) {
-	coroutine_desc * src = this_coroutine();		// optimization
+	coroutine_desc * src = this_coroutine;		// optimization
 	coroutine_desc * dst = get_coroutine(cor);
 
@@ -112,5 +112,5 @@
 
 static inline void resume(coroutine_desc * dst) {
-	coroutine_desc * src = this_coroutine();		// optimization
+	coroutine_desc * src = this_coroutine;		// optimization
 
 	// not resuming self ?
Index: src/libcfa/concurrency/coroutine.c
===================================================================
--- src/libcfa/concurrency/coroutine.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/coroutine.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -32,5 +32,5 @@
 #include "invoke.h"
 
-extern thread_local processor * this_processor;
+extern volatile thread_local processor * this_processor;
 
 //-----------------------------------------------------------------------------
@@ -44,5 +44,5 @@
 // Coroutine ctors and dtors
 void ?{}(coStack_t* this) {
-	this->size		= 10240;	// size of stack
+	this->size		= 65000;	// size of stack
 	this->storage	= NULL;	// pointer to stack
 	this->limit		= NULL;	// stack grows towards stack limit
@@ -50,5 +50,5 @@
 	this->context	= NULL;	// address of cfa_context_t
 	this->top		= NULL;	// address of top of storage
-	this->userStack	= false;	
+	this->userStack	= false;
 }
 
@@ -106,12 +106,13 @@
 
 	// set state of current coroutine to inactive
-	src->state = Inactive;
+	src->state = src->state == Halted ? Halted : Inactive;
 
 	// set new coroutine that task is executing
-	this_processor->current_coroutine = dst;
+	this_coroutine = dst;
 
 	// context switch to specified coroutine
+	assert( src->stack.context );
 	CtxSwitch( src->stack.context, dst->stack.context );
-	// when CtxSwitch returns we are back in the src coroutine		
+	// when CtxSwitch returns we are back in the src coroutine
 
 	// set state of new coroutine to active
@@ -131,5 +132,5 @@
 		this->size = libCeiling( storageSize, 16 );
 		// use malloc/memalign because "new" raises an exception for out-of-memory
-		
+
 		// assume malloc has 8 byte alignment so add 8 to allow rounding up to 16 byte alignment
 		LIB_DEBUG_DO( this->storage = memalign( pageSize, cxtSize + this->size + pageSize ) );
Index: src/libcfa/concurrency/invoke.c
===================================================================
--- src/libcfa/concurrency/invoke.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/invoke.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -29,9 +29,11 @@
 
 extern void __suspend_internal(void);
-extern void __leave_monitor_desc( struct monitor_desc * this );
+extern void __leave_thread_monitor( struct thread_desc * this );
+extern void disable_interrupts();
+extern void enable_interrupts( DEBUG_CTX_PARAM );
 
 void CtxInvokeCoroutine(
-      void (*main)(void *), 
-      struct coroutine_desc *(*get_coroutine)(void *), 
+      void (*main)(void *),
+      struct coroutine_desc *(*get_coroutine)(void *),
       void *this
 ) {
@@ -56,23 +58,30 @@
 
 void CtxInvokeThread(
-      void (*dtor)(void *), 
-      void (*main)(void *), 
-      struct thread_desc *(*get_thread)(void *), 
+      void (*dtor)(void *),
+      void (*main)(void *),
+      struct thread_desc *(*get_thread)(void *),
       void *this
 ) {
+      // First suspend, once the thread arrives here,
+      // the function pointer to main can be invalidated without risk
       __suspend_internal();
 
+      // Fetch the thread handle from the user defined thread structure
       struct thread_desc* thrd = get_thread( this );
-      struct coroutine_desc* cor = &thrd->cor;
-      struct monitor_desc* mon = &thrd->mon;
-      cor->state = Active;
 
-      // LIB_DEBUG_PRINTF("Invoke Thread : invoking main %p (args %p)\n", main, this);
+      // Officially start the thread by enabling preemption
+      enable_interrupts( DEBUG_CTX );
+
+      // Call the main of the thread
       main( this );
 
-      __leave_monitor_desc( mon );
-
+      // To exit a thread we must :
+      // 1 - Mark it as halted
+      // 2 - Leave its monitor
+      // 3 - Disable the interrupts
+      // 4 - Final suspend
+      // The order of these 4 operations is very important
       //Final suspend, should never return
-      __suspend_internal();
+      __leave_thread_monitor( thrd );
       abortf("Resumed dead thread");
 }
@@ -80,7 +89,7 @@
 
 void CtxStart(
-      void (*main)(void *), 
-      struct coroutine_desc *(*get_coroutine)(void *), 
-      void *this, 
+      void (*main)(void *),
+      struct coroutine_desc *(*get_coroutine)(void *),
+      void *this,
       void (*invoke)(void *)
 ) {
@@ -108,5 +117,5 @@
 	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->rturn = invoke;
       ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->mxcr = 0x1F80; //Vol. 2A 3-520
-      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7 
+      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7
 
 #elif defined( __x86_64__ )
@@ -128,5 +137,5 @@
       ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fixedRegisters[1] = invoke;
       ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->mxcr = 0x1F80; //Vol. 2A 3-520
-      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7 
+      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7
 #else
       #error Only __i386__ and __x86_64__ is supported for threads in cfa
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/invoke.h	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -31,4 +31,8 @@
       struct spinlock {
             volatile int lock;
+            #ifdef __CFA_DEBUG__
+                  const char * prev_name;
+                  void* prev_thrd;
+            #endif
       };
 
@@ -83,5 +87,4 @@
             struct __thread_queue_t entry_queue;      // queue of threads that are blocked waiting for the monitor
             struct __condition_stack_t signal_stack;  // stack of conditions to run next once we exit the monitor
-            struct monitor_desc * stack_owner;        // if bulk acquiring was used we need to synchronize signals with an other monitor
             unsigned int recursion;                   // monitor routines can be called recursively, we need to keep track of that
       };
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/kernel	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -28,19 +28,20 @@
 //-----------------------------------------------------------------------------
 // Locks
-bool try_lock( spinlock * );
-void lock( spinlock * );
-void unlock( spinlock * );
+bool try_lock  ( spinlock * DEBUG_CTX_PARAM2 );
+void lock      ( spinlock * DEBUG_CTX_PARAM2 );
+void lock_yield( spinlock * DEBUG_CTX_PARAM2 );
+void unlock    ( spinlock * );
 
-struct signal_once {
-	volatile bool cond;
-	struct spinlock lock;
-	struct __thread_queue_t blocked;
+struct semaphore {
+	spinlock lock;
+	int count;
+	__thread_queue_t waiting;
 };
 
-void ?{}(signal_once * this);
-void ^?{}(signal_once * this);
+void  ?{}(semaphore * this, int count = 1);
+void ^?{}(semaphore * this);
+void P(semaphore * this);
+void V(semaphore * this);
 
-void wait( signal_once * );
-void signal( signal_once * );
 
 //-----------------------------------------------------------------------------
@@ -68,5 +69,5 @@
 	unsigned short thrd_count;
 };
-static inline void ?{}(FinishAction * this) { 
+static inline void ?{}(FinishAction * this) {
 	this->action_code = No_Action;
 	this->thrd = NULL;
@@ -78,9 +79,7 @@
 	struct processorCtx_t * runner;
 	cluster * cltr;
-	coroutine_desc * current_coroutine;
-	thread_desc * current_thread;
 	pthread_t kernel_thread;
-	
-	signal_once terminated;
+
+	semaphore terminated;
 	volatile bool is_terminated;
 
@@ -90,7 +89,7 @@
 	unsigned int preemption;
 
-	unsigned short disable_preempt_count;
+	bool pending_preemption;
 
-	bool pending_preemption;
+	char * last_enable;
 };
 
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/kernel.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -15,12 +15,5 @@
 //
 
-#include "startup.h"
-
-//Start and stop routine for the kernel, declared first to make sure they run first
-void kernel_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
-void kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
-
-//Header
-#include "kernel_private.h"
+#include "libhdr.h"
 
 //C Includes
@@ -35,6 +28,7 @@
 
 //CFA Includes
-#include "libhdr.h"
+#include "kernel_private.h"
 #include "preemption.h"
+#include "startup.h"
 
 //Private includes
@@ -42,7 +36,11 @@
 #include "invoke.h"
 
+//Start and stop routine for the kernel, declared first to make sure they run first
+void kernel_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
+void kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
+
 //-----------------------------------------------------------------------------
 // Kernel storage
-#define KERNEL_STORAGE(T,X) static char X##_storage[sizeof(T)]
+#define KERNEL_STORAGE(T,X) static char X##Storage[sizeof(T)]
 
 KERNEL_STORAGE(processorCtx_t, systemProcessorCtx);
@@ -50,5 +48,5 @@
 KERNEL_STORAGE(system_proc_t, systemProcessor);
 KERNEL_STORAGE(thread_desc, mainThread);
-KERNEL_STORAGE(machine_context_t, mainThread_context);
+KERNEL_STORAGE(machine_context_t, mainThreadCtx);
 
 cluster * systemCluster;
@@ -59,13 +57,8 @@
 // Global state
 
-thread_local processor * this_processor;
-
-coroutine_desc * this_coroutine(void) {
-	return this_processor->current_coroutine;
-}
-
-thread_desc * this_thread(void) {
-	return this_processor->current_thread;
-}
+volatile thread_local processor * this_processor;
+volatile thread_local coroutine_desc * this_coroutine;
+volatile thread_local thread_desc * this_thread;
+volatile thread_local unsigned short disable_preempt_count = 1;
 
 //-----------------------------------------------------------------------------
@@ -91,5 +84,5 @@
 
 	this->limit = (void *)(((intptr_t)this->base) - this->size);
-	this->context = &mainThread_context_storage;
+	this->context = &mainThreadCtxStorage;
 	this->top = this->base;
 }
@@ -136,11 +129,8 @@
 void ?{}(processor * this, cluster * cltr) {
 	this->cltr = cltr;
-	this->current_coroutine = NULL;
-	this->current_thread = NULL;
-	(&this->terminated){};
+	(&this->terminated){ 0 };
 	this->is_terminated = false;
 	this->preemption_alarm = NULL;
 	this->preemption = default_preemption();
-	this->disable_preempt_count = 1;		//Start with interrupts disabled
 	this->pending_preemption = false;
 
@@ -150,15 +140,17 @@
 void ?{}(processor * this, cluster * cltr, processorCtx_t * runner) {
 	this->cltr = cltr;
-	this->current_coroutine = NULL;
-	this->current_thread = NULL;
-	(&this->terminated){};
+	(&this->terminated){ 0 };
 	this->is_terminated = false;
-	this->disable_preempt_count = 0;
+	this->preemption_alarm = NULL;
+	this->preemption = default_preemption();
 	this->pending_preemption = false;
+	this->kernel_thread = pthread_self();
 
 	this->runner = runner;
-	LIB_DEBUG_PRINT_SAFE("Kernel : constructing processor context %p\n", runner);
+	LIB_DEBUG_PRINT_SAFE("Kernel : constructing system processor context %p\n", runner);
 	runner{ this };
 }
+
+LIB_DEBUG_DO( bool validate( alarm_list_t * this ); )
 
 void ?{}(system_proc_t * this, cluster * cltr, processorCtx_t * runner) {
@@ -168,4 +160,6 @@
 
 	(&this->proc){ cltr, runner };
+
+	verify( validate( &this->alarms ) );
 }
 
@@ -174,5 +168,6 @@
 		LIB_DEBUG_PRINT_SAFE("Kernel : core %p signaling termination\n", this);
 		this->is_terminated = true;
-		wait( &this->terminated );
+		P( &this->terminated );
+		pthread_join( this->kernel_thread, NULL );
 	}
 }
@@ -209,5 +204,9 @@
 			if(readyThread)
 			{
+				verify( disable_preempt_count > 0 );
+
 				runThread(this, readyThread);
+
+				verify( disable_preempt_count > 0 );
 
 				//Some actions need to be taken from the kernel
@@ -225,5 +224,6 @@
 	}
 
-	signal( &this->terminated );
+	V( &this->terminated );
+
 	LIB_DEBUG_PRINT_SAFE("Kernel : core %p terminated\n", this);
 }
@@ -239,5 +239,5 @@
 
 	//Update global state
-	this->current_thread = dst;
+	this_thread = dst;
 
 	// Context Switch to the thread
@@ -289,4 +289,7 @@
 	processor * proc = (processor *) arg;
 	this_processor = proc;
+	this_coroutine = NULL;
+	this_thread = NULL;
+	disable_preempt_count = 1;
 	// SKULLDUGGERY: We want to create a context for the processor coroutine
 	// which is needed for the 2-step context switch. However, there is no reason
@@ -300,6 +303,6 @@
 
 	//Set global state
-	proc->current_coroutine = &proc->runner->__cor;
-	proc->current_thread = NULL;
+	this_coroutine = &proc->runner->__cor;
+	this_thread = NULL;
 
 	//We now have a proper context from which to schedule threads
@@ -331,51 +334,89 @@
 // Scheduler routines
 void ScheduleThread( thread_desc * thrd ) {
-	if( !thrd ) return;
+	// if( !thrd ) return;
+	assert( thrd );
+	assert( thrd->cor.state != Halted );
+
+	verify( disable_preempt_count > 0 );
 
 	verifyf( thrd->next == NULL, "Expected null got %p", thrd->next );
 
-	lock( &systemProcessor->proc.cltr->lock );
+	lock( &systemProcessor->proc.cltr->lock DEBUG_CTX2 );
 	append( &systemProcessor->proc.cltr->ready_queue, thrd );
 	unlock( &systemProcessor->proc.cltr->lock );
+
+	verify( disable_preempt_count > 0 );
 }
 
 thread_desc * nextThread(cluster * this) {
-	lock( &this->lock );
+	verify( disable_preempt_count > 0 );
+	lock( &this->lock DEBUG_CTX2 );
 	thread_desc * head = pop_head( &this->ready_queue );
 	unlock( &this->lock );
+	verify( disable_preempt_count > 0 );
 	return head;
 }
 
-void ScheduleInternal() {
+void BlockInternal() {
+	disable_interrupts();
+	verify( disable_preempt_count > 0 );
 	suspend();
-}
-
-void ScheduleInternal( spinlock * lock ) {
+	verify( disable_preempt_count > 0 );
+	enable_interrupts( DEBUG_CTX );
+}
+
+void BlockInternal( spinlock * lock ) {
+	disable_interrupts();
 	this_processor->finish.action_code = Release;
 	this_processor->finish.lock = lock;
+
+	verify( disable_preempt_count > 0 );
 	suspend();
-}
-
-void ScheduleInternal( thread_desc * thrd ) {
+	verify( disable_preempt_count > 0 );
+
+	enable_interrupts( DEBUG_CTX );
+}
+
+void BlockInternal( thread_desc * thrd ) {
+	disable_interrupts();
+	assert( thrd->cor.state != Halted );
 	this_processor->finish.action_code = Schedule;
 	this_processor->finish.thrd = thrd;
+
+	verify( disable_preempt_count > 0 );
 	suspend();
-}
-
-void ScheduleInternal( spinlock * lock, thread_desc * thrd ) {
+	verify( disable_preempt_count > 0 );
+
+	enable_interrupts( DEBUG_CTX );
+}
+
+void BlockInternal( spinlock * lock, thread_desc * thrd ) {
+	disable_interrupts();
 	this_processor->finish.action_code = Release_Schedule;
 	this_processor->finish.lock = lock;
 	this_processor->finish.thrd = thrd;
+
+	verify( disable_preempt_count > 0 );
 	suspend();
-}
-
-void ScheduleInternal(spinlock ** locks, unsigned short count) {
+	verify( disable_preempt_count > 0 );
+
+	enable_interrupts( DEBUG_CTX );
+}
+
+void BlockInternal(spinlock ** locks, unsigned short count) {
+	disable_interrupts();
 	this_processor->finish.action_code = Release_Multi;
 	this_processor->finish.locks = locks;
 	this_processor->finish.lock_count = count;
+
+	verify( disable_preempt_count > 0 );
 	suspend();
-}
-
-void ScheduleInternal(spinlock ** locks, unsigned short lock_count, thread_desc ** thrds, unsigned short thrd_count) {
+	verify( disable_preempt_count > 0 );
+
+	enable_interrupts( DEBUG_CTX );
+}
+
+void BlockInternal(spinlock ** locks, unsigned short lock_count, thread_desc ** thrds, unsigned short thrd_count) {
+	disable_interrupts();
 	this_processor->finish.action_code = Release_Multi_Schedule;
 	this_processor->finish.locks = locks;
@@ -383,4 +424,18 @@
 	this_processor->finish.thrds = thrds;
 	this_processor->finish.thrd_count = thrd_count;
+
+	verify( disable_preempt_count > 0 );
+	suspend();
+	verify( disable_preempt_count > 0 );
+
+	enable_interrupts( DEBUG_CTX );
+}
+
+void LeaveThread(spinlock * lock, thread_desc * thrd) {
+	verify( disable_preempt_count > 0 );
+	this_processor->finish.action_code = thrd ? Release_Schedule : Release;
+	this_processor->finish.lock = lock;
+	this_processor->finish.thrd = thrd;
+
 	suspend();
 }
@@ -397,5 +452,5 @@
 	// SKULLDUGGERY: the mainThread steals the process main thread
 	// which will then be scheduled by the systemProcessor normally
-	mainThread = (thread_desc *)&mainThread_storage;
+	mainThread = (thread_desc *)&mainThreadStorage;
 	current_stack_info_t info;
 	mainThread{ &info };
@@ -403,9 +458,6 @@
 	LIB_DEBUG_PRINT_SAFE("Kernel : Main thread ready\n");
 
-	// Enable preemption
-	kernel_start_preemption();
-
 	// Initialize the system cluster
-	systemCluster = (cluster *)&systemCluster_storage;
+	systemCluster = (cluster *)&systemClusterStorage;
 	systemCluster{};
 
@@ -414,6 +466,6 @@
 	// Initialize the system processor and the system processor ctx
 	// (the coroutine that contains the processing control flow)
-	systemProcessor = (system_proc_t *)&systemProcessor_storage;
-	systemProcessor{ systemCluster, (processorCtx_t *)&systemProcessorCtx_storage };
+	systemProcessor = (system_proc_t *)&systemProcessorStorage;
+	systemProcessor{ systemCluster, (processorCtx_t *)&systemProcessorCtxStorage };
 
 	// Add the main thread to the ready queue
@@ -423,6 +475,10 @@
 	//initialize the global state variables
 	this_processor = &systemProcessor->proc;
-	this_processor->current_thread = mainThread;
-	this_processor->current_coroutine = &mainThread->cor;
+	this_thread = mainThread;
+	this_coroutine = &mainThread->cor;
+	disable_preempt_count = 1;
+
+	// Enable preemption
+	kernel_start_preemption();
 
 	// SKULLDUGGERY: Force a context switch to the system processor to set the main thread's context to the current UNIX
@@ -435,8 +491,12 @@
 	// THE SYSTEM IS NOW COMPLETELY RUNNING
 	LIB_DEBUG_PRINT_SAFE("Kernel : Started\n--------------------------------------------------\n\n");
+
+	enable_interrupts( DEBUG_CTX );
 }
 
 void kernel_shutdown(void) {
 	LIB_DEBUG_PRINT_SAFE("\n--------------------------------------------------\nKernel : Shutting down\n");
+
+	disable_interrupts();
 
 	// SKULLDUGGERY: Notify the systemProcessor it needs to terminates.
@@ -448,4 +508,7 @@
 	// THE SYSTEM IS NOW COMPLETELY STOPPED
 
+	// Disable preemption
+	kernel_stop_preemption();
+
 	// Destroy the system processor and its context in reverse order of construction
 	// These were manually constructed so we need manually destroy them
@@ -467,5 +530,5 @@
 	// abort cannot be recursively entered by the same or different processors because all signal handlers return when
 	// the globalAbort flag is true.
-	lock( &kernel_abort_lock );
+	lock( &kernel_abort_lock DEBUG_CTX2 );
 
 	// first task to abort ?
@@ -485,5 +548,5 @@
 	}
 
-	return this_thread();
+	return this_thread;
 }
 
@@ -494,6 +557,6 @@
 	__lib_debug_write( STDERR_FILENO, abort_text, len );
 
-	if ( thrd != this_coroutine() ) {
-		len = snprintf( abort_text, abort_text_size, " in coroutine %.256s (%p).\n", this_coroutine()->name, this_coroutine() );
+	if ( thrd != this_coroutine ) {
+		len = snprintf( abort_text, abort_text_size, " in coroutine %.256s (%p).\n", this_coroutine->name, this_coroutine );
 		__lib_debug_write( STDERR_FILENO, abort_text, len );
 	}
@@ -505,9 +568,9 @@
 extern "C" {
 	void __lib_debug_acquire() {
-		lock(&kernel_debug_lock);
+		lock( &kernel_debug_lock DEBUG_CTX2 );
 	}
 
 	void __lib_debug_release() {
-		unlock(&kernel_debug_lock);
+		unlock( &kernel_debug_lock );
 	}
 }
@@ -525,13 +588,29 @@
 }
 
-bool try_lock( spinlock * this ) {
+bool try_lock( spinlock * this DEBUG_CTX_PARAM2 ) {
 	return this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0;
 }
 
-void lock( spinlock * this ) {
+void lock( spinlock * this DEBUG_CTX_PARAM2 ) {
 	for ( unsigned int i = 1;; i += 1 ) {
-	  	if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) break;
-	}
-}
+		if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }
+	}
+	LIB_DEBUG_DO(
+		this->prev_name = caller;
+		this->prev_thrd = this_thread;
+	)
+}
+
+void lock_yield( spinlock * this DEBUG_CTX_PARAM2 ) {	// acquire the spinlock, calling yield() after every failed attempt
+	for ( unsigned int i = 1;; i += 1 ) {	// loops until the break below; i is only incremented, never read
+		if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }	// plain read first, then atomic test-and-set; 0 back means we took the lock
+		yield();
+	}
+	LIB_DEBUG_DO(	// record who took the lock; 'caller' presumably comes from DEBUG_CTX_PARAM2 - TODO confirm
+		this->prev_name = caller;
+		this->prev_thrd = this_thread;
+	)
+}
+
 
 void unlock( spinlock * this ) {
@@ -539,32 +618,39 @@
 }
 
-void ?{}( signal_once * this ) {
-	this->cond = false;
-}
-void ^?{}( signal_once * this ) {
-
-}
-
-void wait( signal_once * this ) {
-	lock( &this->lock );
-	if( !this->cond ) {
-		append( &this->blocked, this_thread() );
-		ScheduleInternal( &this->lock );
-		lock( &this->lock );
-	}
+void  ?{}( semaphore * this, int count = 1 ) {
+	(&this->lock){};
+	this->count = count;
+	(&this->waiting){};
+}
+void ^?{}(semaphore * this) {}
+
+void P(semaphore * this) {
+	lock( &this->lock DEBUG_CTX2 );
+	this->count -= 1;
+	if ( this->count < 0 ) {
+		// queue current task
+		append( &this->waiting, (thread_desc *)this_thread );
+
+		// atomically release spin lock and block
+		BlockInternal( &this->lock );
+	}
+	else {
+	    unlock( &this->lock );
+	}
+}
+
+void V(semaphore * this) {
+	thread_desc * thrd = NULL;
+	lock( &this->lock DEBUG_CTX2 );
+	this->count += 1;
+	if ( this->count <= 0 ) {
+		// remove task at head of waiting list
+		thrd = pop_head( &this->waiting );
+	}
+
 	unlock( &this->lock );
-}
-
-void signal( signal_once * this ) {
-	lock( &this->lock );
-	{
-		this->cond = true;
-
-		thread_desc * it;
-		while( it = pop_head( &this->blocked) ) {
-			ScheduleThread( it );
-		}
-	}
-	unlock( &this->lock );
+
+	// make new owner
+	WakeThread( thrd );
 }
 
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/kernel_private.h	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -18,4 +18,6 @@
 #define KERNEL_PRIVATE_H
 
+#include "libhdr.h"
+
 #include "kernel"
 #include "thread"
@@ -23,17 +25,31 @@
 #include "alarm.h"
 
-#include "libhdr.h"
 
 //-----------------------------------------------------------------------------
 // Scheduler
+
+extern "C" {	// preemption enable/disable entry points; bodies are in preemption.c
+	void disable_interrupts();
+	void enable_interrupts_noRF();
+	void enable_interrupts( DEBUG_CTX_PARAM );
+}
+
 void ScheduleThread( thread_desc * );
+static inline void WakeThread( thread_desc * thrd ) {	// schedule thrd with preemption disabled; does nothing for NULL
+	if( !thrd ) return;
+
+	disable_interrupts();
+	ScheduleThread( thrd );
+	enable_interrupts( DEBUG_CTX );	// may context switch here if a preemption was deferred in the meantime
+}
 thread_desc * nextThread(cluster * this);
 
-void ScheduleInternal(void);
-void ScheduleInternal(spinlock * lock);
-void ScheduleInternal(thread_desc * thrd);
-void ScheduleInternal(spinlock * lock, thread_desc * thrd);
-void ScheduleInternal(spinlock ** locks, unsigned short count);
-void ScheduleInternal(spinlock ** locks, unsigned short count, thread_desc ** thrds, unsigned short thrd_count);
+void BlockInternal(void);
+void BlockInternal(spinlock * lock);
+void BlockInternal(thread_desc * thrd);
+void BlockInternal(spinlock * lock, thread_desc * thrd);
+void BlockInternal(spinlock ** locks, unsigned short count);
+void BlockInternal(spinlock ** locks, unsigned short count, thread_desc ** thrds, unsigned short thrd_count);
+void LeaveThread(spinlock * lock, thread_desc * thrd);
 
 //-----------------------------------------------------------------------------
@@ -60,24 +76,8 @@
 extern cluster * systemCluster;
 extern system_proc_t * systemProcessor;
-extern thread_local processor * this_processor;
-
-static inline void disable_interrupts() {
-	__attribute__((unused)) unsigned short prev = __atomic_fetch_add_2( &this_processor->disable_preempt_count, 1, __ATOMIC_SEQ_CST );
-	assert( prev != (unsigned short) -1 );
-}
-
-static inline void enable_interrupts_noRF() {
-	__attribute__((unused)) unsigned short prev = __atomic_fetch_add_2( &this_processor->disable_preempt_count, -1, __ATOMIC_SEQ_CST );
-	verify( prev != (unsigned short) 0 );
-}
-
-static inline void enable_interrupts() {
-	__attribute__((unused)) unsigned short prev = __atomic_fetch_add_2( &this_processor->disable_preempt_count, -1, __ATOMIC_SEQ_CST );
-	verify( prev != (unsigned short) 0 );
-	if( prev == 1 && this_processor->pending_preemption ) {
-		ScheduleInternal( this_processor->current_thread );
-		this_processor->pending_preemption = false;
-	}
-}
+extern volatile thread_local processor * this_processor;
+extern volatile thread_local coroutine_desc * this_coroutine;
+extern volatile thread_local thread_desc * this_thread;
+extern volatile thread_local unsigned short disable_preempt_count;
 
 //-----------------------------------------------------------------------------
Index: src/libcfa/concurrency/monitor
===================================================================
--- src/libcfa/concurrency/monitor	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/monitor	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -26,5 +26,4 @@
 static inline void ?{}(monitor_desc * this) {
 	this->owner = NULL;
-	this->stack_owner = NULL;
 	this->recursion = 0;
 }
Index: src/libcfa/concurrency/monitor.c
===================================================================
--- src/libcfa/concurrency/monitor.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/monitor.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -19,6 +19,6 @@
 #include <stdlib>
 
+#include "libhdr.h"
 #include "kernel_private.h"
-#include "libhdr.h"
 
 //-----------------------------------------------------------------------------
@@ -44,9 +44,9 @@
 
 extern "C" {
-	void __enter_monitor_desc(monitor_desc * this) {
-		lock( &this->lock );
-		thread_desc * thrd = this_thread();
-
-		LIB_DEBUG_PRINT_SAFE("%p Entering %p (o: %p, r: %i)\n", thrd, this, this->owner, this->recursion);
+	void __enter_monitor_desc( monitor_desc * this ) {
+		lock_yield( &this->lock DEBUG_CTX2 );
+		thread_desc * thrd = this_thread;
+
+		// LIB_DEBUG_PRINT_SAFE("%p Entering %p (o: %p, r: %i)\n", thrd, this, this->owner, this->recursion);
 
 		if( !this->owner ) {
@@ -62,9 +62,9 @@
 			//Some one else has the monitor, wait in line for it
 			append( &this->entry_queue, thrd );
-			LIB_DEBUG_PRINT_SAFE("%p Blocking on entry\n", thrd);
-			ScheduleInternal( &this->lock );
-
-			//ScheduleInternal will unlock spinlock, no need to unlock ourselves
-			return; 
+			// LIB_DEBUG_PRINT_SAFE("%p Blocking on entry\n", thrd);
+			BlockInternal( &this->lock );
+
+			//BlockInternal will unlock spinlock, no need to unlock ourselves
+			return;
 		}
 
@@ -75,9 +75,9 @@
 	// leave pseudo code :
 	//	TODO
-	void __leave_monitor_desc(monitor_desc * this) {
-		lock( &this->lock );
-
-		LIB_DEBUG_PRINT_SAFE("%p Leaving %p (o: %p, r: %i)\n", thrd, this, this->owner, this->recursion);
-		verifyf( this_thread() == this->owner, "Expected owner to be %p, got %p (r: %i)", this_thread(), this->owner, this->recursion );
+	void __leave_monitor_desc( monitor_desc * this ) {
+		lock_yield( &this->lock DEBUG_CTX2 );
+
+		// LIB_DEBUG_PRINT_SAFE("%p Leaving %p (o: %p, r: %i). ", this_thread, this, this->owner, this->recursion);
+		verifyf( this_thread == this->owner, "Expected owner to be %p, got %p (r: %i)", this_thread, this->owner, this->recursion );
 
 		//Leaving a recursion level, decrement the counter
@@ -96,8 +96,33 @@
 		unlock( &this->lock );
 
-		LIB_DEBUG_PRINT_SAFE("Next owner is %p\n", new_owner);
+		// LIB_DEBUG_PRINT_SAFE("Next owner is %p\n", new_owner);
 
 		//We need to wake-up the thread
-		ScheduleThread( new_owner );
+		WakeThread( new_owner );
+	}
+
+	void __leave_thread_monitor( thread_desc * thrd ) {	// mark thrd Halted and give up the monitor embedded in it (thrd->mon)
+		monitor_desc * this = &thrd->mon;
+		lock_yield( &this->lock DEBUG_CTX2 );
+
+		disable_interrupts();
+
+		thrd->cor.state = Halted;
+
+		verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i)", thrd, this->owner, this->recursion );
+
+		//Leaving a recursion level, decrement the counter
+		this->recursion -= 1;
+
+		//If we haven't left the last level of recursion
+		//it means we don't need to do anything
+		if( this->recursion != 0) {
+			unlock( &this->lock );
+			return;	// NOTE(review): returns with the disable_interrupts() above unmatched and state already Halted - confirm intended
+		}
+
+		thread_desc * new_owner = next_thread( this );
+
+		LeaveThread( &this->lock, new_owner );	// presumably releases the lock and hands off to new_owner - LeaveThread is defined elsewhere
+	}
 }
@@ -121,9 +146,9 @@
 	enter( this->m, this->count );
 
-	this->prev_mntrs = this_thread()->current_monitors;
-	this->prev_count = this_thread()->current_monitor_count;
-
-	this_thread()->current_monitors      = m;
-	this_thread()->current_monitor_count = count;
+	this->prev_mntrs = this_thread->current_monitors;
+	this->prev_count = this_thread->current_monitor_count;
+
+	this_thread->current_monitors      = m;
+	this_thread->current_monitor_count = count;
 }
 
@@ -131,6 +156,6 @@
 	leave( this->m, this->count );
 
-	this_thread()->current_monitors      = this->prev_mntrs;
-	this_thread()->current_monitor_count = this->prev_count;
+	this_thread->current_monitors      = this->prev_mntrs;
+	this_thread->current_monitor_count = this->prev_count;
 }
 
@@ -159,5 +184,5 @@
 // Internal scheduling
 void wait( condition * this, uintptr_t user_info = 0 ) {
-	LIB_DEBUG_PRINT_SAFE("Waiting\n");
+	// LIB_DEBUG_PRINT_SAFE("Waiting\n");
 
 	brand_condition( this );
@@ -170,14 +195,14 @@
 	unsigned short count = this->monitor_count;
 	unsigned int recursions[ count ];		//Save the current recursion levels to restore them later
-	spinlock *   locks     [ count ];		//We need to pass-in an array of locks to ScheduleInternal
-
-	LIB_DEBUG_PRINT_SAFE("count %i\n", count);
-
-	__condition_node_t waiter = { this_thread(), count, user_info };
+	spinlock *   locks     [ count ];		//We need to pass-in an array of locks to BlockInternal
+
+	// LIB_DEBUG_PRINT_SAFE("count %i\n", count);
+
+	__condition_node_t waiter = { (thread_desc*)this_thread, count, user_info };
 
 	__condition_criterion_t criteria[count];
 	for(int i = 0; i < count; i++) {
 		(&criteria[i]){ this->monitors[i], &waiter };
-		LIB_DEBUG_PRINT_SAFE( "Criterion %p\n", &criteria[i] );
+		// LIB_DEBUG_PRINT_SAFE( "Criterion %p\n", &criteria[i] );
 	}
 
@@ -201,12 +226,12 @@
 	}
 
-	LIB_DEBUG_PRINT_SAFE("Will unblock: ");
+	// LIB_DEBUG_PRINT_SAFE("Will unblock: ");
 	for(int i = 0; i < thread_count; i++) {
-		LIB_DEBUG_PRINT_SAFE("%p ", threads[i]);
-	}
-	LIB_DEBUG_PRINT_SAFE("\n");
+		// LIB_DEBUG_PRINT_SAFE("%p ", threads[i]);
+	}
+	// LIB_DEBUG_PRINT_SAFE("\n");
 
 	// Everything is ready to go to sleep
-	ScheduleInternal( locks, count, threads, thread_count );
+	BlockInternal( locks, count, threads, thread_count );
 
 
@@ -222,5 +247,5 @@
 bool signal( condition * this ) {
 	if( is_empty( this ) ) {
-		LIB_DEBUG_PRINT_SAFE("Nothing to signal\n");
+		// LIB_DEBUG_PRINT_SAFE("Nothing to signal\n");
 		return false;
 	}
@@ -231,8 +256,8 @@
 
 	unsigned short count = this->monitor_count;
-	
+
 	//Some more checking in debug
 	LIB_DEBUG_DO(
-		thread_desc * this_thrd = this_thread();
+		thread_desc * this_thrd = this_thread;
 		if ( this->monitor_count != this_thrd->current_monitor_count ) {
 			abortf( "Signal on condition %p made with different number of monitor(s), expected %i got %i", this, this->monitor_count, this_thrd->current_monitor_count );
@@ -248,5 +273,5 @@
 	//Lock all the monitors
 	lock_all( this->monitors, NULL, count );
-	LIB_DEBUG_PRINT_SAFE("Signalling");
+	// LIB_DEBUG_PRINT_SAFE("Signalling");
 
 	//Pop the head of the waiting queue
@@ -256,10 +281,10 @@
 	for(int i = 0; i < count; i++) {
 		__condition_criterion_t * crit = &node->criteria[i];
-		LIB_DEBUG_PRINT_SAFE(" %p", crit->target);
+		// LIB_DEBUG_PRINT_SAFE(" %p", crit->target);
 		assert( !crit->ready );
 		push( &crit->target->signal_stack, crit );
 	}
 
-	LIB_DEBUG_PRINT_SAFE("\n");
+	// LIB_DEBUG_PRINT_SAFE("\n");
 
 	//Release
@@ -281,15 +306,15 @@
 	unsigned short count = this->monitor_count;
 	unsigned int recursions[ count ];		//Save the current recursion levels to restore them later
-	spinlock *   locks     [ count ];		//We need to pass-in an array of locks to ScheduleInternal
+	spinlock *   locks     [ count ];		//We need to pass-in an array of locks to BlockInternal
 
 	lock_all( this->monitors, locks, count );
 
 	//create creteria
-	__condition_node_t waiter = { this_thread(), count, 0 };
+	__condition_node_t waiter = { (thread_desc*)this_thread, count, 0 };
 
 	__condition_criterion_t criteria[count];
 	for(int i = 0; i < count; i++) {
 		(&criteria[i]){ this->monitors[i], &waiter };
-		LIB_DEBUG_PRINT_SAFE( "Criterion %p\n", &criteria[i] );
+		// LIB_DEBUG_PRINT_SAFE( "Criterion %p\n", &criteria[i] );
 		push( &criteria[i].target->signal_stack, &criteria[i] );
 	}
@@ -309,5 +334,5 @@
 
 	//Everything is ready to go to sleep
-	ScheduleInternal( locks, count, &signallee, 1 );
+	BlockInternal( locks, count, &signallee, 1 );
 
 
@@ -325,5 +350,5 @@
 
 uintptr_t front( condition * this ) {
-	verifyf( !is_empty(this), 
+	verifyf( !is_empty(this),
 		"Attempt to access user data on an empty condition.\n"
 		"Possible cause is not checking if the condition is empty before reading stored data."
@@ -335,9 +360,9 @@
 // Internal scheduling
 void __accept_internal( unsigned short count, __acceptable_t * acceptables, void (*func)(void) ) {
-	// thread_desc * this = this_thread();
+	// thread_desc * this = this_thread;
 
 	// unsigned short count = this->current_monitor_count;
 	// unsigned int recursions[ count ];		//Save the current recursion levels to restore them later
-	// spinlock *   locks     [ count ];		//We need to pass-in an array of locks to ScheduleInternal
+	// spinlock *   locks     [ count ];		//We need to pass-in an array of locks to BlockInternal
 
 	// lock_all( this->current_monitors, locks, count );
@@ -348,5 +373,5 @@
 
 	// // // Everything is ready to go to sleep
-	// // ScheduleInternal( locks, count, threads, thread_count );
+	// // BlockInternal( locks, count, threads, thread_count );
 
 
@@ -393,5 +418,5 @@
 static inline void lock_all( spinlock ** locks, unsigned short count ) {
 	for( int i = 0; i < count; i++ ) {
-		lock( locks[i] );
+		lock_yield( locks[i] DEBUG_CTX2 );
 	}
 }
@@ -400,5 +425,5 @@
 	for( int i = 0; i < count; i++ ) {
 		spinlock * l = &source[i]->lock;
-		lock( l );
+		lock_yield( l DEBUG_CTX2 );
 		if(locks) locks[i] = l;
 	}
@@ -443,8 +468,8 @@
 	for(	int i = 0; i < count; i++ ) {
 
-		LIB_DEBUG_PRINT_SAFE( "Checking %p for %p\n", &criteria[i], target );
+		// LIB_DEBUG_PRINT_SAFE( "Checking %p for %p\n", &criteria[i], target );
 		if( &criteria[i] == target ) {
 			criteria[i].ready = true;
-			LIB_DEBUG_PRINT_SAFE( "True\n" );
+			// LIB_DEBUG_PRINT_SAFE( "True\n" );
 		}
 
@@ -452,12 +477,12 @@
 	}
 
-	LIB_DEBUG_PRINT_SAFE( "Runing %i\n", ready2run );
+	// LIB_DEBUG_PRINT_SAFE( "Runing %i\n", ready2run );
 	return ready2run ? node->waiting_thread : NULL;
 }
 
 static inline void brand_condition( condition * this ) {
-	thread_desc * thrd = this_thread();
+	thread_desc * thrd = this_thread;
 	if( !this->monitors ) {
-		LIB_DEBUG_PRINT_SAFE("Branding\n");
+		// LIB_DEBUG_PRINT_SAFE("Branding\n");
 		assertf( thrd->current_monitors != NULL, "No current monitor to brand condition", thrd->current_monitors );
 		this->monitor_count = thrd->current_monitor_count;
Index: src/libcfa/concurrency/preemption.c
===================================================================
--- src/libcfa/concurrency/preemption.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/preemption.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -15,11 +15,24 @@
 //
 
+#include "libhdr.h"
 #include "preemption.h"
 
 extern "C" {
+#include <errno.h>
+#include <execinfo.h>
+#define __USE_GNU
 #include <signal.h>
-}
-
-#define __CFA_DEFAULT_PREEMPTION__ 10
+#undef __USE_GNU
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+}
+
+
+#ifdef __USE_STREAM__
+#include "fstream"
+#endif
+
+#define __CFA_DEFAULT_PREEMPTION__ 10000
 
 __attribute__((weak)) unsigned int default_preemption() {
@@ -27,20 +40,38 @@
 }
 
+#define __CFA_SIGCXT__ ucontext_t *
+#define __CFA_SIGPARMS__ __attribute__((unused)) int sig, __attribute__((unused)) siginfo_t *sfp, __attribute__((unused)) __CFA_SIGCXT__ cxt
+
 static void preempt( processor   * this );
 static void timeout( thread_desc * this );
 
+void sigHandler_ctxSwitch( __CFA_SIGPARMS__ );
+void sigHandler_alarm    ( __CFA_SIGPARMS__ );
+void sigHandler_segv     ( __CFA_SIGPARMS__ );
+void sigHandler_abort    ( __CFA_SIGPARMS__ );
+
+static void __kernel_sigaction( int sig, void (*handler)(__CFA_SIGPARMS__), int flags );
+LIB_DEBUG_DO( bool validate( alarm_list_t * this ); )
+
+#ifdef __x86_64__
+#define CFA_REG_IP REG_RIP
+#else
+#define CFA_REG_IP REG_EIP
+#endif
+
+
 //=============================================================================================
 // Kernel Preemption logic
 //=============================================================================================
 
-void kernel_start_preemption() {
-
-}
-
 void tick_preemption() {
+	// LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO, "Ticking preemption\n" );
+
 	alarm_list_t * alarms = &systemProcessor->alarms;
 	__cfa_time_t currtime = __kernel_get_time();
 	while( alarms->head && alarms->head->alarm < currtime ) {
 		alarm_node_t * node = pop(alarms);
+		// LIB_DEBUG_PRINT_BUFFER_LOCAL( STDERR_FILENO, "Ticking %p\n", node );
+
 		if( node->kernel_alarm ) {
 			preempt( node->proc );
@@ -50,6 +81,8 @@
 		}
 
+		verify( validate( alarms ) );
+
 		if( node->period > 0 ) {
-			node->alarm += node->period;
+			node->alarm = currtime + node->period;
 			insert( alarms, node );
 		}
@@ -62,9 +95,14 @@
 		__kernel_set_timer( alarms->head->alarm - currtime );
 	}
+
+	verify( validate( alarms ) );
+	// LIB_DEBUG_PRINT_BUFFER_LOCAL( STDERR_FILENO, "Ticking preemption done\n" );
 }
 
 void update_preemption( processor * this, __cfa_time_t duration ) {
-	//     assert( THREAD_GETMEM( disableInt ) && THREAD_GETMEM( disableIntCnt ) == 1 );
+	LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO, "Processor : %p updating preemption to %lu\n", this, duration );
+
 	alarm_node_t * alarm = this->preemption_alarm;
+	duration *= 1000;
 
 	// Alarms need to be enabled
@@ -89,4 +127,104 @@
 }
 
+//=============================================================================================
+// Kernel Signal Tools
+//=============================================================================================
+
+LIB_DEBUG_DO( static thread_local void * last_interrupt = 0; )
+
+extern "C" {
+	void disable_interrupts() {	// add one level to this kernel thread's preemption-disable count
+		__attribute__((unused)) unsigned short new_val = __atomic_add_fetch_2( &disable_preempt_count, 1, __ATOMIC_SEQ_CST );
+		verify( new_val < (unsigned short)65_000 );	// sanity bound on nesting depth
+		verify( new_val != (unsigned short) 0 );	// a zero result would mean the counter wrapped around
+	}
+
+	void enable_interrupts_noRF() {	// remove one level without acting on a pending preemption
+		__attribute__((unused)) unsigned short prev = __atomic_fetch_add_2( &disable_preempt_count, -1, __ATOMIC_SEQ_CST );
+		verify( prev != (unsigned short) 0 );	// must pair with an earlier disable_interrupts()
+	}
+
+	void enable_interrupts( DEBUG_CTX_PARAM ) {	// remove one level and carry out any preemption deferred meanwhile
+		processor * proc   = this_processor;
+		thread_desc * thrd = this_thread;
+		unsigned short prev = __atomic_fetch_add_2( &disable_preempt_count, -1, __ATOMIC_SEQ_CST );
+		verify( prev != (unsigned short) 0 );
+		if( prev == 1 && proc->pending_preemption ) {	// prev == 1 means this call brought the count back to zero
+			proc->pending_preemption = false;
+			BlockInternal( thrd );
+		}
+
+		LIB_DEBUG_DO( proc->last_enable = caller; )
+	}
+}
+
+static inline void signal_unblock( int sig ) {	// remove sig from the calling thread's blocked-signal mask; aborts on failure
+	sigset_t mask;
+	sigemptyset( &mask );
+	sigaddset( &mask, sig );
+
+	if ( pthread_sigmask( SIG_UNBLOCK, &mask, NULL ) != 0 ) {	// failure is reported as a non-zero error number, never -1
+	    abortf( "internal error, pthread_sigmask" );
+	}
+}
+
+static inline void signal_block( int sig ) {	// add sig to the calling thread's blocked-signal mask; aborts on failure
+	sigset_t mask;
+	sigemptyset( &mask );
+	sigaddset( &mask, sig );
+
+	if ( pthread_sigmask( SIG_BLOCK, &mask, NULL ) != 0 ) {	// failure is reported as a non-zero error number, never -1
+	    abortf( "internal error, pthread_sigmask" );
+	}
+}
+
+static inline bool preemption_ready() {	// true when no disable_interrupts() is outstanding on this kernel thread
+	return disable_preempt_count == 0;
+}
+
+static inline void defer_ctxSwitch() {	// record a preemption for enable_interrupts() to carry out later
+	this_processor->pending_preemption = true;
+}
+
+static inline void defer_alarm() {	// flag an alarm tick as pending on the system processor
+	systemProcessor->pending_alarm = true;
+}
+
+static void preempt( processor * this ) {	// send SIGUSR1 to the processor's kernel thread; sigHandler_ctxSwitch is installed for that signal
+	pthread_kill( this->kernel_thread, SIGUSR1 );
+}
+
+static void timeout( thread_desc * this ) {	// placeholder: currently does nothing
+	//TODO : implement waking threads
+}
+
+//=============================================================================================
+// Kernel Signal Startup/Shutdown logic
+//=============================================================================================
+
+static pthread_t alarm_thread;
+void * alarm_loop( __attribute__((unused)) void * args );
+
+void kernel_start_preemption() {	// install the SIGUSR1 preemption handler and start the alarm thread
+	LIB_DEBUG_PRINT_SAFE("Kernel : Starting preemption\n");
+	__kernel_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO );
+	// __kernel_sigaction( SIGSEGV, sigHandler_segv     , SA_SIGINFO );
+	// __kernel_sigaction( SIGBUS , sigHandler_segv     , SA_SIGINFO );
+
+	signal_block( SIGALRM );	// SIGALRM is consumed by alarm_loop through sigwait, not by a handler
+
+	if ( pthread_create( &alarm_thread, NULL, alarm_loop, NULL ) != 0 ) { abortf( "internal error, pthread_create" ); }	// alarm_thread would be indeterminate on failure
+}
+
+void kernel_stop_preemption() {	// block every signal on the calling thread, then stop and join the alarm thread
+	sigset_t mask;
+	sigfillset( &mask );
+	pthread_sigmask( SIG_BLOCK, &mask, NULL );	// sigprocmask is unspecified in a multi-threaded process
+
+	pthread_kill( alarm_thread, SIGINT );	// alarm_loop returns once sigwait hands it SIGINT
+	pthread_join( alarm_thread, NULL );
+	LIB_DEBUG_PRINT_SAFE("Kernel : Preemption stopped\n");
+}
+
 void ?{}( preemption_scope * this, processor * proc ) {
 	(&this->alarm){ proc };
@@ -97,26 +235,18 @@
 
 void ^?{}( preemption_scope * this ) {
+	disable_interrupts();
+
 	update_preemption( this->proc, 0 );
 }
 
 //=============================================================================================
-// Kernel Signal logic
-//=============================================================================================
-
-static inline bool preemption_ready() {
-	return this_processor->disable_preempt_count == 0;
-}
-
-static inline void defer_ctxSwitch() {
-	this_processor->pending_preemption = true;
-}
-
-static inline void defer_alarm() {
-	systemProcessor->pending_alarm = true;
-}
-
-void sigHandler_ctxSwitch( __attribute__((unused)) int sig ) {
+// Kernel Signal Handlers
+//=============================================================================================
+
+void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
+	LIB_DEBUG_DO( last_interrupt = (void *)(cxt->uc_mcontext.gregs[CFA_REG_IP]); )
 	if( preemption_ready() ) {
-		ScheduleInternal( this_processor->current_thread );
+		signal_unblock( SIGUSR1 );
+		BlockInternal( (thread_desc*)this_thread );
 	}
 	else {
@@ -125,19 +255,189 @@
 }
 
-void sigHandler_alarm( __attribute__((unused)) int sig ) {
-	if( try_lock( &systemProcessor->alarm_lock ) ) {
-		tick_preemption();
-		unlock( &systemProcessor->alarm_lock );
-	}
-	else {
-		defer_alarm();
-	}
-}
-
-static void preempt( processor * this ) {
-	pthread_kill( this->kernel_thread, SIGUSR1 );
-}
-
-static void timeout( thread_desc * this ) {
-	//TODO : implement waking threads
-}
+// void sigHandler_alarm( __CFA_SIGPARMS__ ) {
+// 	LIB_DEBUG_DO( last_interrupt = (void *)(cxt->uc_mcontext.gregs[CFA_REG_IP]); )
+// 	verify( this_processor == systemProcessor );
+
+// 	if( try_lock( &systemProcessor->alarm_lock DEBUG_CTX2 ) ) {
+// 		tick_preemption();
+// 		systemProcessor->pending_alarm = false;
+// 		unlock( &systemProcessor->alarm_lock );
+// 	}
+// 	else {
+// 		defer_alarm();
+// 	}
+
+// 	signal_unblock( SIGALRM );
+
+// 	if( preemption_ready() && this_processor->pending_preemption ) {
+
+// 		this_processor->pending_preemption = false;
+// 		BlockInternal( (thread_desc*)this_thread );
+// 	}
+// }
+
+void * alarm_loop( __attribute__((unused)) void * args ) {	// body of the alarm thread: waits for signals and ticks the alarm list on SIGALRM
+	sigset_t mask;
+	sigemptyset( &mask );
+	sigaddset( &mask, SIGALRM );
+	sigaddset( &mask, SIGUSR2 );
+	sigaddset( &mask, SIGINT  );
+
+	if ( pthread_sigmask( SIG_BLOCK, &mask, NULL ) != 0 ) {	// failure is reported as a non-zero error number, never -1
+	    abortf( "internal error, pthread_sigmask" );
+	}
+
+	while( true ) {
+		int sig;
+		if( sigwait( &mask, &sig ) != 0  ) {
+			abortf( "internal error, sigwait" );
+		}
+
+		switch( sig) {
+			case SIGALRM:
+				LIB_DEBUG_PRINT_SAFE("Kernel : Preemption thread tick\n");
+				lock( &systemProcessor->alarm_lock DEBUG_CTX2 );
+				tick_preemption();
+				unlock( &systemProcessor->alarm_lock );
+				break;
+			case SIGUSR2:
+				//TODO other actions
+				break;
+			case SIGINT:	// sent by kernel_stop_preemption to end this thread
+				LIB_DEBUG_PRINT_SAFE("Kernel : Preemption thread stopping\n");
+				return NULL;
+			default:
+				abortf( "internal error, sigwait returned sig %d", sig );
+				break;
+		}
+	}
+}
+
+static void __kernel_sigaction( int sig, void (*handler)(__CFA_SIGPARMS__), int flags ) {	// install handler for sig; exits the process if sigaction fails
+	struct sigaction act;
+
+	act.sa_sigaction = (void (*)(int, siginfo_t *, void *))handler;
+	act.sa_flags = flags;
+	sigemptyset( &act.sa_mask );	// act is an uninitialised local: without this, sigaction() would read an indeterminate mask
+	if ( sigaction( sig, &act, NULL ) == -1 ) {
+		LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO,
+			" __kernel_sigaction( sig:%d, handler:%p, flags:%d ), problem installing signal handler, error(%d) %s.\n",
+			sig, handler, flags, errno, strerror( errno )
+		);
+		_exit( EXIT_FAILURE );
+	}
+}
+
+typedef void (*sa_handler_t)(int);
+
+static void __kernel_sigdefault( int sig ) {	// meant to reset sig to its default action; exits the process if sigaction fails
+	struct sigaction act;
+
+	// act.sa_handler = SIG_DFL;	// FIXME(review): while this stays commented out, sa_handler is never assigned before sigaction() reads act
+	act.sa_flags = 0;
+	sigemptyset( &act.sa_mask );
+
+	if ( sigaction( sig, &act, NULL ) == -1 ) {
+		LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO,
+			" __kernel_sigdefault( sig:%d ), problem reseting signal handler, error(%d) %s.\n",
+			sig, errno, strerror( errno )
+		);
+		_exit( EXIT_FAILURE );
+	}
+}
+
+//=============================================================================================
+// Terminating Signals logic
+//=============================================================================================
+
+LIB_DEBUG_DO(
+	static void __kernel_backtrace( int start ) {	// debug builds only: print the current call stack to stderr, omitting the first `start` frames
+		// skip first N stack frames
+
+		enum { Frames = 50 };
+		void * array[Frames];
+		int size = backtrace( array, Frames );
+		char ** messages = backtrace_symbols( array, size );
+		if ( messages == NULL || size <= 0 ) { free( messages ); return; }	// nothing to print; free( NULL ) is a no-op
+		char * paren = index( messages[0], '(' );	// find executable name: the text before the first '('
+		if ( paren ) { *paren = '\0'; }	// index() returns NULL when the entry has no '('
+		#ifdef __USE_STREAM__
+		serr | "Stack back trace for:" | messages[0] | endl;
+		#else
+		fprintf( stderr, "Stack back trace for: %s\n", messages[0]);
+		#endif
+
+		// print every remaining frame, from index start up to size - 1
+		for ( int i = start; i < size; i += 1 ) {
+			char * name = NULL;
+			char * offset_begin = NULL;
+			char * offset_end = NULL;
+
+			for ( char *p = messages[i]; *p; ++p ) {
+				// find parentheses and +offset
+				if ( *p == '(' ) {
+					name = p;
+				}
+				else if ( *p == '+' ) {
+					offset_begin = p;
+				}
+				else if ( *p == ')' ) {
+					offset_end = p;
+					break;
+				}
+			}
+
+			// if line contains symbol print it
+			int frameNo = i - start;
+			if ( name && offset_begin && offset_end && name < offset_begin ) {
+				// delimit strings
+				*name++ = '\0';
+				*offset_begin++ = '\0';
+				*offset_end++ = '\0';
+
+				#ifdef __USE_STREAM__
+				serr 	| "("  | frameNo | ")" | messages[i] | ":"
+					| name | "+" | offset_begin | offset_end | endl;
+				#else
+				fprintf( stderr, "(%i) %s : %s + %s %s\n", frameNo, messages[i], name, offset_begin, offset_end);
+				#endif
+			}
+			// otherwise, print the whole line
+			else {
+				#ifdef __USE_STREAM__
+				serr | "(" | frameNo | ")" | messages[i] | endl;
+				#else
+				fprintf( stderr, "(%i) %s\n", frameNo, messages[i] );
+				#endif
+			}
+		}
+
+		free( messages );
+	}
+)
+
+// void sigHandler_segv( __CFA_SIGPARMS__ ) {
+// 	LIB_DEBUG_DO(
+// 		#ifdef __USE_STREAM__
+// 		serr 	| "*CFA runtime error* program cfa-cpp terminated with"
+// 			| (sig == SIGSEGV ? "segment fault." : "bus error.")
+// 			| endl;
+// 		#else
+// 		fprintf( stderr, "*CFA runtime error* program cfa-cpp terminated with %s\n", sig == SIGSEGV ? "segment fault." : "bus error." );
+// 		#endif
+
+// 		// skip first 2 stack frames
+// 		__kernel_backtrace( 1 );
+// 	)
+// 	exit( EXIT_FAILURE );
+// }
+
+// void sigHandler_abort( __CFA_SIGPARMS__ ) {
+// 	// skip first 6 stack frames
+// 	LIB_DEBUG_DO( __kernel_backtrace( 6 ); )
+
+// 	// reset default signal handler
+// 	__kernel_sigdefault( SIGABRT );
+
+// 	raise( SIGABRT );
+// }
Index: src/libcfa/concurrency/thread
===================================================================
--- src/libcfa/concurrency/thread	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/thread	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -54,5 +54,5 @@
 }
 
-thread_desc * this_thread(void);
+extern volatile thread_local thread_desc * this_thread;
 
 forall( dtype T | is_thread(T) )
Index: src/libcfa/concurrency/thread.c
===================================================================
--- src/libcfa/concurrency/thread.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/concurrency/thread.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -28,5 +28,5 @@
 }
 
-extern thread_local processor * this_processor;
+extern volatile thread_local processor * this_processor;
 
 //-----------------------------------------------------------------------------
@@ -71,18 +71,21 @@
 	coroutine_desc* thrd_c = get_coroutine(this);
 	thread_desc*  thrd_h = get_thread   (this);
-	thrd_c->last = this_coroutine();
-	this_processor->current_coroutine = thrd_c;
+	thrd_c->last = this_coroutine;
 
-	LIB_DEBUG_PRINT_SAFE("Thread start : %p (t %p, c %p)\n", this, thrd_c, thrd_h);
+	// LIB_DEBUG_PRINT_SAFE("Thread start : %p (t %p, c %p)\n", this, thrd_c, thrd_h);
 
+	disable_interrupts();
 	create_stack(&thrd_c->stack, thrd_c->stack.size);
+	this_coroutine = thrd_c;
 	CtxStart(this, CtxInvokeThread);
+	assert( thrd_c->last->stack.context );
 	CtxSwitch( thrd_c->last->stack.context, thrd_c->stack.context );
 
 	ScheduleThread(thrd_h);
+	enable_interrupts( DEBUG_CTX );
 }
 
 void yield( void ) {
-	ScheduleInternal( this_processor->current_thread );
+	BlockInternal( (thread_desc *)this_thread );
 }
 
@@ -95,5 +98,5 @@
 void ThreadCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
 	// set state of current coroutine to inactive
-	src->state = Inactive;
+	src->state = src->state == Halted ? Halted : Inactive;
 	dst->state = Active;
 
@@ -103,10 +106,11 @@
 	// set new coroutine that the processor is executing
 	// and context switch to it
-	this_processor->current_coroutine = dst;
+	this_coroutine = dst;
+	assert( src->stack.context );
 	CtxSwitch( src->stack.context, dst->stack.context );
-	this_processor->current_coroutine = src;
+	this_coroutine = src;
 
 	// set state of new coroutine to active
-	dst->state = Inactive;
+	dst->state = dst->state == Halted ? Halted : Inactive;
 	src->state = Active;
 }
Index: src/libcfa/libhdr/libalign.h
===================================================================
--- src/libcfa/libhdr/libalign.h	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/libhdr/libalign.h	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -1,3 +1,3 @@
-//                              -*- Mode: C++ -*- 
+//                              -*- Mode: C++ -*-
 //
 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
@@ -18,13 +18,13 @@
 // Free Software  Foundation; either  version 2.1 of  the License, or  (at your
 // option) any later version.
-// 
+//
 // This library is distributed in the  hope that it will be useful, but WITHOUT
 // ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 // for more details.
-// 
+//
 // You should  have received a  copy of the  GNU Lesser General  Public License
 // along  with this library.
-// 
+//
 
 
@@ -33,6 +33,7 @@
 
 #include "assert"
+#include <stdbool.h>
 
-// Minimum size used to align memory boundaries for memory allocations. 
+// Minimum size used to align memory boundaries for memory allocations.
 #define libAlign() (sizeof(double))
 
Index: src/libcfa/libhdr/libdebug.h
===================================================================
--- src/libcfa/libhdr/libdebug.h	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/libcfa/libhdr/libdebug.h	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -18,9 +18,17 @@
 
 #ifdef __CFA_DEBUG__
-	#define LIB_DEBUG_DO(x) x
-	#define LIB_NO_DEBUG_DO(x) ((void)0)
+	#define LIB_DEBUG_DO(...) __VA_ARGS__
+	#define LIB_NO_DEBUG_DO(...)
+	#define DEBUG_CTX __PRETTY_FUNCTION__
+	#define DEBUG_CTX2 , __PRETTY_FUNCTION__
+	#define DEBUG_CTX_PARAM const char * caller
+	#define DEBUG_CTX_PARAM2 , const char * caller
 #else
-	#define LIB_DEBUG_DO(x) ((void)0)
-	#define LIB_NO_DEBUG_DO(x) x      
+	#define LIB_DEBUG_DO(...)
+	#define LIB_NO_DEBUG_DO(...) __VA_ARGS__
+	#define DEBUG_CTX
+	#define DEBUG_CTX2
+	#define DEBUG_CTX_PARAM
+	#define DEBUG_CTX_PARAM2
 #endif
 
@@ -51,17 +59,21 @@
 
 #ifdef __CFA_DEBUG_PRINT__
-      #define LIB_DEBUG_WRITE( fd, buffer, len )  __lib_debug_write( fd, buffer, len )
-      #define LIB_DEBUG_ACQUIRE()                 __lib_debug_acquire()
-      #define LIB_DEBUG_RELEASE()                 __lib_debug_release()
-      #define LIB_DEBUG_PRINT_SAFE(...)           __lib_debug_print_safe   (__VA_ARGS__)
-      #define LIB_DEBUG_PRINT_NOLOCK(...)         __lib_debug_print_nolock (__VA_ARGS__)
-      #define LIB_DEBUG_PRINT_BUFFER(...)         __lib_debug_print_buffer (__VA_ARGS__)
+	#define LIB_DEBUG_WRITE( fd, buffer, len )     __lib_debug_write( fd, buffer, len ) // __CFA_DEBUG_PRINT__ defined: each macro forwards to the matching __lib_debug_* function
+	#define LIB_DEBUG_ACQUIRE()                    __lib_debug_acquire()
+	#define LIB_DEBUG_RELEASE()                    __lib_debug_release()
+	#define LIB_DEBUG_PRINT_SAFE(...)              __lib_debug_print_safe   (__VA_ARGS__)
+	#define LIB_DEBUG_PRINT_NOLOCK(...)            __lib_debug_print_nolock (__VA_ARGS__)
+	#define LIB_DEBUG_PRINT_BUFFER(...)            __lib_debug_print_buffer (__VA_ARGS__)
+	#define LIB_DEBUG_PRINT_BUFFER_DECL(fd, ...)   char text[256]; int len = snprintf( text, 256, __VA_ARGS__ ); __lib_debug_write( fd, text, len );
+	#define LIB_DEBUG_PRINT_BUFFER_LOCAL(fd, ...)  len = snprintf( text, 256, __VA_ARGS__ ); __lib_debug_write( fd, text, len );
 #else
-      #define LIB_DEBUG_WRITE(...)          ((void)0)
-      #define LIB_DEBUG_ACQUIRE()           ((void)0)
-      #define LIB_DEBUG_RELEASE()           ((void)0)
-      #define LIB_DEBUG_PRINT_SAFE(...)     ((void)0)
-      #define LIB_DEBUG_PRINT_NOLOCK(...)   ((void)0)
-      #define LIB_DEBUG_PRINT_BUFFER(...)   ((void)0)
+	#define LIB_DEBUG_WRITE(...)               ((void)0) // __CFA_DEBUG_PRINT__ not defined: every macro in this branch expands to a no-op expression
+	#define LIB_DEBUG_ACQUIRE()                ((void)0)
+	#define LIB_DEBUG_RELEASE()                ((void)0)
+	#define LIB_DEBUG_PRINT_SAFE(...)          ((void)0)
+	#define LIB_DEBUG_PRINT_NOLOCK(...)        ((void)0)
+	#define LIB_DEBUG_PRINT_BUFFER(...)        ((void)0)
+	#define LIB_DEBUG_PRINT_BUFFER_DECL(...)   ((void)0) // unlike the debug variant, declares neither text nor len
+	#define LIB_DEBUG_PRINT_BUFFER_LOCAL(...)  ((void)0)
 #endif
 
Index: src/tests/.expect/concurrent/preempt.txt
===================================================================
--- src/tests/.expect/concurrent/preempt.txt	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
+++ src/tests/.expect/concurrent/preempt.txt	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -0,0 +1,10 @@
+100
+200
+300
+400
+500
+600
+700
+800
+900
+1000
Index: src/tests/.expect/concurrent/sched-int-disjoint.txt
===================================================================
--- src/tests/.expect/concurrent/sched-int-disjoint.txt	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/tests/.expect/concurrent/sched-int-disjoint.txt	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -9,93 +9,3 @@
 9000
 10000
-11000
-12000
-13000
-14000
-15000
-16000
-17000
-18000
-19000
-20000
-21000
-22000
-23000
-24000
-25000
-26000
-27000
-28000
-29000
-30000
-31000
-32000
-33000
-34000
-35000
-36000
-37000
-38000
-39000
-40000
-41000
-42000
-43000
-44000
-45000
-46000
-47000
-48000
-49000
-50000
-51000
-52000
-53000
-54000
-55000
-56000
-57000
-58000
-59000
-60000
-61000
-62000
-63000
-64000
-65000
-66000
-67000
-68000
-69000
-70000
-71000
-72000
-73000
-74000
-75000
-76000
-77000
-78000
-79000
-80000
-81000
-82000
-83000
-84000
-85000
-86000
-87000
-88000
-89000
-90000
-91000
-92000
-93000
-94000
-95000
-96000
-97000
-98000
-99000
-100000
 All waiter done
Index: src/tests/preempt.c
===================================================================
--- src/tests/preempt.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
+++ src/tests/preempt.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -0,0 +1,42 @@
+#include <kernel>
+#include <thread>
+
+#ifndef PREEMPTION_RATE
+#define PREEMPTION_RATE 10_000ul
+#endif
+
+unsigned int default_preemption() { // NOTE(review): presumably a hook the runtime calls to obtain the preemption rate — confirm
+	return PREEMPTION_RATE; // 10_000ul unless PREEMPTION_RATE was defined before this file's #ifndef
+}
+
+static volatile int counter = 0;
+
+thread worker_t { // worker thread type used by this test
+	int value; // compared against counter % 7 in main(worker_t *); set to 0..6 by main()
+};
+
+void ?{}( worker_t * this, int value ) { // constructor: used by declarations such as `worker_t w0 = 0;`
+	this->value = value; // remember which remainder of counter % 7 this worker handles
+}
+
+void main(worker_t * this) { // worker body: loops until the shared counter reaches 1000
+	while(counter < 1000) { // busy loop with no yield; NOTE(review): presumably progress between workers relies on preemption — confirm
+		if( (counter % 7) == this->value ) { // only the worker whose value equals counter % 7 advances the counter
+			int next = __atomic_add_fetch_4(&counter, 1, __ATOMIC_SEQ_CST); // atomic increment; next holds the value after the add
+			if( (next % 100) == 0 ) printf("%d\n", next); // print every 100th value (100, 200, ... 1000)
+		}
+	}
+}
+
+int main(int argc, char* argv[]) { // test driver: declares a processor and seven workers
+	processor p; // declared for the whole of main
+	{
+		worker_t w0 = 0; // each worker is constructed with a distinct value 0..6,
+		worker_t w1 = 1; // i.e. one worker per possible result of counter % 7
+		worker_t w2 = 2;
+		worker_t w3 = 3;
+		worker_t w4 = 4;
+		worker_t w5 = 5;
+		worker_t w6 = 6;
+	} // NOTE(review): presumably the workers are joined when this scope ends — confirm
+}
Index: src/tests/preempt_longrun/Makefile.am
===================================================================
--- src/tests/preempt_longrun/Makefile.am	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/tests/preempt_longrun/Makefile.am	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -17,5 +17,5 @@
 repeats=10
 max_time=30
-preempt=10_000ul
+preempt=1_000ul
 
 REPEAT = ${abs_top_srcdir}/tools/repeat -s
@@ -25,5 +25,5 @@
 CC = @CFA_BINDIR@/@CFA_NAME@
 
-TESTS = barge block create disjoint processor stack wait yield
+TESTS = barge block create disjoint enter enter3 processor stack wait yield
 
 .INTERMEDIATE: ${TESTS}
Index: src/tests/preempt_longrun/Makefile.in
===================================================================
--- src/tests/preempt_longrun/Makefile.in	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/tests/preempt_longrun/Makefile.in	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -450,8 +450,8 @@
 repeats = 10
 max_time = 30
-preempt = 10_000ul
+preempt = 1_000ul
 REPEAT = ${abs_top_srcdir}/tools/repeat -s
 BUILD_FLAGS = -g -Wall -Wno-unused-function -quiet @CFA_FLAGS@ -debug -O2 -DPREEMPTION_RATE=${preempt}
-TESTS = barge block create disjoint processor stack wait yield
+TESTS = barge block create disjoint enter enter3 processor stack wait yield
 all: all-am
 
@@ -663,4 +663,18 @@
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
+enter.log: enter
+	@p='enter'; \
+	b='enter'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
+enter3.log: enter3
+	@p='enter3'; \
+	b='enter3'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
 processor.log: processor
 	@p='processor'; \
Index: src/tests/preempt_longrun/create.c
===================================================================
--- src/tests/preempt_longrun/create.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/tests/preempt_longrun/create.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -10,11 +10,11 @@
 }
 
-thread Worker {};
+thread worker_t {};
 
-void main(Worker * this) {}
+void main(worker_t * this) {}
 
 int main(int argc, char* argv[]) {
-	for(int i = 0; i < 100_000ul; i++) {
-		Worker w;
+	for(int i = 0; i < 10_000ul; i++) {
+		worker_t w[7];
 	}
 }
Index: src/tests/preempt_longrun/enter.c
===================================================================
--- src/tests/preempt_longrun/enter.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
+++ src/tests/preempt_longrun/enter.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -0,0 +1,40 @@
+#include <kernel>
+#include <monitor>
+#include <thread>
+
+#undef N
+static const unsigned long N  = 70_000ul;
+
+#ifndef PREEMPTION_RATE
+#define PREEMPTION_RATE 10_000ul
+#endif
+
+unsigned int default_preemption() { // NOTE(review): presumably a hook the runtime calls to obtain the preemption rate — confirm
+	return PREEMPTION_RATE; // 10_000ul unless PREEMPTION_RATE was defined before this file's #ifndef
+}
+
+monitor mon_t {};
+
+mon_t mon;
+
+void foo( mon_t * mutex this ) {} // empty body; NOTE(review): presumably each call acquires and releases the monitor via the mutex parameter — confirm
+
+thread worker_t {};
+
+void main( worker_t * this ) { // worker body: calls foo on the shared monitor N times
+	for( unsigned long i = 0; i < N; i++ ) { // N is the file-scope constant 70_000ul
+		foo( &mon ); // every worker passes the same global monitor
+	}
+}
+
+extern "C" {
+static worker_t * workers;
+}
+
+int main(int argc, char * argv[] ) { // test driver: declares a processor and an array of seven workers
+	processor p; // declared for the whole of main
+	{
+		worker_t w[7]; // seven workers of type worker_t
+		workers = w; // store the array address in the file-scope pointer; nothing in this file reads it (NOTE(review): presumably a debugging aid — confirm)
+	}
+}
Index: src/tests/preempt_longrun/enter3.c
===================================================================
--- src/tests/preempt_longrun/enter3.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
+++ src/tests/preempt_longrun/enter3.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -0,0 +1,40 @@
+#include <kernel>
+#include <monitor>
+#include <thread>
+
+#undef N
+static const unsigned long N  = 50_000ul;
+
+#ifndef PREEMPTION_RATE
+#define PREEMPTION_RATE 10_000ul
+#endif
+
+unsigned int default_preemption() { // NOTE(review): presumably a hook the runtime calls to obtain the preemption rate — confirm
+	return PREEMPTION_RATE; // 10_000ul unless PREEMPTION_RATE was defined before this file's #ifndef
+}
+
+monitor mon_t {};
+
+mon_t mon1, mon2, mon3;
+
+void foo( mon_t * mutex a, mon_t * mutex b, mon_t * mutex c ) {} // empty body; NOTE(review): presumably each call acquires all three monitors via the mutex parameters — confirm
+
+thread worker_t {};
+
+void main( worker_t * this ) { // worker body: calls foo on the three shared monitors N times
+	for( unsigned long i = 0; i < N; i++ ) { // N is the file-scope constant 50_000ul
+		foo( &mon1, &mon2, &mon3 ); // every worker passes the same three global monitors, in the same order
+	}
+}
+
+extern "C" {
+static worker_t * workers;
+}
+
+int main(int argc, char * argv[] ) { // test driver: declares a processor and an array of seven workers
+	processor p; // declared for the whole of main
+	{
+		worker_t w[7]; // seven workers of type worker_t
+		workers = w; // store the array address in the file-scope pointer; nothing in this file reads it (NOTE(review): presumably a debugging aid — confirm)
+	}
+}
Index: src/tests/preempt_longrun/processor.c
===================================================================
--- src/tests/preempt_longrun/processor.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/tests/preempt_longrun/processor.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -10,10 +10,10 @@
 }
 
-thread Worker {};
+thread worker_t {};
 
-void main(Worker * this) {}
+void main(worker_t * this) {}
 
 int main(int argc, char* argv[]) {
-	for(int i = 0; i < 100_000ul; i++) {
+	for(int i = 0; i < 10_000ul; i++) {
 		processor p;
 	}
Index: src/tests/preempt_longrun/stack.c
===================================================================
--- src/tests/preempt_longrun/stack.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/tests/preempt_longrun/stack.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -12,15 +12,15 @@
 }
 
-thread Worker {};
+thread worker_t {};
 
-void main(Worker * this) {
+void main(worker_t * this) {
 	volatile long p = 5_021_609ul;
 	volatile long a = 326_417ul;
 	volatile long n = 1l;
-	for (volatile long i = 0; i < p; i++) { 
-		n *= a; 
-		n %= p; 
+	for (volatile long i = 0; i < p; i++) {
+		n *= a;
+		n %= p;
 	}
-		
+
 	if( n != a ) {
 		abort();
@@ -28,8 +28,13 @@
 }
 
+extern "C" {
+static worker_t * workers;
+}
+
 int main(int argc, char* argv[]) {
 	processor p;
 	{
-		Worker w[7];
+		worker_t w[7];
+		workers = w;
 	}
 }
Index: src/tests/preempt_longrun/yield.c
===================================================================
--- src/tests/preempt_longrun/yield.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/tests/preempt_longrun/yield.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -10,10 +10,14 @@
 }
 
-thread Worker {};
+thread worker_t {};
 
-void main(Worker * this) {
-	for(int i = 0; i < 100_000ul; i++) {
+void main(worker_t * this) {
+	for(int i = 0; i < 325_000ul; i++) {
 		yield();
 	}
+}
+
+extern "C" {
+static worker_t * workers;
 }
 
@@ -21,5 +25,6 @@
 	processor p;
 	{
-		Worker w[7];
+		worker_t w[7];
+		workers = w;
 	}
 }
Index: src/tests/sched-int-block.c
===================================================================
--- src/tests/sched-int-block.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/tests/sched-int-block.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -6,5 +6,5 @@
 
 #ifndef N
-#define N 100_000
+#define N 10_000
 #endif
 
@@ -31,5 +31,5 @@
 //------------------------------------------------------------------------------
 void wait_op( global_data_t * mutex a, global_data_t * mutex b, unsigned i ) {
-	wait( &cond, (uintptr_t)this_thread() );
+	wait( &cond, (uintptr_t)this_thread );
 
 	yield( ((unsigned)rand48()) % 10 );
@@ -40,5 +40,5 @@
 	}
 
-	a->last_thread = b->last_thread = this_thread();
+	a->last_thread = b->last_thread = this_thread;
 
 	yield( ((unsigned)rand48()) % 10 );
@@ -56,5 +56,5 @@
 	yield( ((unsigned)rand48()) % 10 );
 
-	a->last_thread = b->last_thread = a->last_signaller = b->last_signaller = this_thread();
+	a->last_thread = b->last_thread = a->last_signaller = b->last_signaller = this_thread;
 
 	if( !is_empty( &cond ) ) {
@@ -86,5 +86,5 @@
 //------------------------------------------------------------------------------
 void barge_op( global_data_t * mutex a ) {
-	a->last_thread = this_thread();
+	a->last_thread = this_thread;
 }
 
Index: src/tests/sched-int-disjoint.c
===================================================================
--- src/tests/sched-int-disjoint.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/tests/sched-int-disjoint.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -5,5 +5,5 @@
 
 #ifndef N
-#define N 100_000
+#define N 10_000
 #endif
 
@@ -42,7 +42,7 @@
 
 void main( Barger * this ) {
-	while( !all_done ) { 
+	while( !all_done ) {
 		barge( &data );
-		yield(); 
+		yield();
 	}
 }
@@ -53,5 +53,5 @@
 	wait( &cond );
 	if( d->state != SIGNAL ) {
-		sout | "ERROR barging!" | endl; 
+		sout | "ERROR barging!" | endl;
 	}
 
@@ -85,5 +85,5 @@
 	bool running = data.counter < N && data.counter > 0;
 	if( data.state != SIGNAL && running ) {
-		sout | "ERROR Eager signal" | data.state | endl; 
+		sout | "ERROR Eager signal" | data.state | endl;
 	}
 }
@@ -92,7 +92,7 @@
 
 void main( Signaller * this ) {
-	while( !all_done ) { 
+	while( !all_done ) {
 		logic( &mut );
-		yield(); 
+		yield();
 	}
 }
@@ -111,4 +111,4 @@
 		sout | "All waiter done" | endl;
 		all_done = true;
-	}	
+	}
 }
Index: src/tests/sched-int-wait.c
===================================================================
--- src/tests/sched-int-wait.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/tests/sched-int-wait.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -50,14 +50,14 @@
 		unsigned action = (unsigned)rand48() % 4;
 		switch( action ) {
-			case 0: 
+			case 0:
 				signal( &condABC, &globalA, &globalB, &globalC );
 				break;
-			case 1: 
+			case 1:
 				signal( &condAB , &globalA, &globalB );
 				break;
-			case 2: 
+			case 2:
 				signal( &condBC , &globalB, &globalC );
 				break;
-			case 3: 
+			case 3:
 				signal( &condAC , &globalA, &globalC );
 				break;
@@ -67,5 +67,5 @@
 		}
 		yield();
-	}	
+	}
 }
 
Index: src/tests/thread.c
===================================================================
--- src/tests/thread.c	(revision 8b47e50537b2c9fbfcba7eea4459e2286c474e5c)
+++ src/tests/thread.c	(revision e9bb0e5c355dba790bfbe4c3dafa61cde70ede52)
@@ -4,15 +4,9 @@
 #include <thread>
 
-// thread First;
-// void main(First* this);
+thread First  { semaphore* lock; };
+thread Second { semaphore* lock; };
 
-// thread Second;
-// void main(Second* this);
-
-thread First  { signal_once* lock; };
-thread Second { signal_once* lock; };
-
-void ?{}( First * this, signal_once* lock ) { this->lock = lock; }
-void ?{}( Second * this, signal_once* lock ) { this->lock = lock; }
+void ?{}( First * this, semaphore* lock ) { this->lock = lock; }
+void ?{}( Second * this, semaphore* lock ) { this->lock = lock; }
 
 void main(First* this) {
@@ -21,9 +15,9 @@
 		yield();
 	}
-	signal(this->lock);
+	V(this->lock);
 }
 
 void main(Second* this) {
-	wait(this->lock);
+	P(this->lock);
 	for(int i = 0; i < 10; i++) {
 		sout | "Second : Suspend No." | i + 1 | endl;
@@ -34,5 +28,5 @@
 
 int main(int argc, char* argv[]) {
-	signal_once lock;
+	semaphore lock = { 0 };
 	sout | "User main begin" | endl;
 	{
