Index: src/libcfa/concurrency/alarm.c
===================================================================
--- src/libcfa/concurrency/alarm.c	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/alarm.c	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -16,7 +16,13 @@
 
 extern "C" {
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
 #include <time.h>
+#include <unistd.h>
 #include <sys/time.h>
 }
+
+#include "libhdr.h"
 
 #include "alarm.h"
@@ -31,8 +37,11 @@
 	timespec curr;
 	clock_gettime( CLOCK_REALTIME, &curr );
-	return ((__cfa_time_t)curr.tv_sec * TIMEGRAN) + curr.tv_nsec;
+	__cfa_time_t curr_time = ((__cfa_time_t)curr.tv_sec * TIMEGRAN) + curr.tv_nsec;
+	// LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO, "Kernel : current time is %lu\n", curr_time );
+	return curr_time;
 }
 
 void __kernel_set_timer( __cfa_time_t alarm ) {
+	LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO, "Kernel : set timer to %lu\n", (__cfa_time_t)alarm );
 	itimerval val;
 	val.it_value.tv_sec = alarm / TIMEGRAN;			// seconds
@@ -71,6 +80,15 @@
 }
 
+LIB_DEBUG_DO( bool validate( alarm_list_t * this ) {
+	alarm_node_t ** it = &this->head;
+	while( (*it) ) {
+		it = &(*it)->next;
+	}
+
+	return it == this->tail;
+})
+
 static inline void insert_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t p ) {
-	assert( !n->next );
+	verify( !n->next );
 	if( p == this->tail ) {
 		this->tail = &n->next;
@@ -80,4 +98,6 @@
 	}
 	*p = n;
+
+	verify( validate( this ) );
 }
 
@@ -89,4 +109,6 @@
 
 	insert_at( this, n, it );
+
+	verify( validate( this ) );
 }
 
@@ -100,4 +122,5 @@
 		head->next = NULL;
 	}
+	verify( validate( this ) );
 	return head;
 }
@@ -105,43 +128,60 @@
 static inline void remove_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t it ) {
 	verify( it );
-	verify( (*it)->next == n );
+	verify( (*it) == n );
 
-	(*it)->next = n->next;
+	(*it) = n->next;
 	if( !n-> next ) {
 		this->tail = it;
 	}
 	n->next = NULL;
+
+	verify( validate( this ) );
 }
 
 static inline void remove( alarm_list_t * this, alarm_node_t * n ) {
 	alarm_node_t ** it = &this->head;
-	while( (*it) && (*it)->next != n ) {
+	while( (*it) && (*it) != n ) {
 		it = &(*it)->next;
 	}
 
+	verify( validate( this ) );
+
 	if( *it ) { remove_at( this, n, it ); }
+
+	verify( validate( this ) );
 }
 
 void register_self( alarm_node_t * this ) {
 	disable_interrupts();
-	assert( !systemProcessor->pending_alarm );
-	lock( &systemProcessor->alarm_lock );
+	verify( !systemProcessor->pending_alarm );
+	lock( &systemProcessor->alarm_lock DEBUG_CTX2 );
 	{
+		verify( validate( &systemProcessor->alarms ) );
+		bool first = !systemProcessor->alarms.head;
+
 		insert( &systemProcessor->alarms, this );
 		if( systemProcessor->pending_alarm ) {
 			tick_preemption();
 		}
+		if( first ) {
+			__kernel_set_timer( systemProcessor->alarms.head->alarm - __kernel_get_time() );
+		}
 	}
 	unlock( &systemProcessor->alarm_lock );
 	this->set = true;
-	enable_interrupts();
+	enable_interrupts( DEBUG_CTX );
 }
 
 void unregister_self( alarm_node_t * this ) {
+	// LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO, "Kernel : unregister %p start\n", this );
 	disable_interrupts();
-	lock( &systemProcessor->alarm_lock );
-	remove( &systemProcessor->alarms, this );
+	lock( &systemProcessor->alarm_lock DEBUG_CTX2 );
+	{
+		verify( validate( &systemProcessor->alarms ) );
+		remove( &systemProcessor->alarms, this );
+	}
 	unlock( &systemProcessor->alarm_lock );
-	disable_interrupts();
+	enable_interrupts( DEBUG_CTX );
 	this->set = false;
+	// LIB_DEBUG_PRINT_BUFFER_LOCAL( STDERR_FILENO, "Kernel : unregister %p end\n", this );
 }
Index: src/libcfa/concurrency/coroutine
===================================================================
--- src/libcfa/concurrency/coroutine	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/coroutine	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -63,5 +63,5 @@
 
 // Get current coroutine
-coroutine_desc * this_coroutine(void);
+extern volatile thread_local coroutine_desc * this_coroutine;
 
 // Private wrappers for context switch and stack creation
@@ -71,5 +71,5 @@
 // Suspend implementation inlined for performance
 static inline void suspend() {
-	coroutine_desc * src = this_coroutine();		// optimization
+	coroutine_desc * src = this_coroutine;		// optimization
 
 	assertf( src->last != 0,
@@ -88,5 +88,5 @@
 forall(dtype T | is_coroutine(T))
 static inline void resume(T * cor) {
-	coroutine_desc * src = this_coroutine();		// optimization
+	coroutine_desc * src = this_coroutine;		// optimization
 	coroutine_desc * dst = get_coroutine(cor);
 
@@ -112,5 +112,5 @@
 
 static inline void resume(coroutine_desc * dst) {
-	coroutine_desc * src = this_coroutine();		// optimization
+	coroutine_desc * src = this_coroutine;		// optimization
 
 	// not resuming self ?
Index: src/libcfa/concurrency/coroutine.c
===================================================================
--- src/libcfa/concurrency/coroutine.c	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/coroutine.c	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -32,5 +32,5 @@
 #include "invoke.h"
 
-extern thread_local processor * this_processor;
+extern volatile thread_local processor * this_processor;
 
 //-----------------------------------------------------------------------------
@@ -44,5 +44,5 @@
 // Coroutine ctors and dtors
 void ?{}(coStack_t* this) {
-	this->size		= 10240;	// size of stack
+	this->size		= 65000;	// size of stack
 	this->storage	= NULL;	// pointer to stack
 	this->limit		= NULL;	// stack grows towards stack limit
@@ -50,5 +50,5 @@
 	this->context	= NULL;	// address of cfa_context_t
 	this->top		= NULL;	// address of top of storage
-	this->userStack	= false;	
+	this->userStack	= false;
 }
 
@@ -106,12 +106,13 @@
 
 	// set state of current coroutine to inactive
-	src->state = Inactive;
+	src->state = src->state == Halted ? Halted : Inactive;
 
 	// set new coroutine that task is executing
-	this_processor->current_coroutine = dst;
+	this_coroutine = dst;
 
 	// context switch to specified coroutine
+	assert( src->stack.context );
 	CtxSwitch( src->stack.context, dst->stack.context );
-	// when CtxSwitch returns we are back in the src coroutine		
+	// when CtxSwitch returns we are back in the src coroutine
 
 	// set state of new coroutine to active
@@ -131,5 +132,5 @@
 		this->size = libCeiling( storageSize, 16 );
 		// use malloc/memalign because "new" raises an exception for out-of-memory
-		
+
 		// assume malloc has 8 byte alignment so add 8 to allow rounding up to 16 byte alignment
 		LIB_DEBUG_DO( this->storage = memalign( pageSize, cxtSize + this->size + pageSize ) );
Index: src/libcfa/concurrency/invoke.c
===================================================================
--- src/libcfa/concurrency/invoke.c	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/invoke.c	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -29,9 +29,11 @@
 
 extern void __suspend_internal(void);
-extern void __leave_monitor_desc( struct monitor_desc * this );
+extern void __leave_thread_monitor( struct thread_desc * this );
+extern void disable_interrupts();
+extern void enable_interrupts( DEBUG_CTX_PARAM );
 
 void CtxInvokeCoroutine(
-      void (*main)(void *), 
-      struct coroutine_desc *(*get_coroutine)(void *), 
+      void (*main)(void *),
+      struct coroutine_desc *(*get_coroutine)(void *),
       void *this
 ) {
@@ -56,20 +58,28 @@
 
 void CtxInvokeThread(
-      void (*dtor)(void *), 
-      void (*main)(void *), 
-      struct thread_desc *(*get_thread)(void *), 
+      void (*dtor)(void *),
+      void (*main)(void *),
+      struct thread_desc *(*get_thread)(void *),
       void *this
 ) {
+      // First suspend, once the thread arrives here,
+      // the function pointer to main can be invalidated without risk
       __suspend_internal();
 
+      // Fetch the thread handle from the user defined thread structure
       struct thread_desc* thrd = get_thread( this );
-      struct coroutine_desc* cor = &thrd->cor;
-      struct monitor_desc* mon = &thrd->mon;
-      cor->state = Active;
 
-      // LIB_DEBUG_PRINTF("Invoke Thread : invoking main %p (args %p)\n", main, this);
+      // Officially start the thread by enabling preemption
+      enable_interrupts( DEBUG_CTX );
+
+      // Call the main of the thread
       main( this );
 
-      __leave_monitor_desc( mon );
+      // To exit a thread we must :
+      // 1 - Mark it as halted
+      // 2 - Leave its monitor
+      // 3 - Disable the interrupts
+      // The order of these 3 operations is very important
+      __leave_thread_monitor( thrd );
 
       //Final suspend, should never return
@@ -80,7 +90,7 @@
 
 void CtxStart(
-      void (*main)(void *), 
-      struct coroutine_desc *(*get_coroutine)(void *), 
-      void *this, 
+      void (*main)(void *),
+      struct coroutine_desc *(*get_coroutine)(void *),
+      void *this,
       void (*invoke)(void *)
 ) {
@@ -108,5 +118,5 @@
 	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->rturn = invoke;
       ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->mxcr = 0x1F80; //Vol. 2A 3-520
-      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7 
+      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7
 
 #elif defined( __x86_64__ )
@@ -128,5 +138,5 @@
       ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fixedRegisters[1] = invoke;
       ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->mxcr = 0x1F80; //Vol. 2A 3-520
-      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7 
+      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7
 #else
       #error Only __i386__ and __x86_64__ is supported for threads in cfa
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/invoke.h	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -31,4 +31,8 @@
       struct spinlock {
             volatile int lock;
+            #ifdef __CFA_DEBUG__
+                  const char * prev_name;
+                  void* prev_thrd;
+            #endif
       };
 
@@ -83,5 +87,4 @@
             struct __thread_queue_t entry_queue;      // queue of threads that are blocked waiting for the monitor
             struct __condition_stack_t signal_stack;  // stack of conditions to run next once we exit the monitor
-            struct monitor_desc * stack_owner;        // if bulk acquiring was used we need to synchronize signals with an other monitor
             unsigned int recursion;                   // monitor routines can be called recursively, we need to keep track of that
       };
@@ -99,5 +102,5 @@
 #ifndef _INVOKE_PRIVATE_H_
 #define _INVOKE_PRIVATE_H_
-      
+
       struct machine_context_t {
             void *SP;
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/kernel	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -28,7 +28,8 @@
 //-----------------------------------------------------------------------------
 // Locks
-bool try_lock( spinlock * );
-void lock( spinlock * );
-void unlock( spinlock * );
+bool try_lock  ( spinlock * DEBUG_CTX_PARAM2 );
+void lock      ( spinlock * DEBUG_CTX_PARAM2 );
+void lock_yield( spinlock * DEBUG_CTX_PARAM2 );
+void unlock    ( spinlock * );
 
 struct signal_once {
@@ -68,5 +69,5 @@
 	unsigned short thrd_count;
 };
-static inline void ?{}(FinishAction * this) { 
+static inline void ?{}(FinishAction * this) {
 	this->action_code = No_Action;
 	this->thrd = NULL;
@@ -78,8 +79,6 @@
 	struct processorCtx_t * runner;
 	cluster * cltr;
-	coroutine_desc * current_coroutine;
-	thread_desc * current_thread;
 	pthread_t kernel_thread;
-	
+
 	signal_once terminated;
 	volatile bool is_terminated;
@@ -90,7 +89,7 @@
 	unsigned int preemption;
 
-	unsigned short disable_preempt_count;
+	bool pending_preemption;
 
-	bool pending_preemption;
+	char * last_enable;
 };
 
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/kernel.c	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -15,12 +15,5 @@
 //
 
-#include "startup.h"
-
-//Start and stop routine for the kernel, declared first to make sure they run first
-void kernel_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
-void kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
-
-//Header
-#include "kernel_private.h"
+#include "libhdr.h"
 
 //C Includes
@@ -35,10 +28,15 @@
 
 //CFA Includes
-#include "libhdr.h"
+#include "kernel_private.h"
 #include "preemption.h"
+#include "startup.h"
 
 //Private includes
 #define __CFA_INVOKE_PRIVATE__
 #include "invoke.h"
+
+//Start and stop routine for the kernel, declared first to make sure they run first
+void kernel_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
+void kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
 
 //-----------------------------------------------------------------------------
@@ -59,18 +57,13 @@
 // Global state
 
-thread_local processor * this_processor;
-
-coroutine_desc * this_coroutine(void) {
-	return this_processor->current_coroutine;
-}
-
-thread_desc * this_thread(void) {
-	return this_processor->current_thread;
-}
+volatile thread_local processor * this_processor;
+volatile thread_local coroutine_desc * this_coroutine;
+volatile thread_local thread_desc * this_thread;
+volatile thread_local unsigned short disable_preempt_count = 1;
 
 //-----------------------------------------------------------------------------
 // Main thread construction
 struct current_stack_info_t {
-	machine_context_t ctx;	
+	machine_context_t ctx;
 	unsigned int size;		// size of stack
 	void *base;				// base of stack
@@ -106,5 +99,5 @@
 
 void ?{}( coroutine_desc * this, current_stack_info_t * info) {
-	(&this->stack){ info };	
+	(&this->stack){ info };
 	this->name = "Main Thread";
 	this->errno_ = 0;
@@ -136,11 +129,8 @@
 void ?{}(processor * this, cluster * cltr) {
 	this->cltr = cltr;
-	this->current_coroutine = NULL;
-	this->current_thread = NULL;
 	(&this->terminated){};
 	this->is_terminated = false;
 	this->preemption_alarm = NULL;
 	this->preemption = default_preemption();
-	this->disable_preempt_count = 1;		//Start with interrupts disabled
 	this->pending_preemption = false;
 
@@ -150,15 +140,17 @@
 void ?{}(processor * this, cluster * cltr, processorCtx_t * runner) {
 	this->cltr = cltr;
-	this->current_coroutine = NULL;
-	this->current_thread = NULL;
 	(&this->terminated){};
 	this->is_terminated = false;
-	this->disable_preempt_count = 0;
+	this->preemption_alarm = NULL;
+	this->preemption = default_preemption();
 	this->pending_preemption = false;
+	this->kernel_thread = pthread_self();
 
 	this->runner = runner;
-	LIB_DEBUG_PRINT_SAFE("Kernel : constructing processor context %p\n", runner);
+	LIB_DEBUG_PRINT_SAFE("Kernel : constructing system processor context %p\n", runner);
 	runner{ this };
 }
+
+LIB_DEBUG_DO( bool validate( alarm_list_t * this ); )
 
 void ?{}(system_proc_t * this, cluster * cltr, processorCtx_t * runner) {
@@ -168,4 +160,6 @@
 
 	(&this->proc){ cltr, runner };
+
+	verify( validate( &this->alarms ) );
 }
 
@@ -184,5 +178,5 @@
 
 void ^?{}(cluster * this) {
-	
+
 }
 
@@ -203,5 +197,5 @@
 
 		thread_desc * readyThread = NULL;
-		for( unsigned int spin_count = 0; ! this->is_terminated; spin_count++ ) 
+		for( unsigned int spin_count = 0; ! this->is_terminated; spin_count++ )
 		{
 			readyThread = nextThread( this->cltr );
@@ -209,5 +203,9 @@
 			if(readyThread)
 			{
+				verify( disable_preempt_count > 0 );
+
 				runThread(this, readyThread);
+
+				verify( disable_preempt_count > 0 );
 
 				//Some actions need to be taken from the kernel
@@ -229,15 +227,15 @@
 }
 
-// runThread runs a thread by context switching 
-// from the processor coroutine to the target thread 
+// runThread runs a thread by context switching
+// from the processor coroutine to the target thread
 void runThread(processor * this, thread_desc * dst) {
 	coroutine_desc * proc_cor = get_coroutine(this->runner);
 	coroutine_desc * thrd_cor = get_coroutine(dst);
-	
+
 	//Reset the terminating actions here
 	this->finish.action_code = No_Action;
 
 	//Update global state
-	this->current_thread = dst;
+	this_thread = dst;
 
 	// Context Switch to the thread
@@ -246,5 +244,5 @@
 }
 
-// Once a thread has finished running, some of 
+// Once a thread has finished running, some of
 // its final actions must be executed from the kernel
 void finishRunning(processor * this) {
@@ -256,5 +254,5 @@
 	}
 	else if( this->finish.action_code == Release_Schedule ) {
-		unlock( this->finish.lock );		
+		unlock( this->finish.lock );
 		ScheduleThread( this->finish.thrd );
 	}
@@ -289,7 +287,10 @@
 	processor * proc = (processor *) arg;
 	this_processor = proc;
+	this_coroutine = NULL;
+	this_thread = NULL;
+	disable_preempt_count = 1;
 	// SKULLDUGGERY: We want to create a context for the processor coroutine
 	// which is needed for the 2-step context switch. However, there is no reason
-	// to waste the perfectly valid stack create by pthread. 
+	// to waste the perfectly valid stack created by pthread.
 	current_stack_info_t info;
 	machine_context_t ctx;
@@ -300,13 +301,13 @@
 
 	//Set global state
-	proc->current_coroutine = &proc->runner->__cor;
-	proc->current_thread = NULL;
+	this_coroutine = &proc->runner->__cor;
+	this_thread = NULL;
 
 	//We now have a proper context from which to schedule threads
 	LIB_DEBUG_PRINT_SAFE("Kernel : core %p created (%p, %p)\n", proc, proc->runner, &ctx);
 
-	// SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't 
-	// resume it to start it like it normally would, it will just context switch 
-	// back to here. Instead directly call the main since we already are on the 
+	// SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
+	// resume it to start it like it normally would, it will just context switch
+	// back to here. Instead directly call the main since we already are on the
 	// appropriate stack.
 	proc_cor_storage.__cor.state = Active;
@@ -315,5 +316,5 @@
 
 	// Main routine of the core returned, the core is now fully terminated
-	LIB_DEBUG_PRINT_SAFE("Kernel : core %p main ended (%p)\n", proc, proc->runner);	
+	LIB_DEBUG_PRINT_SAFE("Kernel : core %p main ended (%p)\n", proc, proc->runner);
 
 	return NULL;
@@ -322,8 +323,8 @@
 void start(processor * this) {
 	LIB_DEBUG_PRINT_SAFE("Kernel : Starting core %p\n", this);
-	
+
 	pthread_create( &this->kernel_thread, NULL, CtxInvokeProcessor, (void*)this );
 
-	LIB_DEBUG_PRINT_SAFE("Kernel : core %p started\n", this);	
+	LIB_DEBUG_PRINT_SAFE("Kernel : core %p started\n", this);
 }
 
@@ -331,51 +332,89 @@
 // Scheduler routines
 void ScheduleThread( thread_desc * thrd ) {
-	if( !thrd ) return;
+	// if( !thrd ) return;
+	assert( thrd );
+	assert( thrd->cor.state != Halted );
+
+	verify( disable_preempt_count > 0 );
 
 	verifyf( thrd->next == NULL, "Expected null got %p", thrd->next );
-	
-	lock( &systemProcessor->proc.cltr->lock );
+
+	lock( &systemProcessor->proc.cltr->lock DEBUG_CTX2 );
 	append( &systemProcessor->proc.cltr->ready_queue, thrd );
 	unlock( &systemProcessor->proc.cltr->lock );
+
+	verify( disable_preempt_count > 0 );
 }
 
 thread_desc * nextThread(cluster * this) {
-	lock( &this->lock );
+	verify( disable_preempt_count > 0 );
+	lock( &this->lock DEBUG_CTX2 );
 	thread_desc * head = pop_head( &this->ready_queue );
 	unlock( &this->lock );
+	verify( disable_preempt_count > 0 );
 	return head;
 }
 
-void ScheduleInternal() {
+void BlockInternal() {
+	disable_interrupts();
+	verify( disable_preempt_count > 0 );
 	suspend();
-}
-
-void ScheduleInternal( spinlock * lock ) {
+	verify( disable_preempt_count > 0 );
+	enable_interrupts( DEBUG_CTX );
+}
+
+void BlockInternal( spinlock * lock ) {
+	disable_interrupts();
 	this_processor->finish.action_code = Release;
 	this_processor->finish.lock = lock;
+
+	verify( disable_preempt_count > 0 );
 	suspend();
-}
-
-void ScheduleInternal( thread_desc * thrd ) {
+	verify( disable_preempt_count > 0 );
+
+	enable_interrupts( DEBUG_CTX );
+}
+
+void BlockInternal( thread_desc * thrd ) {
+	disable_interrupts();
+	assert( thrd->cor.state != Halted );
 	this_processor->finish.action_code = Schedule;
 	this_processor->finish.thrd = thrd;
+
+	verify( disable_preempt_count > 0 );
 	suspend();
-}
-
-void ScheduleInternal( spinlock * lock, thread_desc * thrd ) {
+	verify( disable_preempt_count > 0 );
+
+	enable_interrupts( DEBUG_CTX );
+}
+
+void BlockInternal( spinlock * lock, thread_desc * thrd ) {
+	disable_interrupts();
 	this_processor->finish.action_code = Release_Schedule;
 	this_processor->finish.lock = lock;
 	this_processor->finish.thrd = thrd;
+
+	verify( disable_preempt_count > 0 );
 	suspend();
-}
-
-void ScheduleInternal(spinlock ** locks, unsigned short count) {
+	verify( disable_preempt_count > 0 );
+
+	enable_interrupts( DEBUG_CTX );
+}
+
+void BlockInternal(spinlock ** locks, unsigned short count) {
+	disable_interrupts();
 	this_processor->finish.action_code = Release_Multi;
 	this_processor->finish.locks = locks;
 	this_processor->finish.lock_count = count;
+
+	verify( disable_preempt_count > 0 );
 	suspend();
-}
-
-void ScheduleInternal(spinlock ** locks, unsigned short lock_count, thread_desc ** thrds, unsigned short thrd_count) {
+	verify( disable_preempt_count > 0 );
+
+	enable_interrupts( DEBUG_CTX );
+}
+
+void BlockInternal(spinlock ** locks, unsigned short lock_count, thread_desc ** thrds, unsigned short thrd_count) {
+	disable_interrupts();
 	this_processor->finish.action_code = Release_Multi_Schedule;
 	this_processor->finish.locks = locks;
@@ -383,5 +422,10 @@
 	this_processor->finish.thrds = thrds;
 	this_processor->finish.thrd_count = thrd_count;
+
+	verify( disable_preempt_count > 0 );
 	suspend();
+	verify( disable_preempt_count > 0 );
+
+	enable_interrupts( DEBUG_CTX );
 }
 
@@ -392,8 +436,8 @@
 // Kernel boot procedures
 void kernel_startup(void) {
-	LIB_DEBUG_PRINT_SAFE("Kernel : Starting\n");	
+	LIB_DEBUG_PRINT_SAFE("Kernel : Starting\n");
 
 	// Start by initializing the main thread
-	// SKULLDUGGERY: the mainThread steals the process main thread 
+	// SKULLDUGGERY: the mainThread steals the process main thread
 	// which will then be scheduled by the systemProcessor normally
 	mainThread = (thread_desc *)&mainThread_storage;
@@ -403,7 +447,4 @@
 	LIB_DEBUG_PRINT_SAFE("Kernel : Main thread ready\n");
 
-	// Enable preemption
-	kernel_start_preemption();
-
 	// Initialize the system cluster
 	systemCluster = (cluster *)&systemCluster_storage;
@@ -417,5 +458,5 @@
 	systemProcessor{ systemCluster, (processorCtx_t *)&systemProcessorCtx_storage };
 
-	// Add the main thread to the ready queue 
+	// Add the main thread to the ready queue
 	// once resume is called on systemProcessor->runner the mainThread needs to be scheduled like any normal thread
 	ScheduleThread(mainThread);
@@ -423,10 +464,14 @@
 	//initialize the global state variables
 	this_processor = &systemProcessor->proc;
-	this_processor->current_thread = mainThread;
-	this_processor->current_coroutine = &mainThread->cor;
+	this_thread = mainThread;
+	this_coroutine = &mainThread->cor;
+	disable_preempt_count = 1;
+
+	// Enable preemption
+	kernel_start_preemption();
 
 	// SKULLDUGGERY: Force a context switch to the system processor to set the main thread's context to the current UNIX
 	// context. Hence, the main thread does not begin through CtxInvokeThread, like all other threads. The trick here is that
-	// mainThread is on the ready queue when this call is made. 
+	// mainThread is on the ready queue when this call is made.
 	resume( systemProcessor->proc.runner );
 
@@ -435,8 +480,12 @@
 	// THE SYSTEM IS NOW COMPLETELY RUNNING
 	LIB_DEBUG_PRINT_SAFE("Kernel : Started\n--------------------------------------------------\n\n");
+
+	enable_interrupts( DEBUG_CTX );
 }
 
 void kernel_shutdown(void) {
 	LIB_DEBUG_PRINT_SAFE("\n--------------------------------------------------\nKernel : Shutting down\n");
+
+	disable_interrupts();
 
 	// SKULLDUGGERY: Notify the systemProcessor it needs to terminates.
@@ -448,4 +497,7 @@
 	// THE SYSTEM IS NOW COMPLETELY STOPPED
 
+	// Disable preemption
+	kernel_stop_preemption();
+
 	// Destroy the system processor and its context in reverse order of construction
 	// These were manually constructed so we need manually destroy them
@@ -457,5 +509,5 @@
 	^(mainThread){};
 
-	LIB_DEBUG_PRINT_SAFE("Kernel : Shutdown complete\n");	
+	LIB_DEBUG_PRINT_SAFE("Kernel : Shutdown complete\n");
 }
 
@@ -467,5 +519,5 @@
 	// abort cannot be recursively entered by the same or different processors because all signal handlers return when
 	// the globalAbort flag is true.
-	lock( &kernel_abort_lock );
+	lock( &kernel_abort_lock DEBUG_CTX2 );
 
 	// first task to abort ?
@@ -473,8 +525,8 @@
 		kernel_abort_called = true;
 		unlock( &kernel_abort_lock );
-	} 
+	}
 	else {
 		unlock( &kernel_abort_lock );
-		
+
 		sigset_t mask;
 		sigemptyset( &mask );
@@ -482,8 +534,8 @@
 		sigaddset( &mask, SIGUSR1 );			// block SIGUSR1 signals
 		sigsuspend( &mask );				// block the processor to prevent further damage during abort
-		_exit( EXIT_FAILURE );				// if processor unblocks before it is killed, terminate it		
-	}
-
-	return this_thread();
+		_exit( EXIT_FAILURE );				// if processor unblocks before it is killed, terminate it
+	}
+
+	return this_thread;
 }
 
@@ -494,8 +546,8 @@
 	__lib_debug_write( STDERR_FILENO, abort_text, len );
 
-	if ( thrd != this_coroutine() ) {
-		len = snprintf( abort_text, abort_text_size, " in coroutine %.256s (%p).\n", this_coroutine()->name, this_coroutine() );
+	if ( thrd != this_coroutine ) {
+		len = snprintf( abort_text, abort_text_size, " in coroutine %.256s (%p).\n", this_coroutine->name, this_coroutine );
 		__lib_debug_write( STDERR_FILENO, abort_text, len );
-	} 
+	}
 	else {
 		__lib_debug_write( STDERR_FILENO, ".\n", 2 );
@@ -505,9 +557,9 @@
 extern "C" {
 	void __lib_debug_acquire() {
-		lock(&kernel_debug_lock);
+		lock( &kernel_debug_lock DEBUG_CTX2 );
 	}
 
 	void __lib_debug_release() {
-		unlock(&kernel_debug_lock);
+		unlock( &kernel_debug_lock );
 	}
 }
@@ -525,13 +577,29 @@
 }
 
-bool try_lock( spinlock * this ) {
+bool try_lock( spinlock * this DEBUG_CTX_PARAM2 ) {
 	return this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0;
 }
 
-void lock( spinlock * this ) {
+void lock( spinlock * this DEBUG_CTX_PARAM2 ) {
 	for ( unsigned int i = 1;; i += 1 ) {
-	  	if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) break;
-	}
-}
+		if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }
+	}
+	LIB_DEBUG_DO(
+		this->prev_name = caller;
+		this->prev_thrd = this_thread;
+	)
+}
+
+void lock_yield( spinlock * this DEBUG_CTX_PARAM2 ) {
+	for ( unsigned int i = 1;; i += 1 ) {
+		if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }
+		yield();
+	}
+	LIB_DEBUG_DO(
+		this->prev_name = caller;
+		this->prev_thrd = this_thread;
+	)
+}
+
 
 void unlock( spinlock * this ) {
@@ -547,22 +615,25 @@
 
 void wait( signal_once * this ) {
-	lock( &this->lock );
+	lock( &this->lock DEBUG_CTX2 );
 	if( !this->cond ) {
-		append( &this->blocked, this_thread() );
-		ScheduleInternal( &this->lock );
-		lock( &this->lock );
-	}
-	unlock( &this->lock );
+		append( &this->blocked, (thread_desc*)this_thread );
+		BlockInternal( &this->lock );
+	}
+	else {
+		unlock( &this->lock );
+	}
 }
 
 void signal( signal_once * this ) {
-	lock( &this->lock );
+	lock( &this->lock DEBUG_CTX2 );
 	{
 		this->cond = true;
 
+		disable_interrupts();
 		thread_desc * it;
 		while( it = pop_head( &this->blocked) ) {
 			ScheduleThread( it );
 		}
+		enable_interrupts( DEBUG_CTX );
 	}
 	unlock( &this->lock );
@@ -590,5 +661,5 @@
 		}
 		head->next = NULL;
-	}	
+	}
 	return head;
 }
@@ -609,5 +680,5 @@
 		this->top = top->next;
 		top->next = NULL;
-	}	
+	}
 	return top;
 }
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/kernel_private.h	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -18,4 +18,6 @@
 #define KERNEL_PRIVATE_H
 
+#include "libhdr.h"
+
 #include "kernel"
 #include "thread"
@@ -23,17 +25,30 @@
 #include "alarm.h"
 
-#include "libhdr.h"
 
 //-----------------------------------------------------------------------------
 // Scheduler
+
+extern "C" {
+	void disable_interrupts();
+	void enable_interrupts_noRF();
+	void enable_interrupts( DEBUG_CTX_PARAM );
+}
+
 void ScheduleThread( thread_desc * );
+static inline void WakeThread( thread_desc * thrd ) {
+	if( !thrd ) return;
+
+	disable_interrupts();
+	ScheduleThread( thrd );
+	enable_interrupts( DEBUG_CTX );
+}
 thread_desc * nextThread(cluster * this);
 
-void ScheduleInternal(void);
-void ScheduleInternal(spinlock * lock);
-void ScheduleInternal(thread_desc * thrd);
-void ScheduleInternal(spinlock * lock, thread_desc * thrd);
-void ScheduleInternal(spinlock ** locks, unsigned short count);
-void ScheduleInternal(spinlock ** locks, unsigned short count, thread_desc ** thrds, unsigned short thrd_count);
+void BlockInternal(void);
+void BlockInternal(spinlock * lock);
+void BlockInternal(thread_desc * thrd);
+void BlockInternal(spinlock * lock, thread_desc * thrd);
+void BlockInternal(spinlock ** locks, unsigned short count);
+void BlockInternal(spinlock ** locks, unsigned short count, thread_desc ** thrds, unsigned short thrd_count);
 
 //-----------------------------------------------------------------------------
@@ -60,24 +75,8 @@
 extern cluster * systemCluster;
 extern system_proc_t * systemProcessor;
-extern thread_local processor * this_processor;
-
-static inline void disable_interrupts() {
-	__attribute__((unused)) unsigned short prev = __atomic_fetch_add_2( &this_processor->disable_preempt_count, 1, __ATOMIC_SEQ_CST );
-	assert( prev != (unsigned short) -1 );
-}
-
-static inline void enable_interrupts_noRF() {
-	__attribute__((unused)) unsigned short prev = __atomic_fetch_add_2( &this_processor->disable_preempt_count, -1, __ATOMIC_SEQ_CST );
-	verify( prev != (unsigned short) 0 );
-}
-
-static inline void enable_interrupts() {
-	__attribute__((unused)) unsigned short prev = __atomic_fetch_add_2( &this_processor->disable_preempt_count, -1, __ATOMIC_SEQ_CST );
-	verify( prev != (unsigned short) 0 );
-	if( prev == 1 && this_processor->pending_preemption ) {
-		ScheduleInternal( this_processor->current_thread );
-		this_processor->pending_preemption = false;
-	}
-}
+extern volatile thread_local processor * this_processor;
+extern volatile thread_local coroutine_desc * this_coroutine;
+extern volatile thread_local thread_desc * this_thread;
+extern volatile thread_local unsigned short disable_preempt_count;
 
 //-----------------------------------------------------------------------------
Index: src/libcfa/concurrency/monitor
===================================================================
--- src/libcfa/concurrency/monitor	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/monitor	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -26,5 +26,4 @@
 static inline void ?{}(monitor_desc * this) {
 	this->owner = NULL;
-	this->stack_owner = NULL;
 	this->recursion = 0;
 }
Index: src/libcfa/concurrency/monitor.c
===================================================================
--- src/libcfa/concurrency/monitor.c	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/monitor.c	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -19,6 +19,6 @@
 #include <stdlib>
 
+#include "libhdr.h"
 #include "kernel_private.h"
-#include "libhdr.h"
 
 //-----------------------------------------------------------------------------
@@ -44,9 +44,9 @@
 
 extern "C" {
-	void __enter_monitor_desc(monitor_desc * this) {
-		lock( &this->lock );
-		thread_desc * thrd = this_thread();
-
-		LIB_DEBUG_PRINT_SAFE("%p Entering %p (o: %p, r: %i)\n", thrd, this, this->owner, this->recursion);
+	void __enter_monitor_desc( monitor_desc * this ) {
+		lock_yield( &this->lock DEBUG_CTX2 );
+		thread_desc * thrd = this_thread;
+
+		// LIB_DEBUG_PRINT_SAFE("%p Entering %p (o: %p, r: %i)\n", thrd, this, this->owner, this->recursion);
 
 		if( !this->owner ) {
@@ -62,9 +62,9 @@
 			//Some one else has the monitor, wait in line for it
 			append( &this->entry_queue, thrd );
-			LIB_DEBUG_PRINT_SAFE("%p Blocking on entry\n", thrd);
-			ScheduleInternal( &this->lock );
-
-			//ScheduleInternal will unlock spinlock, no need to unlock ourselves
-			return; 
+			// LIB_DEBUG_PRINT_SAFE("%p Blocking on entry\n", thrd);
+			BlockInternal( &this->lock );
+
+			//BlockInternal will unlock spinlock, no need to unlock ourselves
+			return;
 		}
 
@@ -75,9 +75,9 @@
 	// leave pseudo code :
 	//	TODO
-	void __leave_monitor_desc(monitor_desc * this) {
-		lock( &this->lock );
-
-		LIB_DEBUG_PRINT_SAFE("%p Leaving %p (o: %p, r: %i)\n", thrd, this, this->owner, this->recursion);
-		verifyf( this_thread() == this->owner, "Expected owner to be %p, got %p (r: %i)", this_thread(), this->owner, this->recursion );
+	void __leave_monitor_desc( monitor_desc * this ) {
+		lock_yield( &this->lock DEBUG_CTX2 );
+
+		// LIB_DEBUG_PRINT_SAFE("%p Leaving %p (o: %p, r: %i). ", this_thread, this, this->owner, this->recursion);
+		verifyf( this_thread == this->owner, "Expected owner to be %p, got %p (r: %i)", this_thread, this->owner, this->recursion );
 
 		//Leaving a recursion level, decrement the counter
@@ -96,8 +96,37 @@
 		unlock( &this->lock );
 
-		LIB_DEBUG_PRINT_SAFE("Next owner is %p\n", new_owner);
+		// LIB_DEBUG_PRINT_SAFE("Next owner is %p\n", new_owner);
 
 		//We need to wake-up the thread
-		ScheduleThread( new_owner );
+		WakeThread( new_owner );
+	}
+
+	void __leave_thread_monitor( thread_desc * thrd ) {
+		monitor_desc * this = &thrd->mon;
+		lock_yield( &this->lock DEBUG_CTX2 );
+
+		disable_interrupts();
+
+		thrd->cor.state = Halted;
+
+		verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i)", thrd, this->owner, this->recursion );
+
+		//Leaving a recursion level, decrement the counter
+		this->recursion -= 1;
+
+		//If we haven't left the last level of recursion
+		//it means we don't need to do anything
+		if( this->recursion != 0) {
+			unlock( &this->lock );
+			return;
+		}
+
+		thread_desc * new_owner = next_thread( this );
+
+		//We can now let other threads in safely
+		unlock( &this->lock );
+
+		//We need to wake-up the thread
+		if( new_owner) ScheduleThread( new_owner );
 	}
 }
@@ -121,9 +150,9 @@
 	enter( this->m, this->count );
 
-	this->prev_mntrs = this_thread()->current_monitors;
-	this->prev_count = this_thread()->current_monitor_count;
-
-	this_thread()->current_monitors      = m;
-	this_thread()->current_monitor_count = count;
+	this->prev_mntrs = this_thread->current_monitors;
+	this->prev_count = this_thread->current_monitor_count;
+
+	this_thread->current_monitors      = m;
+	this_thread->current_monitor_count = count;
 }
 
@@ -131,6 +160,6 @@
 	leave( this->m, this->count );
 
-	this_thread()->current_monitors      = this->prev_mntrs;
-	this_thread()->current_monitor_count = this->prev_count;
+	this_thread->current_monitors      = this->prev_mntrs;
+	this_thread->current_monitor_count = this->prev_count;
 }
 
@@ -159,5 +188,5 @@
 // Internal scheduling
 void wait( condition * this, uintptr_t user_info = 0 ) {
-	LIB_DEBUG_PRINT_SAFE("Waiting\n");
+	// LIB_DEBUG_PRINT_SAFE("Waiting\n");
 
 	brand_condition( this );
@@ -170,14 +199,14 @@
 	unsigned short count = this->monitor_count;
 	unsigned int recursions[ count ];		//Save the current recursion levels to restore them later
-	spinlock *   locks     [ count ];		//We need to pass-in an array of locks to ScheduleInternal
-
-	LIB_DEBUG_PRINT_SAFE("count %i\n", count);
-
-	__condition_node_t waiter = { this_thread(), count, user_info };
+	spinlock *   locks     [ count ];		//We need to pass-in an array of locks to BlockInternal
+
+	// LIB_DEBUG_PRINT_SAFE("count %i\n", count);
+
+	__condition_node_t waiter = { (thread_desc*)this_thread, count, user_info };
 
 	__condition_criterion_t criteria[count];
 	for(int i = 0; i < count; i++) {
 		(&criteria[i]){ this->monitors[i], &waiter };
-		LIB_DEBUG_PRINT_SAFE( "Criterion %p\n", &criteria[i] );
+		// LIB_DEBUG_PRINT_SAFE( "Criterion %p\n", &criteria[i] );
 	}
 
@@ -201,12 +230,12 @@
 	}
 
-	LIB_DEBUG_PRINT_SAFE("Will unblock: ");
+	// LIB_DEBUG_PRINT_SAFE("Will unblock: ");
 	for(int i = 0; i < thread_count; i++) {
-		LIB_DEBUG_PRINT_SAFE("%p ", threads[i]);
-	}
-	LIB_DEBUG_PRINT_SAFE("\n");
+		// LIB_DEBUG_PRINT_SAFE("%p ", threads[i]);
+	}
+	// LIB_DEBUG_PRINT_SAFE("\n");
 
 	// Everything is ready to go to sleep
-	ScheduleInternal( locks, count, threads, thread_count );
+	BlockInternal( locks, count, threads, thread_count );
 
 
@@ -222,5 +251,5 @@
 bool signal( condition * this ) {
 	if( is_empty( this ) ) {
-		LIB_DEBUG_PRINT_SAFE("Nothing to signal\n");
+		// LIB_DEBUG_PRINT_SAFE("Nothing to signal\n");
 		return false;
 	}
@@ -231,8 +260,8 @@
 
 	unsigned short count = this->monitor_count;
-	
+
 	//Some more checking in debug
 	LIB_DEBUG_DO(
-		thread_desc * this_thrd = this_thread();
+		thread_desc * this_thrd = this_thread;
 		if ( this->monitor_count != this_thrd->current_monitor_count ) {
 			abortf( "Signal on condition %p made with different number of monitor(s), expected %i got %i", this, this->monitor_count, this_thrd->current_monitor_count );
@@ -248,5 +277,5 @@
 	//Lock all the monitors
 	lock_all( this->monitors, NULL, count );
-	LIB_DEBUG_PRINT_SAFE("Signalling");
+	// LIB_DEBUG_PRINT_SAFE("Signalling");
 
 	//Pop the head of the waiting queue
@@ -256,10 +285,10 @@
 	for(int i = 0; i < count; i++) {
 		__condition_criterion_t * crit = &node->criteria[i];
-		LIB_DEBUG_PRINT_SAFE(" %p", crit->target);
+		// LIB_DEBUG_PRINT_SAFE(" %p", crit->target);
 		assert( !crit->ready );
 		push( &crit->target->signal_stack, crit );
 	}
 
-	LIB_DEBUG_PRINT_SAFE("\n");
+	// LIB_DEBUG_PRINT_SAFE("\n");
 
 	//Release
@@ -281,15 +310,15 @@
 	unsigned short count = this->monitor_count;
 	unsigned int recursions[ count ];		//Save the current recursion levels to restore them later
-	spinlock *   locks     [ count ];		//We need to pass-in an array of locks to ScheduleInternal
+	spinlock *   locks     [ count ];		//We need to pass-in an array of locks to BlockInternal
 
 	lock_all( this->monitors, locks, count );
 
 	//create creteria
-	__condition_node_t waiter = { this_thread(), count, 0 };
+	__condition_node_t waiter = { (thread_desc*)this_thread, count, 0 };
 
 	__condition_criterion_t criteria[count];
 	for(int i = 0; i < count; i++) {
 		(&criteria[i]){ this->monitors[i], &waiter };
-		LIB_DEBUG_PRINT_SAFE( "Criterion %p\n", &criteria[i] );
+		// LIB_DEBUG_PRINT_SAFE( "Criterion %p\n", &criteria[i] );
 		push( &criteria[i].target->signal_stack, &criteria[i] );
 	}
@@ -309,5 +338,5 @@
 
 	//Everything is ready to go to sleep
-	ScheduleInternal( locks, count, &signallee, 1 );
+	BlockInternal( locks, count, &signallee, 1 );
 
 
@@ -325,5 +354,5 @@
 
 uintptr_t front( condition * this ) {
-	verifyf( !is_empty(this), 
+	verifyf( !is_empty(this),
 		"Attempt to access user data on an empty condition.\n"
 		"Possible cause is not checking if the condition is empty before reading stored data."
@@ -335,9 +364,9 @@
 // Internal scheduling
 void __accept_internal( unsigned short count, __acceptable_t * acceptables, void (*func)(void) ) {
-	// thread_desc * this = this_thread();
+	// thread_desc * this = this_thread;
 
 	// unsigned short count = this->current_monitor_count;
 	// unsigned int recursions[ count ];		//Save the current recursion levels to restore them later
-	// spinlock *   locks     [ count ];		//We need to pass-in an array of locks to ScheduleInternal
+	// spinlock *   locks     [ count ];		//We need to pass-in an array of locks to BlockInternal
 
 	// lock_all( this->current_monitors, locks, count );
@@ -348,5 +377,5 @@
 
 	// // // Everything is ready to go to sleep
-	// // ScheduleInternal( locks, count, threads, thread_count );
+	// // BlockInternal( locks, count, threads, thread_count );
 
 
@@ -393,5 +422,5 @@
 static inline void lock_all( spinlock ** locks, unsigned short count ) {
 	for( int i = 0; i < count; i++ ) {
-		lock( locks[i] );
+		lock_yield( locks[i] DEBUG_CTX2 );
 	}
 }
@@ -400,5 +429,5 @@
 	for( int i = 0; i < count; i++ ) {
 		spinlock * l = &source[i]->lock;
-		lock( l );
+		lock_yield( l DEBUG_CTX2 );
 		if(locks) locks[i] = l;
 	}
@@ -443,8 +472,8 @@
 	for(	int i = 0; i < count; i++ ) {
 
-		LIB_DEBUG_PRINT_SAFE( "Checking %p for %p\n", &criteria[i], target );
+		// LIB_DEBUG_PRINT_SAFE( "Checking %p for %p\n", &criteria[i], target );
 		if( &criteria[i] == target ) {
 			criteria[i].ready = true;
-			LIB_DEBUG_PRINT_SAFE( "True\n" );
+			// LIB_DEBUG_PRINT_SAFE( "True\n" );
 		}
 
@@ -452,12 +481,12 @@
 	}
 
-	LIB_DEBUG_PRINT_SAFE( "Runing %i\n", ready2run );
+	// LIB_DEBUG_PRINT_SAFE( "Runing %i\n", ready2run );
 	return ready2run ? node->waiting_thread : NULL;
 }
 
 static inline void brand_condition( condition * this ) {
-	thread_desc * thrd = this_thread();
+	thread_desc * thrd = this_thread;
 	if( !this->monitors ) {
-		LIB_DEBUG_PRINT_SAFE("Branding\n");
+		// LIB_DEBUG_PRINT_SAFE("Branding\n");
 		assertf( thrd->current_monitors != NULL, "No current monitor to brand condition", thrd->current_monitors );
 		this->monitor_count = thrd->current_monitor_count;
Index: src/libcfa/concurrency/preemption.c
===================================================================
--- src/libcfa/concurrency/preemption.c	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/preemption.c	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -15,11 +15,24 @@
 //
 
+#include "libhdr.h"
 #include "preemption.h"
 
 extern "C" {
+#include <errno.h>
+#include <execinfo.h>
+#define __USE_GNU
 #include <signal.h>
-}
-
-#define __CFA_DEFAULT_PREEMPTION__ 10
+#undef __USE_GNU
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+}
+
+
+#ifdef __USE_STREAM__
+#include "fstream"
+#endif
+
+#define __CFA_DEFAULT_PREEMPTION__ 10000
 
 __attribute__((weak)) unsigned int default_preemption() {
@@ -27,20 +40,38 @@
 }
 
+#define __CFA_SIGCXT__ ucontext_t *
+#define __CFA_SIGPARMS__ __attribute__((unused)) int sig, __attribute__((unused)) siginfo_t *sfp, __attribute__((unused)) __CFA_SIGCXT__ cxt
+
 static void preempt( processor   * this );
 static void timeout( thread_desc * this );
 
+void sigHandler_ctxSwitch( __CFA_SIGPARMS__ );
+void sigHandler_alarm    ( __CFA_SIGPARMS__ );
+void sigHandler_segv     ( __CFA_SIGPARMS__ );
+void sigHandler_abort    ( __CFA_SIGPARMS__ );
+
+static void __kernel_sigaction( int sig, void (*handler)(__CFA_SIGPARMS__), int flags );
+LIB_DEBUG_DO( bool validate( alarm_list_t * this ); )
+
+#ifdef __x86_64__
+#define CFA_REG_IP REG_RIP
+#else
+#define CFA_REG_IP REG_EIP
+#endif
+
+
 //=============================================================================================
 // Kernel Preemption logic
 //=============================================================================================
 
-void kernel_start_preemption() {
-
-}
-
 void tick_preemption() {
+	// LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO, "Ticking preemption\n" );
+
 	alarm_list_t * alarms = &systemProcessor->alarms;
 	__cfa_time_t currtime = __kernel_get_time();
 	while( alarms->head && alarms->head->alarm < currtime ) {
 		alarm_node_t * node = pop(alarms);
+		// LIB_DEBUG_PRINT_BUFFER_LOCAL( STDERR_FILENO, "Ticking %p\n", node );
+
 		if( node->kernel_alarm ) {
 			preempt( node->proc );
@@ -50,6 +81,8 @@
 		}
 
+		verify( validate( alarms ) );
+
 		if( node->period > 0 ) {
-			node->alarm += node->period;
+			node->alarm = currtime + node->period;
 			insert( alarms, node );
 		}
@@ -62,9 +95,14 @@
 		__kernel_set_timer( alarms->head->alarm - currtime );
 	}
+
+	verify( validate( alarms ) );
+	// LIB_DEBUG_PRINT_BUFFER_LOCAL( STDERR_FILENO, "Ticking preemption done\n" );
 }
 
 void update_preemption( processor * this, __cfa_time_t duration ) {
-	//     assert( THREAD_GETMEM( disableInt ) && THREAD_GETMEM( disableIntCnt ) == 1 );
+	LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO, "Processor : %p updating preemption to %lu\n", this, duration );
+
 	alarm_node_t * alarm = this->preemption_alarm;
+	duration *= 1000;
 
 	// Alarms need to be enabled
@@ -89,4 +127,104 @@
 }
 
+//=============================================================================================
+// Kernel Signal Tools
+//=============================================================================================
+
+LIB_DEBUG_DO( static thread_local void * last_interrupt = 0; )
+
+extern "C" {
+	void disable_interrupts() {
+		__attribute__((unused)) unsigned short new_val = __atomic_add_fetch_2( &disable_preempt_count, 1, __ATOMIC_SEQ_CST );
+		verify( new_val < (unsigned short)65_000 );
+		verify( new_val != (unsigned short) 0 );
+	}
+
+	void enable_interrupts_noRF() {
+		__attribute__((unused)) unsigned short prev = __atomic_fetch_add_2( &disable_preempt_count, -1, __ATOMIC_SEQ_CST );
+		verify( prev != (unsigned short) 0 );
+	}
+
+	void enable_interrupts( DEBUG_CTX_PARAM ) {
+		processor * proc   = this_processor;
+		thread_desc * thrd = this_thread;
+		unsigned short prev = __atomic_fetch_add_2( &disable_preempt_count, -1, __ATOMIC_SEQ_CST );
+		verify( prev != (unsigned short) 0 );
+		if( prev == 1 && proc->pending_preemption ) {
+			proc->pending_preemption = false;
+			BlockInternal( thrd );
+		}
+
+		LIB_DEBUG_DO( proc->last_enable = caller; )
+	}
+}
+
+static inline void signal_unblock( int sig ) {
+	sigset_t mask;
+	sigemptyset( &mask );
+	sigaddset( &mask, sig );
+
+	if ( pthread_sigmask( SIG_UNBLOCK, &mask, NULL ) == -1 ) {
+	    abortf( "internal error, pthread_sigmask" );
+	}
+}
+
+static inline void signal_block( int sig ) {
+	sigset_t mask;
+	sigemptyset( &mask );
+	sigaddset( &mask, sig );
+
+	if ( pthread_sigmask( SIG_BLOCK, &mask, NULL ) == -1 ) {
+	    abortf( "internal error, pthread_sigmask" );
+	}
+}
+
+static inline bool preemption_ready() {
+	return disable_preempt_count == 0;
+}
+
+static inline void defer_ctxSwitch() {
+	this_processor->pending_preemption = true;
+}
+
+static inline void defer_alarm() {
+	systemProcessor->pending_alarm = true;
+}
+
+static void preempt( processor * this ) {
+	pthread_kill( this->kernel_thread, SIGUSR1 );
+}
+
+static void timeout( thread_desc * this ) {
+	//TODO : implement waking threads
+}
+
+//=============================================================================================
+// Kernel Signal Startup/Shutdown logic
+//=============================================================================================
+
+static pthread_t alarm_thread;
+void * alarm_loop( __attribute__((unused)) void * args );
+
+void kernel_start_preemption() {
+	LIB_DEBUG_PRINT_SAFE("Kernel : Starting preemption\n");
+	__kernel_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO );
+	__kernel_sigaction( SIGSEGV, sigHandler_segv     , SA_SIGINFO );
+	__kernel_sigaction( SIGBUS , sigHandler_segv     , SA_SIGINFO );
+
+	signal_block( SIGALRM );
+
+	pthread_create( &alarm_thread, NULL, alarm_loop, NULL );
+}
+
+void kernel_stop_preemption() {
+	sigset_t mask;
+	sigfillset( &mask );
+	sigprocmask( SIG_BLOCK, &mask, NULL );
+
+	pthread_kill( alarm_thread, SIGINT );
+	pthread_join( alarm_thread, NULL );
+	LIB_DEBUG_PRINT_SAFE("Kernel : Preemption stopped\n");
+}
+
 void ?{}( preemption_scope * this, processor * proc ) {
 	(&this->alarm){ proc };
@@ -97,26 +235,18 @@
 
 void ^?{}( preemption_scope * this ) {
+	disable_interrupts();
+
 	update_preemption( this->proc, 0 );
 }
 
 //=============================================================================================
-// Kernel Signal logic
-//=============================================================================================
-
-static inline bool preemption_ready() {
-	return this_processor->disable_preempt_count == 0;
-}
-
-static inline void defer_ctxSwitch() {
-	this_processor->pending_preemption = true;
-}
-
-static inline void defer_alarm() {
-	systemProcessor->pending_alarm = true;
-}
-
-void sigHandler_ctxSwitch( __attribute__((unused)) int sig ) {
+// Kernel Signal Handlers
+//=============================================================================================
+
+void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
+	LIB_DEBUG_DO( last_interrupt = (void *)(cxt->uc_mcontext.gregs[CFA_REG_IP]); )
 	if( preemption_ready() ) {
-		ScheduleInternal( this_processor->current_thread );
+		signal_unblock( SIGUSR1 );
+		BlockInternal( (thread_desc*)this_thread );
 	}
 	else {
@@ -125,19 +255,189 @@
 }
 
-void sigHandler_alarm( __attribute__((unused)) int sig ) {
-	if( try_lock( &systemProcessor->alarm_lock ) ) {
-		tick_preemption();
-		unlock( &systemProcessor->alarm_lock );
-	}
-	else {
-		defer_alarm();
-	}
-}
-
-static void preempt( processor * this ) {
-	pthread_kill( this->kernel_thread, SIGUSR1 );
-}
-
-static void timeout( thread_desc * this ) {
-	//TODO : implement waking threads
-}
+// void sigHandler_alarm( __CFA_SIGPARMS__ ) {
+// 	LIB_DEBUG_DO( last_interrupt = (void *)(cxt->uc_mcontext.gregs[CFA_REG_IP]); )
+// 	verify( this_processor == systemProcessor );
+
+// 	if( try_lock( &systemProcessor->alarm_lock DEBUG_CTX2 ) ) {
+// 		tick_preemption();
+// 		systemProcessor->pending_alarm = false;
+// 		unlock( &systemProcessor->alarm_lock );
+// 	}
+// 	else {
+// 		defer_alarm();
+// 	}
+
+// 	signal_unblock( SIGALRM );
+
+// 	if( preemption_ready() && this_processor->pending_preemption ) {
+
+// 		this_processor->pending_preemption = false;
+// 		BlockInternal( (thread_desc*)this_thread );
+// 	}
+// }
+
+void * alarm_loop( __attribute__((unused)) void * args ) {
+	sigset_t mask;
+	sigemptyset( &mask );
+	sigaddset( &mask, SIGALRM );
+	sigaddset( &mask, SIGUSR2 );
+	sigaddset( &mask, SIGINT  );
+
+	if ( pthread_sigmask( SIG_BLOCK, &mask, NULL ) == -1 ) {
+	    abortf( "internal error, pthread_sigmask" );
+	}
+
+	while( true ) {
+		int sig;
+		if( sigwait( &mask, &sig ) != 0  ) {
+			abortf( "internal error, sigwait" );
+		}
+
+		switch( sig) {
+			case SIGALRM:
+				LIB_DEBUG_PRINT_SAFE("Kernel : Preemption thread tick\n");
+				lock( &systemProcessor->alarm_lock DEBUG_CTX2 );
+				tick_preemption();
+				unlock( &systemProcessor->alarm_lock );
+				break;
+			case SIGUSR2:
+				//TODO other actions
+				break;
+			case SIGINT:
+				LIB_DEBUG_PRINT_SAFE("Kernel : Preemption thread stopping\n");
+				return NULL;
+			default:
+				abortf( "internal error, sigwait returned sig %d", sig );
+				break;
+		}
+	}
+}
+
+static void __kernel_sigaction( int sig, void (*handler)(__CFA_SIGPARMS__), int flags ) {
+	struct sigaction act;
+
+	act.sa_sigaction = (void (*)(int, siginfo_t *, void *))handler;
+	act.sa_flags = flags;
+
+	if ( sigaction( sig, &act, NULL ) == -1 ) {
+		LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO,
+			" __kernel_sigaction( sig:%d, handler:%p, flags:%d ), problem installing signal handler, error(%d) %s.\n",
+			sig, handler, flags, errno, strerror( errno )
+		);
+		_exit( EXIT_FAILURE );
+	}
+}
+
+typedef void (*sa_handler_t)(int);
+
+static void __kernel_sigdefault( int sig ) {
+	struct sigaction act;
+
+	// act.sa_handler = SIG_DFL;
+	act.sa_flags = 0;
+	sigemptyset( &act.sa_mask );
+
+	if ( sigaction( sig, &act, NULL ) == -1 ) {
+		LIB_DEBUG_PRINT_BUFFER_DECL( STDERR_FILENO,
+			" __kernel_sigdefault( sig:%d ), problem resetting signal handler, error(%d) %s.\n",
+			sig, errno, strerror( errno )
+		);
+		_exit( EXIT_FAILURE );
+	}
+}
+
+//=============================================================================================
+// Terminating Signals logic
+//=============================================================================================
+
+LIB_DEBUG_DO(
+	static void __kernel_backtrace( int start ) {
+		// skip first N stack frames
+
+		enum { Frames = 50 };
+		void * array[Frames];
+		int size = backtrace( array, Frames );
+		char ** messages = backtrace_symbols( array, size );
+
+		// find executable name
+		*index( messages[0], '(' ) = '\0';
+		#ifdef __USE_STREAM__
+		serr | "Stack back trace for:" | messages[0] | endl;
+		#else
+		fprintf( stderr, "Stack back trace for: %s\n", messages[0]);
+		#endif
+
+		// skip last 2 stack frames after main
+		for ( int i = start; i < size && messages != NULL; i += 1 ) {
+			char * name = NULL;
+			char * offset_begin = NULL;
+			char * offset_end = NULL;
+
+			for ( char *p = messages[i]; *p; ++p ) {
+				// find parentheses and +offset
+				if ( *p == '(' ) {
+					name = p;
+				}
+				else if ( *p == '+' ) {
+					offset_begin = p;
+				}
+				else if ( *p == ')' ) {
+					offset_end = p;
+					break;
+				}
+			}
+
+			// if line contains symbol print it
+			int frameNo = i - start;
+			if ( name && offset_begin && offset_end && name < offset_begin ) {
+				// delimit strings
+				*name++ = '\0';
+				*offset_begin++ = '\0';
+				*offset_end++ = '\0';
+
+				#ifdef __USE_STREAM__
+				serr 	| "("  | frameNo | ")" | messages[i] | ":"
+					| name | "+" | offset_begin | offset_end | endl;
+				#else
+				fprintf( stderr, "(%i) %s : %s + %s %s\n", frameNo, messages[i], name, offset_begin, offset_end);
+				#endif
+			}
+			// otherwise, print the whole line
+			else {
+				#ifdef __USE_STREAM__
+				serr | "(" | frameNo | ")" | messages[i] | endl;
+				#else
+				fprintf( stderr, "(%i) %s\n", frameNo, messages[i] );
+				#endif
+			}
+		}
+
+		free( messages );
+	}
+)
+
+void sigHandler_segv( __CFA_SIGPARMS__ ) {
+	LIB_DEBUG_DO(
+		#ifdef __USE_STREAM__
+		serr 	| "*CFA runtime error* program cfa-cpp terminated with"
+			| (sig == SIGSEGV ? "segment fault." : "bus error.")
+			| endl;
+		#else
+		fprintf( stderr, "*CFA runtime error* program cfa-cpp terminated with %s\n", sig == SIGSEGV ? "segment fault." : "bus error." );
+		#endif
+
+		// skip first 2 stack frames
+		__kernel_backtrace( 1 );
+	)
+	exit( EXIT_FAILURE );
+}
+
+// void sigHandler_abort( __CFA_SIGPARMS__ ) {
+// 	// skip first 6 stack frames
+// 	LIB_DEBUG_DO( __kernel_backtrace( 6 ); )
+
+// 	// reset default signal handler
+// 	__kernel_sigdefault( SIGABRT );
+
+// 	raise( SIGABRT );
+// }
Index: src/libcfa/concurrency/thread
===================================================================
--- src/libcfa/concurrency/thread	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/thread	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -54,5 +54,5 @@
 }
 
-thread_desc * this_thread(void);
+extern volatile thread_local thread_desc * this_thread;
 
 forall( dtype T | is_thread(T) )
Index: src/libcfa/concurrency/thread.c
===================================================================
--- src/libcfa/concurrency/thread.c	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/concurrency/thread.c	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -28,5 +28,5 @@
 }
 
-extern thread_local processor * this_processor;
+extern volatile thread_local processor * this_processor;
 
 //-----------------------------------------------------------------------------
@@ -71,18 +71,21 @@
 	coroutine_desc* thrd_c = get_coroutine(this);
 	thread_desc*  thrd_h = get_thread   (this);
-	thrd_c->last = this_coroutine();
-	this_processor->current_coroutine = thrd_c;
+	thrd_c->last = this_coroutine;
 
-	LIB_DEBUG_PRINT_SAFE("Thread start : %p (t %p, c %p)\n", this, thrd_c, thrd_h);
+	// LIB_DEBUG_PRINT_SAFE("Thread start : %p (t %p, c %p)\n", this, thrd_c, thrd_h);
 
+	disable_interrupts();
 	create_stack(&thrd_c->stack, thrd_c->stack.size);
+	this_coroutine = thrd_c;
 	CtxStart(this, CtxInvokeThread);
+	assert( thrd_c->last->stack.context );
 	CtxSwitch( thrd_c->last->stack.context, thrd_c->stack.context );
 
 	ScheduleThread(thrd_h);
+	enable_interrupts( DEBUG_CTX );
 }
 
 void yield( void ) {
-	ScheduleInternal( this_processor->current_thread );
+	BlockInternal( (thread_desc *)this_thread );
 }
 
@@ -95,5 +98,5 @@
 void ThreadCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
 	// set state of current coroutine to inactive
-	src->state = Inactive;
+	src->state = src->state == Halted ? Halted : Inactive;
 	dst->state = Active;
 
@@ -103,10 +106,11 @@
 	// set new coroutine that the processor is executing
 	// and context switch to it
-	this_processor->current_coroutine = dst;
+	this_coroutine = dst;
+	assert( src->stack.context );
 	CtxSwitch( src->stack.context, dst->stack.context );
-	this_processor->current_coroutine = src;
+	this_coroutine = src;
 
 	// set state of new coroutine to active
-	dst->state = Inactive;
+	dst->state = dst->state == Halted ? Halted : Inactive;
 	src->state = Active;
 }
Index: src/libcfa/libhdr/libalign.h
===================================================================
--- src/libcfa/libhdr/libalign.h	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/libhdr/libalign.h	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -1,3 +1,3 @@
-//                              -*- Mode: C++ -*- 
+//                              -*- Mode: C++ -*-
 //
 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
@@ -18,13 +18,13 @@
 // Free Software  Foundation; either  version 2.1 of  the License, or  (at your
 // option) any later version.
-// 
+//
 // This library is distributed in the  hope that it will be useful, but WITHOUT
 // ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 // for more details.
-// 
+//
 // You should  have received a  copy of the  GNU Lesser General  Public License
 // along  with this library.
-// 
+//
 
 
@@ -33,6 +33,7 @@
 
 #include "assert"
+#include <stdbool.h>
 
-// Minimum size used to align memory boundaries for memory allocations. 
+// Minimum size used to align memory boundaries for memory allocations.
 #define libAlign() (sizeof(double))
 
Index: src/libcfa/libhdr/libdebug.h
===================================================================
--- src/libcfa/libhdr/libdebug.h	(revision b72d4ed3c032c44b2e77aa0b0822f63ebaf1bea0)
+++ src/libcfa/libhdr/libdebug.h	(revision 0614d14b45bad296918b3f83239838125bed84db)
@@ -18,9 +18,17 @@
 
 #ifdef __CFA_DEBUG__
-	#define LIB_DEBUG_DO(x) x
-	#define LIB_NO_DEBUG_DO(x) ((void)0)
+	#define LIB_DEBUG_DO(...) __VA_ARGS__
+	#define LIB_NO_DEBUG_DO(...)
+	#define DEBUG_CTX __PRETTY_FUNCTION__
+	#define DEBUG_CTX2 , __PRETTY_FUNCTION__
+	#define DEBUG_CTX_PARAM const char * caller
+	#define DEBUG_CTX_PARAM2 , const char * caller
 #else
-	#define LIB_DEBUG_DO(x) ((void)0)
-	#define LIB_NO_DEBUG_DO(x) x      
+	#define LIB_DEBUG_DO(...)
+	#define LIB_NO_DEBUG_DO(...) __VA_ARGS__
+	#define DEBUG_CTX
+	#define DEBUG_CTX2
+	#define DEBUG_CTX_PARAM
+	#define DEBUG_CTX_PARAM2
 #endif
 
@@ -51,17 +59,21 @@
 
 #ifdef __CFA_DEBUG_PRINT__
-      #define LIB_DEBUG_WRITE( fd, buffer, len )  __lib_debug_write( fd, buffer, len )
-      #define LIB_DEBUG_ACQUIRE()                 __lib_debug_acquire()
-      #define LIB_DEBUG_RELEASE()                 __lib_debug_release()
-      #define LIB_DEBUG_PRINT_SAFE(...)           __lib_debug_print_safe   (__VA_ARGS__)
-      #define LIB_DEBUG_PRINT_NOLOCK(...)         __lib_debug_print_nolock (__VA_ARGS__)
-      #define LIB_DEBUG_PRINT_BUFFER(...)         __lib_debug_print_buffer (__VA_ARGS__)
+	#define LIB_DEBUG_WRITE( fd, buffer, len )     __lib_debug_write( fd, buffer, len )
+	#define LIB_DEBUG_ACQUIRE()                    __lib_debug_acquire()
+	#define LIB_DEBUG_RELEASE()                    __lib_debug_release()
+	#define LIB_DEBUG_PRINT_SAFE(...)              __lib_debug_print_safe   (__VA_ARGS__)
+	#define LIB_DEBUG_PRINT_NOLOCK(...)            __lib_debug_print_nolock (__VA_ARGS__)
+	#define LIB_DEBUG_PRINT_BUFFER(...)            __lib_debug_print_buffer (__VA_ARGS__)
+	#define LIB_DEBUG_PRINT_BUFFER_DECL(fd, ...)   char text[256]; int len = snprintf( text, 256, __VA_ARGS__ ); __lib_debug_write( fd, text, len );
+	#define LIB_DEBUG_PRINT_BUFFER_LOCAL(fd, ...)  len = snprintf( text, 256, __VA_ARGS__ ); __lib_debug_write( fd, text, len );
 #else
-      #define LIB_DEBUG_WRITE(...)          ((void)0)
-      #define LIB_DEBUG_ACQUIRE()           ((void)0)
-      #define LIB_DEBUG_RELEASE()           ((void)0)
-      #define LIB_DEBUG_PRINT_SAFE(...)     ((void)0)
-      #define LIB_DEBUG_PRINT_NOLOCK(...)   ((void)0)
-      #define LIB_DEBUG_PRINT_BUFFER(...)   ((void)0)
+	#define LIB_DEBUG_WRITE(...)               ((void)0)
+	#define LIB_DEBUG_ACQUIRE()                ((void)0)
+	#define LIB_DEBUG_RELEASE()                ((void)0)
+	#define LIB_DEBUG_PRINT_SAFE(...)          ((void)0)
+	#define LIB_DEBUG_PRINT_NOLOCK(...)        ((void)0)
+	#define LIB_DEBUG_PRINT_BUFFER(...)        ((void)0)
+	#define LIB_DEBUG_PRINT_BUFFER_DECL(...)   ((void)0)
+	#define LIB_DEBUG_PRINT_BUFFER_LOCAL(...)  ((void)0)
 #endif
 
