Index: src/libcfa/bits/locks.h
===================================================================
--- src/libcfa/bits/locks.h	(revision fae90d5f4e6f14db5c582aeb2a6ec9c0dbc21e73)
+++ src/libcfa/bits/locks.h	(revision b10affdcb702e2f50d20fa65715ce6473d2bebb4)
@@ -10,6 +10,6 @@
 // Created On       : Tue Oct 31 15:14:38 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Dec  8 16:02:22 2017
-// Update Count     : 1
+// Last Modified On : Fri Mar 30 18:18:13 2018
+// Update Count     : 9
 //
 
@@ -64,6 +64,4 @@
 
 	extern void yield( unsigned int );
-	extern thread_local struct thread_desc *    volatile this_thread;
-	extern thread_local struct processor *      volatile this_processor;
 
 	static inline void ?{}( __spinlock_t & this ) {
@@ -76,8 +74,8 @@
 		if( result ) {
 			disable_interrupts();
-			__cfaabi_dbg_debug_do(
-				this.prev_name = caller;
-				this.prev_thrd = this_thread;
-			)
+			// __cfaabi_dbg_debug_do(
+			// 	this.prev_name = caller;
+			// 	this.prev_thrd = TL_GET( this_thread );
+			// )
 		}
 		return result;
@@ -107,8 +105,8 @@
 		}
 		disable_interrupts();
-		__cfaabi_dbg_debug_do(
-			this.prev_name = caller;
-			this.prev_thrd = this_thread;
-		)
+		// __cfaabi_dbg_debug_do(
+		// 	this.prev_name = caller;
+		// 	this.prev_thrd = TL_GET( this_thread );
+		// )
 	}
 
Index: src/libcfa/concurrency/coroutine
===================================================================
--- src/libcfa/concurrency/coroutine	(revision fae90d5f4e6f14db5c582aeb2a6ec9c0dbc21e73)
+++ src/libcfa/concurrency/coroutine	(revision b10affdcb702e2f50d20fa65715ce6473d2bebb4)
@@ -10,6 +10,6 @@
 // Created On       : Mon Nov 28 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Aug 30 07:58:29 2017
-// Update Count     : 3
+// Last Modified On : Fri Mar 30 18:23:45 2018
+// Update Count     : 8
 //
 
@@ -60,7 +60,4 @@
 }
 
-// Get current coroutine
-extern thread_local coroutine_desc * volatile this_coroutine;
-
 // Private wrappers for context switch and stack creation
 extern void CoroutineCtxSwitch(coroutine_desc * src, coroutine_desc * dst);
@@ -69,5 +66,5 @@
 // Suspend implementation inlined for performance
 static inline void suspend() {
-	coroutine_desc * src = this_coroutine;		// optimization
+	coroutine_desc * src = TL_GET( this_coroutine );			// optimization
 
 	assertf( src->last != 0,
@@ -86,5 +83,5 @@
 forall(dtype T | is_coroutine(T))
 static inline void resume(T & cor) {
-	coroutine_desc * src = this_coroutine;		// optimization
+	coroutine_desc * src = TL_GET( this_coroutine );			// optimization
 	coroutine_desc * dst = get_coroutine(cor);
 
@@ -111,5 +108,5 @@
 
 static inline void resume(coroutine_desc * dst) {
-	coroutine_desc * src = this_coroutine;		// optimization
+	coroutine_desc * src = TL_GET( this_coroutine );			// optimization
 
 	// not resuming self ?
Index: src/libcfa/concurrency/coroutine.c
===================================================================
--- src/libcfa/concurrency/coroutine.c	(revision fae90d5f4e6f14db5c582aeb2a6ec9c0dbc21e73)
+++ src/libcfa/concurrency/coroutine.c	(revision b10affdcb702e2f50d20fa65715ce6473d2bebb4)
@@ -10,6 +10,6 @@
 // Created On       : Mon Nov 28 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Feb  8 16:10:31 2018
-// Update Count     : 4
+// Last Modified On : Fri Mar 30 17:20:57 2018
+// Update Count     : 9
 //
 
@@ -99,5 +99,5 @@
 // Wrapper for co
 void CoroutineCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
-	verify( preemption_state.enabled || this_processor->do_terminate );
+	verify( TL_GET( preemption_state ).enabled || TL_GET( this_processor )->do_terminate );
 	disable_interrupts();
 
@@ -106,5 +106,5 @@
 
 	// set new coroutine that task is executing
-	this_coroutine = dst;
+	TL_SET( this_coroutine, dst );
 
 	// context switch to specified coroutine
@@ -117,5 +117,5 @@
 
 	enable_interrupts( __cfaabi_dbg_ctx );
-	verify( preemption_state.enabled || this_processor->do_terminate );
+	verify( TL_GET( preemption_state ).enabled || TL_GET( this_processor )->do_terminate );
 } //ctxSwitchDirect
 
@@ -172,5 +172,5 @@
 
 	void __leave_coroutine(void) {
-		coroutine_desc * src = this_coroutine;		// optimization
+		coroutine_desc * src = TL_GET( this_coroutine ); // optimization
 
 		assertf( src->starter != 0,
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision fae90d5f4e6f14db5c582aeb2a6ec9c0dbc21e73)
+++ src/libcfa/concurrency/invoke.h	(revision b10affdcb702e2f50d20fa65715ce6473d2bebb4)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Feb  9 14:41:55 2018
-// Update Count     : 6
+// Last Modified On : Fri Mar 30 14:28:31 2018
+// Update Count     : 29
 //
 
@@ -17,4 +17,7 @@
 #include "bits/defs.h"
 #include "bits/locks.h"
+
+#define TL_GET( member ) kernelThreadData.member	// expands to an lvalue, so it also serves for member access/assignment, e.g. TL_GET( preemption_state ).enabled = false
+#define TL_SET( member, value ) ( kernelThreadData.member = (value) )	// expression form: no trailing ';', so "TL_SET( a, b );" is exactly one statement (safe in an unbraced if/else)
 
 #ifdef __cforall
@@ -30,28 +33,31 @@
 		static inline struct thread_desc             * & get_next( struct thread_desc             & this );
 		static inline struct __condition_criterion_t * & get_next( struct __condition_criterion_t & this );
+
+		extern thread_local struct KernelThreadData {	// thread_local state bundle; read and written through TL_GET / TL_SET
+			struct coroutine_desc * volatile this_coroutine;	// coroutine being executed; rewritten by the context-switch routines
+			struct thread_desc    * volatile this_thread;	// user thread being run; set to NULL while a processor starts up
+			struct processor      * volatile this_processor;	// owning processor; initially NULL, which the preemption signal handler tests for
+
+			struct {
+				volatile unsigned short disable_count;	// incremented by disable_interrupts(), decremented by enable_interrupts*(); starts at 1
+				volatile bool enabled;	// cleared by disable_interrupts(); set again when disable_count drops from 1 to 0
+				volatile bool in_progress;	// set while the preemption handler unblocks SIGUSR1; preemption_ready() reports not-ready while set
+			} preemption_state;
+		} kernelThreadData;
 	}
 	#endif
 
+	static inline struct coroutine_desc * volatile active_coroutine() { return TL_GET( this_coroutine ); }	// read-only accessor for kernelThreadData.this_coroutine
+	static inline struct thread_desc * volatile active_thread() { return TL_GET( this_thread ); }	// read-only accessor for kernelThreadData.this_thread
+	static inline struct processor * volatile active_processor() { return TL_GET( this_processor ); }	// read-only accessor for kernelThreadData.this_processor
+
 	struct coStack_t {
-		// size of stack
-		size_t size;
-
-		// pointer to stack
-		void *storage;
-
-		// stack grows towards stack limit
-		void *limit;
-
-		// base of stack
-		void *base;
-
-		// address of cfa_context_t
-		void *context;
-
-		// address of top of storage
-		void *top;
-
-		// whether or not the user allocated the stack
-		bool userStack;
+		size_t size;									// size of stack
+		void * storage;									// pointer to stack
+		void * limit;									// stack grows towards stack limit
+		void * base;									// base of stack
+		void * context;									// address of cfa_context_t
+		void * top;										// address of top of storage
+		bool userStack;									// whether or not the user allocated the stack
 	};
 
@@ -59,21 +65,10 @@
 
 	struct coroutine_desc {
-		// stack information of the coroutine
-		struct coStack_t stack;
-
-		// textual name for coroutine/task, initialized by uC++ generated code
-		const char *name;
-
-		// copy of global UNIX variable errno
-		int errno_;
-
-		// current execution status for coroutine
-		enum coroutine_state state;
-
-		// first coroutine to resume this one
-		struct coroutine_desc * starter;
-
-		// last coroutine to resume this one
-		struct coroutine_desc * last;
+		struct coStack_t stack;							// stack information of the coroutine
+		const char * name;								// textual name for coroutine/task, initialized by uC++ generated code
+		int errno_;										// copy of global UNIX variable errno
+		enum coroutine_state state;						// current execution status for coroutine
+		struct coroutine_desc * starter;				// first coroutine to resume this one
+		struct coroutine_desc * last;					// last coroutine to resume this one
 	};
 
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision fae90d5f4e6f14db5c582aeb2a6ec9c0dbc21e73)
+++ src/libcfa/concurrency/kernel.c	(revision b10affdcb702e2f50d20fa65715ce6473d2bebb4)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Feb  8 23:52:19 2018
-// Update Count     : 5
+// Last Modified On : Fri Mar 30 18:26:11 2018
+// Update Count     : 23
 //
 
@@ -52,13 +52,14 @@
 // Global state
 
-thread_local coroutine_desc * volatile this_coroutine;
-thread_local thread_desc *    volatile this_thread;
-thread_local processor *      volatile this_processor;
-
 // volatile thread_local bool preemption_in_progress = 0;
 // volatile thread_local bool preemption_enabled = false;
 // volatile thread_local unsigned short disable_preempt_count = 1;
 
-volatile thread_local __cfa_kernel_preemption_state_t preemption_state = { false, false, 1 };
+thread_local struct KernelThreadData kernelThreadData = {	// positional initializer: order must match the struct declaration
+	NULL,	// this_coroutine
+	NULL,	// this_thread
+	NULL,	// this_processor
+	{ 1, false, false }	// preemption_state: disable_count = 1, enabled = false, in_progress = false
+};
 
 //-----------------------------------------------------------------------------
@@ -172,7 +173,7 @@
 		terminate(&this);
 		verify(this.do_terminate);
-		verify(this_processor != &this);
+		verify(TL_GET( this_processor ) != &this);
 		P( terminated );
-		verify(this_processor != &this);
+		verify(TL_GET( this_processor ) != &this);
 		pthread_join( kernel_thread, NULL );
 	}
@@ -213,9 +214,9 @@
 			if(readyThread)
 			{
-				verify( !preemption_state.enabled );
+				verify( ! TL_GET( preemption_state ).enabled );
 
 				runThread(this, readyThread);
 
-				verify( !preemption_state.enabled );
+				verify( ! TL_GET( preemption_state ).enabled );
 
 				//Some actions need to be taken from the kernel
@@ -249,5 +250,5 @@
 
 	//Update global state
-	this_thread = dst;
+	TL_SET( this_thread, dst );
 
 	// Context Switch to the thread
@@ -257,6 +258,6 @@
 
 void returnToKernel() {
-	coroutine_desc * proc_cor = get_coroutine(this_processor->runner);
-	coroutine_desc * thrd_cor = this_thread->curr_cor = this_coroutine;
+	coroutine_desc * proc_cor = get_coroutine(TL_GET( this_processor )->runner);
+	coroutine_desc * thrd_cor = TL_GET( this_thread )->curr_cor = TL_GET( this_coroutine );
 	ThreadCtxSwitch(thrd_cor, proc_cor);
 }
@@ -266,5 +267,5 @@
 void finishRunning(processor * this) with( this->finish ) {
 	if( action_code == Release ) {
-		verify( !preemption_state.enabled );
+		verify( ! TL_GET( preemption_state ).enabled );
 		unlock( *lock );
 	}
@@ -273,10 +274,10 @@
 	}
 	else if( action_code == Release_Schedule ) {
-		verify( !preemption_state.enabled );
+		verify( ! TL_GET( preemption_state ).enabled );
 		unlock( *lock );
 		ScheduleThread( thrd );
 	}
 	else if( action_code == Release_Multi ) {
-		verify( !preemption_state.enabled );
+		verify( ! TL_GET( preemption_state ).enabled );
 		for(int i = 0; i < lock_count; i++) {
 			unlock( *locks[i] );
@@ -307,9 +308,9 @@
 void * CtxInvokeProcessor(void * arg) {
 	processor * proc = (processor *) arg;
-	this_processor = proc;
-	this_coroutine = NULL;
-	this_thread = NULL;
-	preemption_state.enabled = false;
-	preemption_state.disable_count = 1;
+	TL_SET( this_processor, proc );
+	TL_SET( this_coroutine, NULL );
+	TL_SET( this_thread, NULL );
+	TL_GET( preemption_state ).enabled = false;
+	TL_GET( preemption_state ).disable_count = 1;
 	// SKULLDUGGERY: We want to create a context for the processor coroutine
 	// which is needed for the 2-step context switch. However, there is no reason
@@ -323,6 +324,6 @@
 
 	//Set global state
-	this_coroutine = get_coroutine(proc->runner);
-	this_thread = NULL;
+	TL_SET( this_coroutine, get_coroutine(proc->runner) );
+	TL_SET( this_thread, NULL );
 
 	//We now have a proper context from which to schedule threads
@@ -352,13 +353,13 @@
 
 void kernel_first_resume(processor * this) {
-	coroutine_desc * src = this_coroutine;
+	coroutine_desc * src = TL_GET( this_coroutine );
 	coroutine_desc * dst = get_coroutine(this->runner);
 
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 
 	create_stack(&dst->stack, dst->stack.size);
 	CtxStart(&this->runner, CtxInvokeCoroutine);
 
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 
 	dst->last = src;
@@ -369,5 +370,5 @@
 
 	// set new coroutine that task is executing
-	this_coroutine = dst;
+	TL_SET( this_coroutine, dst );
 
 	// SKULLDUGGERY normally interrupts are enable before leaving a coroutine ctxswitch.
@@ -386,5 +387,5 @@
 	src->state = Active;
 
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 }
 
@@ -392,13 +393,13 @@
 // Scheduler routines
 void ScheduleThread( thread_desc * thrd ) {
-	// if( !thrd ) return;
+	// if( ! thrd ) return;
 	verify( thrd );
 	verify( thrd->self_cor.state != Halted );
 
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 
 	verifyf( thrd->next == NULL, "Expected null got %p", thrd->next );
 
-	with( *this_processor->cltr ) {
+	with( *TL_GET( this_processor )->cltr ) {
 		lock  ( ready_queue_lock __cfaabi_dbg_ctx2 );
 		append( ready_queue, thrd );
@@ -406,13 +407,13 @@
 	}
 
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 }
 
 thread_desc * nextThread(cluster * this) with( *this ) {
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 	lock( ready_queue_lock __cfaabi_dbg_ctx2 );
 	thread_desc * head = pop_head( ready_queue );
 	unlock( ready_queue_lock );
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 	return head;
 }
@@ -420,7 +421,7 @@
 void BlockInternal() {
 	disable_interrupts();
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 	returnToKernel();
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 	enable_interrupts( __cfaabi_dbg_ctx );
 }
@@ -428,10 +429,10 @@
 void BlockInternal( __spinlock_t * lock ) {
 	disable_interrupts();
-	this_processor->finish.action_code = Release;
-	this_processor->finish.lock        = lock;
-
-	verify( !preemption_state.enabled );
+	TL_GET( this_processor )->finish.action_code = Release;
+	TL_GET( this_processor )->finish.lock        = lock;
+
+	verify( ! TL_GET( preemption_state ).enabled );
 	returnToKernel();
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 
 	enable_interrupts( __cfaabi_dbg_ctx );
@@ -440,10 +441,10 @@
 void BlockInternal( thread_desc * thrd ) {
 	disable_interrupts();
-	this_processor->finish.action_code = Schedule;
-	this_processor->finish.thrd        = thrd;
-
-	verify( !preemption_state.enabled );
+	TL_GET( this_processor )->finish.action_code = Schedule;
+	TL_GET( this_processor )->finish.thrd        = thrd;
+
+	verify( ! TL_GET( preemption_state ).enabled );
 	returnToKernel();
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 
 	enable_interrupts( __cfaabi_dbg_ctx );
@@ -453,11 +454,11 @@
 	assert(thrd);
 	disable_interrupts();
-	this_processor->finish.action_code = Release_Schedule;
-	this_processor->finish.lock        = lock;
-	this_processor->finish.thrd        = thrd;
-
-	verify( !preemption_state.enabled );
+	TL_GET( this_processor )->finish.action_code = Release_Schedule;
+	TL_GET( this_processor )->finish.lock        = lock;
+	TL_GET( this_processor )->finish.thrd        = thrd;
+
+	verify( ! TL_GET( preemption_state ).enabled );
 	returnToKernel();
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 
 	enable_interrupts( __cfaabi_dbg_ctx );
@@ -466,11 +467,11 @@
 void BlockInternal(__spinlock_t * locks [], unsigned short count) {
 	disable_interrupts();
-	this_processor->finish.action_code = Release_Multi;
-	this_processor->finish.locks       = locks;
-	this_processor->finish.lock_count  = count;
-
-	verify( !preemption_state.enabled );
+	TL_GET( this_processor )->finish.action_code = Release_Multi;
+	TL_GET( this_processor )->finish.locks       = locks;
+	TL_GET( this_processor )->finish.lock_count  = count;
+
+	verify( ! TL_GET( preemption_state ).enabled );
 	returnToKernel();
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 
 	enable_interrupts( __cfaabi_dbg_ctx );
@@ -479,13 +480,13 @@
 void BlockInternal(__spinlock_t * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
 	disable_interrupts();
-	this_processor->finish.action_code = Release_Multi_Schedule;
-	this_processor->finish.locks       = locks;
-	this_processor->finish.lock_count  = lock_count;
-	this_processor->finish.thrds       = thrds;
-	this_processor->finish.thrd_count  = thrd_count;
-
-	verify( !preemption_state.enabled );
+	TL_GET( this_processor )->finish.action_code = Release_Multi_Schedule;
+	TL_GET( this_processor )->finish.locks       = locks;
+	TL_GET( this_processor )->finish.lock_count  = lock_count;
+	TL_GET( this_processor )->finish.thrds       = thrds;
+	TL_GET( this_processor )->finish.thrd_count  = thrd_count;
+
+	verify( ! TL_GET( preemption_state ).enabled );
 	returnToKernel();
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 
 	enable_interrupts( __cfaabi_dbg_ctx );
@@ -493,8 +494,8 @@
 
 void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
-	verify( !preemption_state.enabled );
-	this_processor->finish.action_code = thrd ? Release_Schedule : Release;
-	this_processor->finish.lock        = lock;
-	this_processor->finish.thrd        = thrd;
+	verify( ! TL_GET( preemption_state ).enabled );
+	TL_GET( this_processor )->finish.action_code = thrd ? Release_Schedule : Release;
+	TL_GET( this_processor )->finish.lock        = lock;
+	TL_GET( this_processor )->finish.thrd        = thrd;
 
 	returnToKernel();
@@ -507,5 +508,5 @@
 // Kernel boot procedures
 void kernel_startup(void) {
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 	__cfaabi_dbg_print_safe("Kernel : Starting\n");
 
@@ -531,7 +532,7 @@
 
 	//initialize the global state variables
-	this_processor = mainProcessor;
-	this_thread = mainThread;
-	this_coroutine = &mainThread->self_cor;
+	TL_SET( this_processor, mainProcessor );
+	TL_SET( this_thread, mainThread );
+	TL_SET( this_coroutine, &mainThread->self_cor );
 
 	// Enable preemption
@@ -545,5 +546,5 @@
 	// context. Hence, the main thread does not begin through CtxInvokeThread, like all other threads. The trick here is that
 	// mainThread is on the ready queue when this call is made.
-	kernel_first_resume( this_processor );
+	kernel_first_resume( TL_GET( this_processor ) );
 
 
@@ -552,7 +553,7 @@
 	__cfaabi_dbg_print_safe("Kernel : Started\n--------------------------------------------------\n\n");
 
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 	enable_interrupts( __cfaabi_dbg_ctx );
-	verify( preemption_state.enabled );
+	verify( TL_GET( preemption_state ).enabled );
 }
 
@@ -560,7 +561,7 @@
 	__cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n");
 
-	verify( preemption_state.enabled );
+	verify( TL_GET( preemption_state ).enabled );
 	disable_interrupts();
-	verify( !preemption_state.enabled );
+	verify( ! TL_GET( preemption_state ).enabled );
 
 	// SKULLDUGGERY: Notify the mainProcessor it needs to terminates.
@@ -602,5 +603,5 @@
 
 	// first task to abort ?
-	if ( !kernel_abort_called ) {			// not first task to abort ?
+	if ( ! kernel_abort_called ) {			// flag still clear => this is the first task to abort
 		kernel_abort_called = true;
 		unlock( kernel_abort_lock );
@@ -617,5 +618,5 @@
 	}
 
-	return this_thread;
+	return TL_GET( this_thread );
 }
 
@@ -626,6 +627,6 @@
 	__cfaabi_dbg_bits_write( abort_text, len );
 
-	if ( thrd != this_coroutine ) {
-		len = snprintf( abort_text, abort_text_size, " in coroutine %.256s (%p).\n", this_coroutine->name, this_coroutine );
+	if ( thrd != TL_GET( this_coroutine ) ) {
+		len = snprintf( abort_text, abort_text_size, " in coroutine %.256s (%p).\n", TL_GET( this_coroutine )->name, TL_GET( this_coroutine ) );
 		__cfaabi_dbg_bits_write( abort_text, len );
 	}
@@ -636,5 +637,5 @@
 
 int kernel_abort_lastframe( void ) __attribute__ ((__nothrow__)) {
-	return get_coroutine(this_thread) == get_coroutine(mainThread) ? 4 : 2;
+	return get_coroutine(TL_GET( this_thread )) == get_coroutine(mainThread) ? 4 : 2;
 }
 
@@ -666,5 +667,5 @@
 	if ( count < 0 ) {
 		// queue current task
-		append( waiting, (thread_desc *)this_thread );
+		append( waiting, (thread_desc *)TL_GET( this_thread ) );
 
 		// atomically release spin lock and block
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision fae90d5f4e6f14db5c582aeb2a6ec9c0dbc21e73)
+++ src/libcfa/concurrency/kernel_private.h	(revision b10affdcb702e2f50d20fa65715ce6473d2bebb4)
@@ -10,6 +10,6 @@
 // Created On       : Mon Feb 13 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Jul 22 09:58:09 2017
-// Update Count     : 2
+// Last Modified On : Thu Mar 29 14:06:40 2018
+// Update Count     : 3
 //
 
@@ -66,7 +66,7 @@
 extern event_kernel_t * event_kernel;
 
-extern thread_local coroutine_desc * volatile this_coroutine;
-extern thread_local thread_desc *    volatile this_thread;
-extern thread_local processor *      volatile this_processor;
+//extern thread_local coroutine_desc * volatile this_coroutine;
+//extern thread_local thread_desc *    volatile this_thread;
+//extern thread_local processor *      volatile this_processor;
 
 // extern volatile thread_local bool preemption_in_progress;
Index: src/libcfa/concurrency/monitor.c
===================================================================
--- src/libcfa/concurrency/monitor.c	(revision fae90d5f4e6f14db5c582aeb2a6ec9c0dbc21e73)
+++ src/libcfa/concurrency/monitor.c	(revision b10affdcb702e2f50d20fa65715ce6473d2bebb4)
@@ -10,6 +10,6 @@
 // Created On       : Thd Feb 23 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Feb 16 14:49:53 2018
-// Update Count     : 5
+// Last Modified On : Fri Mar 30 14:30:26 2018
+// Update Count     : 9
 //
 
@@ -85,5 +85,5 @@
 		// Lock the monitor spinlock
 		lock( this->lock __cfaabi_dbg_ctx2 );
-		thread_desc * thrd = this_thread;
+		thread_desc * thrd = TL_GET( this_thread );
 
 		__cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
@@ -134,5 +134,5 @@
 		// Lock the monitor spinlock
 		lock( this->lock __cfaabi_dbg_ctx2 );
-		thread_desc * thrd = this_thread;
+		thread_desc * thrd = TL_GET( this_thread );
 
 		__cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
@@ -168,5 +168,5 @@
 
 			// Create the node specific to this wait operation
-			wait_ctx_primed( this_thread, 0 )
+			wait_ctx_primed( TL_GET( this_thread ), 0 )
 
 			// Some one else has the monitor, wait for him to finish and then run
@@ -179,5 +179,5 @@
 			__cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
 
-			wait_ctx( this_thread, 0 )
+			wait_ctx( TL_GET( this_thread ), 0 )
 			this->dtor_node = &waiter;
 
@@ -199,7 +199,7 @@
 		lock( this->lock __cfaabi_dbg_ctx2 );
 
-		__cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", this_thread, this, this->owner);
-
-		verifyf( this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", this_thread, this->owner, this->recursion, this );
+		__cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", TL_GET( this_thread ), this, this->owner);
+
+		verifyf( TL_GET( this_thread ) == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", TL_GET( this_thread ), this->owner, this->recursion, this );
 
 		// Leaving a recursion level, decrement the counter
@@ -227,6 +227,6 @@
 	void __leave_dtor_monitor_desc( monitor_desc * this ) {
 		__cfaabi_dbg_debug_do(
-			if( this_thread != this->owner ) {
-				abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, this_thread, this->owner);
+			if( TL_GET( this_thread ) != this->owner ) {
+				abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
 			}
 			if( this->recursion != 1 ) {
@@ -297,8 +297,8 @@
 
 	// Save previous thread context
-	this.prev = this_thread->monitors;
+	this.prev = TL_GET( this_thread )->monitors;
 
 	// Update thread context (needed for conditions)
-	(this_thread->monitors){m, count, func};
+	(TL_GET( this_thread )->monitors){m, count, func};
 
 	// __cfaabi_dbg_print_safe( "MGUARD : enter %d\n", count);
@@ -322,5 +322,5 @@
 
 	// Restore thread context
-	this_thread->monitors = this.prev;
+	TL_GET( this_thread )->monitors = this.prev;
 }
 
@@ -332,8 +332,8 @@
 
 	// Save previous thread context
-	this.prev = this_thread->monitors;
+	this.prev = TL_GET( this_thread )->monitors;
 
 	// Update thread context (needed for conditions)
-	(this_thread->monitors){m, 1, func};
+	(TL_GET( this_thread )->monitors){m, 1, func};
 
 	__enter_monitor_dtor( this.m, func );
@@ -346,5 +346,5 @@
 
 	// Restore thread context
-	this_thread->monitors = this.prev;
+	TL_GET( this_thread )->monitors = this.prev;
 }
 
@@ -386,5 +386,5 @@
 
 	// Create the node specific to this wait operation
-	wait_ctx( this_thread, user_info );
+	wait_ctx( TL_GET( this_thread ), user_info );
 
 	// Append the current wait operation to the ones already queued on the condition
@@ -425,5 +425,5 @@
 	//Some more checking in debug
 	__cfaabi_dbg_debug_do(
-		thread_desc * this_thrd = this_thread;
+		thread_desc * this_thrd = TL_GET( this_thread );
 		if ( this.monitor_count != this_thrd->monitors.size ) {
 			abort( "Signal on condition %p made with different number of monitor(s), expected %zi got %zi", &this, this.monitor_count, this_thrd->monitors.size );
@@ -473,5 +473,5 @@
 
 	// Create the node specific to this wait operation
-	wait_ctx_primed( this_thread, 0 )
+	wait_ctx_primed( TL_GET( this_thread ), 0 )
 
 	//save contexts
@@ -566,5 +566,5 @@
 
 				// Create the node specific to this wait operation
-				wait_ctx_primed( this_thread, 0 );
+				wait_ctx_primed( TL_GET( this_thread ), 0 );
 
 				// Save monitor states
@@ -612,5 +612,5 @@
 
 	// Create the node specific to this wait operation
-	wait_ctx_primed( this_thread, 0 );
+	wait_ctx_primed( TL_GET( this_thread ), 0 );
 
 	monitor_save;
@@ -618,5 +618,5 @@
 
 	for( __lock_size_t i = 0; i < count; i++) {
-		verify( monitors[i]->owner == this_thread );
+		verify( monitors[i]->owner == TL_GET( this_thread ) );
 	}
 
@@ -812,5 +812,5 @@
 
 static inline void brand_condition( condition & this ) {
-	thread_desc * thrd = this_thread;
+	thread_desc * thrd = TL_GET( this_thread );
 	if( !this.monitors ) {
 		// __cfaabi_dbg_print_safe( "Branding\n" );
Index: src/libcfa/concurrency/preemption.c
===================================================================
--- src/libcfa/concurrency/preemption.c	(revision fae90d5f4e6f14db5c582aeb2a6ec9c0dbc21e73)
+++ src/libcfa/concurrency/preemption.c	(revision b10affdcb702e2f50d20fa65715ce6473d2bebb4)
@@ -10,6 +10,6 @@
 // Created On       : Mon Jun 5 14:20:42 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Mar 27 11:28:51 2018
-// Update Count     : 24
+// Last Modified On : Fri Mar 30 17:27:43 2018
+// Update Count     : 31
 //
 
@@ -150,7 +150,7 @@
 	// Disable interrupts by incrementing the counter
 	void disable_interrupts() {
-		preemption_state.enabled = false;
-		__attribute__((unused)) unsigned short new_val = preemption_state.disable_count + 1;
-		preemption_state.disable_count = new_val;
+		TL_GET( preemption_state ).enabled = false;
+		__attribute__((unused)) unsigned short new_val = TL_GET( preemption_state ).disable_count + 1;
+		TL_GET( preemption_state ).disable_count = new_val;
 		verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
 	}
@@ -159,14 +159,14 @@
 	// If counter reaches 0, execute any pending CtxSwitch
 	void enable_interrupts( __cfaabi_dbg_ctx_param ) {
-		processor   * proc = this_processor;      // Cache the processor now since interrupts can start happening after the atomic add
-		thread_desc * thrd = this_thread;         // Cache the thread now since interrupts can start happening after the atomic add
-
-		unsigned short prev = preemption_state.disable_count;
-		preemption_state.disable_count -= 1;
+		processor   * proc = TL_GET( this_processor ); // Cache the processor now since interrupts can start happening after the atomic add
+		thread_desc * thrd = TL_GET( this_thread );	  // Cache the thread now since interrupts can start happening after the atomic add
+
+		unsigned short prev = TL_GET( preemption_state ).disable_count;
+		TL_GET( preemption_state ).disable_count -= 1;
 		verify( prev != 0u );                     // If this triggers someone is enabled already enabled interruptsverify( prev != 0u );
 
 		// Check if we need to prempt the thread because an interrupt was missed
 		if( prev == 1 ) {
-			preemption_state.enabled = true;
+			TL_GET( preemption_state ).enabled = true;
 			if( proc->pending_preemption ) {
 				proc->pending_preemption = false;
@@ -182,9 +182,9 @@
 	// Don't execute any pending CtxSwitch even if counter reaches 0
 	void enable_interrupts_noPoll() {
-		unsigned short prev = preemption_state.disable_count;
-		preemption_state.disable_count -= 1;
+		unsigned short prev = TL_GET( preemption_state ).disable_count;
+		TL_GET( preemption_state ).disable_count -= 1;
 		verifyf( prev != 0u, "Incremented from %u\n", prev );                     // If this triggers someone is enabled already enabled interrupts
 		if( prev == 1 ) {
-			preemption_state.enabled = true;
+			TL_GET( preemption_state ).enabled = true;
 		}
 	}
@@ -236,6 +236,6 @@
 // If false : preemption is unsafe and marked as pending
 static inline bool preemption_ready() {
-	bool ready = preemption_state.enabled && !preemption_state.in_progress; // Check if preemption is safe
-	this_processor->pending_preemption = !ready;                        // Adjust the pending flag accordingly
+	bool ready = TL_GET( preemption_state ).enabled && !TL_GET( preemption_state ).in_progress; // Check if preemption is safe
+	TL_GET( this_processor )->pending_preemption = !ready;			// Adjust the pending flag accordingly
 	return ready;
 }
@@ -251,6 +251,6 @@
 
 	// Start with preemption disabled until ready
-	preemption_state.enabled = false;
-	preemption_state.disable_count = 1;
+	TL_GET( preemption_state ).enabled = false;
+	TL_GET( preemption_state ).disable_count = 1;
 
 	// Initialize the event kernel
@@ -317,9 +317,9 @@
 	// before the kernel thread has even started running. When that happens an iterrupt
 	// we a null 'this_processor' will be caught, just ignore it.
-	if(!this_processor) return;
+	if(!TL_GET( this_processor )) return;
 
 	choose(sfp->si_value.sival_int) {
 		case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
-		case PREEMPT_TERMINATE: verify(this_processor->do_terminate);
+		case PREEMPT_TERMINATE: verify(TL_GET( this_processor )->do_terminate);
 		default:
 			abort( "internal error, signal value is %d", sfp->si_value.sival_int );
@@ -331,11 +331,11 @@
 	__cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p).\n", this_processor, this_thread);
 
-	preemption_state.in_progress = true;                      // Sync flag : prevent recursive calls to the signal handler
+	TL_GET( preemption_state ).in_progress = true;  // Sync flag : prevent recursive calls to the signal handler
 	signal_unblock( SIGUSR1 );                          // We are about to CtxSwitch out of the signal handler, let other handlers in
-	preemption_state.in_progress = false;                     // Clear the in progress flag
+	TL_GET( preemption_state ).in_progress = false; // Clear the in progress flag
 
 	// Preemption can occur here
 
-	BlockInternal( (thread_desc*)this_thread );         // Do the actual CtxSwitch
+	BlockInternal( (thread_desc*)TL_GET( this_thread ) ); // Do the actual CtxSwitch
 }
 
Index: src/libcfa/concurrency/thread
===================================================================
--- src/libcfa/concurrency/thread	(revision fae90d5f4e6f14db5c582aeb2a6ec9c0dbc21e73)
+++ src/libcfa/concurrency/thread	(revision b10affdcb702e2f50d20fa65715ce6473d2bebb4)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Jul 22 09:59:40 2017
-// Update Count     : 3
+// Last Modified On : Thu Mar 29 14:07:11 2018
+// Update Count     : 4
 //
 
@@ -52,5 +52,5 @@
 }
 
-extern thread_local thread_desc * volatile this_thread;
+//extern thread_local thread_desc * volatile this_thread;
 
 forall( dtype T | is_thread(T) )
Index: src/libcfa/concurrency/thread.c
===================================================================
--- src/libcfa/concurrency/thread.c	(revision fae90d5f4e6f14db5c582aeb2a6ec9c0dbc21e73)
+++ src/libcfa/concurrency/thread.c	(revision b10affdcb702e2f50d20fa65715ce6473d2bebb4)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Jul 21 22:34:46 2017
-// Update Count     : 1
+// Last Modified On : Fri Mar 30 17:19:52 2018
+// Update Count     : 8
 //
 
@@ -26,5 +26,5 @@
 }
 
-extern volatile thread_local processor * this_processor;
+//extern volatile thread_local processor * this_processor;
 
 //-----------------------------------------------------------------------------
@@ -75,5 +75,5 @@
 	coroutine_desc* thrd_c = get_coroutine(this);
 	thread_desc   * thrd_h = get_thread   (this);
-	thrd_c->last = this_coroutine;
+	thrd_c->last = TL_GET( this_coroutine );
 
 	// __cfaabi_dbg_print_safe("Thread start : %p (t %p, c %p)\n", this, thrd_c, thrd_h);
@@ -81,5 +81,5 @@
 	disable_interrupts();
 	create_stack(&thrd_c->stack, thrd_c->stack.size);
-	this_coroutine = thrd_c;
+	TL_SET( this_coroutine, thrd_c );
 	CtxStart(&this, CtxInvokeThread);
 	assert( thrd_c->last->stack.context );
@@ -92,5 +92,5 @@
 extern "C" {
 	void __finish_creation(void) {
-		coroutine_desc* thrd_c = this_coroutine;
+		coroutine_desc* thrd_c = TL_GET( this_coroutine );
 		ThreadCtxSwitch( thrd_c, thrd_c->last );
 	}
@@ -98,7 +98,7 @@
 
 void yield( void ) {
-	verify( preemption_state.enabled );
-	BlockInternal( this_thread );
-	verify( preemption_state.enabled );
+	verify( TL_GET( preemption_state ).enabled );
+	BlockInternal( TL_GET( this_thread ) );
+	verify( TL_GET( preemption_state ).enabled );
 }
 
@@ -116,8 +116,8 @@
 	// set new coroutine that the processor is executing
 	// and context switch to it
-	this_coroutine = dst;
+	TL_SET( this_coroutine, dst );
 	assert( src->stack.context );
 	CtxSwitch( src->stack.context, dst->stack.context );
-	this_coroutine = src;
+	TL_SET( this_coroutine, src );
 
 	// set state of new coroutine to active