Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision be5e34b191df89bc3ee8bb1fb45fe9e4d11148d8)
+++ libcfa/src/concurrency/io.cfa	(revision 8fc652e01c609dd8c3f392cd9de90efc31e0d6b2)
@@ -76,5 +76,5 @@
 
 	static inline bool next( __leaderlock_t & this ) {
-		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+		/* paranoid */ verify( ! __preemption_enabled() );
 		struct $thread * nextt;
 		for() {
@@ -168,5 +168,5 @@
 	// This is NOT thread-safe
 	static [int, bool] __drain_io( & struct __io_data ring ) {
-		/* paranoid */ verify( !kernelTLS.preemption_state.enabled );
+		/* paranoid */ verify( ! __preemption_enabled() );
 
 		unsigned to_submit = 0;
@@ -404,5 +404,5 @@
 					return;
 				}
-				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+				/* paranoid */ verify( ! __preemption_enabled() );
 				__STATS__( true,
 					io.submit_q.leader += 1;
@@ -442,5 +442,5 @@
 
 			#if defined(LEADER_LOCK)
-				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+				/* paranoid */ verify( ! __preemption_enabled() );
 				next(ring.submit_q.submit_lock);
 			#else
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision be5e34b191df89bc3ee8bb1fb45fe9e4d11148d8)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 8fc652e01c609dd8c3f392cd9de90efc31e0d6b2)
@@ -149,5 +149,5 @@
 		id.full_proc = false;
 		id.id = doregister(&id);
-		kernelTLS.this_proc_id = &id;
+		__cfaabi_tls.this_proc_id = &id;
 		__cfaabi_dbg_print_safe( "Kernel : IO poller thread starting\n" );
 
@@ -179,5 +179,5 @@
 				__cfadbg_print_safe(io_core, "Kernel I/O : Unparking io poller %p\n", io_ctx);
 				#if !defined( __CFA_NO_STATISTICS__ )
-					kernelTLS.this_stats = io_ctx->self.curr_cluster->stats;
+					__cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats;
 				#endif
 				post( io_ctx->sem );
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision be5e34b191df89bc3ee8bb1fb45fe9e4d11148d8)
+++ libcfa/src/concurrency/kernel.cfa	(revision 8fc652e01c609dd8c3f392cd9de90efc31e0d6b2)
@@ -122,6 +122,6 @@
 	// Because of a bug, we couldn't initialized the seed on construction
 	// Do it here
-	kernelTLS.rand_seed ^= rdtscl();
-	kernelTLS.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
+	__cfaabi_tls.rand_seed ^= rdtscl();
+	__cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
 	__tls_rand_advance_bck();
 
@@ -217,5 +217,5 @@
 		// and it make sense for it to be set in all other cases except here
 		// fake it
-		kernelTLS.this_thread = mainThread;
+		__cfaabi_tls.this_thread = mainThread;
 	}
 
@@ -230,5 +230,5 @@
 // from the processor coroutine to the target thread
 static void __run_thread(processor * this, $thread * thrd_dst) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verifyf( thrd_dst->state == Ready || thrd_dst->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", thrd_dst->state, thrd_dst->preempted);
 	/* paranoid */ verifyf( thrd_dst->link.next == 0p, "Expected null got %p", thrd_dst->link.next );
@@ -247,8 +247,8 @@
 
 		// Update global state
-		kernelTLS.this_thread = thrd_dst;
-
-		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-		/* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
+		kernelTLS().this_thread = thrd_dst;
+
+		/* paranoid */ verify( ! __preemption_enabled() );
+		/* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
 		/* paranoid */ verify( thrd_dst->context.SP );
 		/* paranoid */ verify( thrd_dst->state != Halted );
@@ -267,9 +267,9 @@
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
 		/* paranoid */ verify( thrd_dst->context.SP );
-		/* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
-		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+		/* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
+		/* paranoid */ verify( ! __preemption_enabled() );
 
 		// Reset global state
-		kernelTLS.this_thread = 0p;
+		kernelTLS().this_thread = 0p;
 
 		// We just finished running a thread, there are a few things that could have happened.
@@ -315,15 +315,15 @@
 	proc_cor->state = Active;
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
 // KERNEL_ONLY
 void returnToKernel() {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	$coroutine * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
-	$thread * thrd_src = kernelTLS.this_thread;
+	/* paranoid */ verify( ! __preemption_enabled() );
+	$coroutine * proc_cor = get_coroutine(kernelTLS().this_processor->runner);
+	$thread * thrd_src = kernelTLS().this_thread;
 
 	#if !defined(__CFA_NO_STATISTICS__)
-		struct processor * last_proc = kernelTLS.this_processor;
+		struct processor * last_proc = kernelTLS().this_processor;
 	#endif
 
@@ -345,10 +345,10 @@
 
 	#if !defined(__CFA_NO_STATISTICS__)
-		if(last_proc != kernelTLS.this_processor) {
+		if(last_proc != kernelTLS().this_processor) {
 			__tls_stats()->ready.threads.migration++;
 		}
 	#endif
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) < ((uintptr_t)__get_stack(thrd_src->curr_cor)->base ), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too small.\n", thrd_src );
 	/* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) > ((uintptr_t)__get_stack(thrd_src->curr_cor)->limit), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too large.\n", thrd_src );
@@ -359,8 +359,8 @@
 // KERNEL ONLY
 void __schedule_thread( $thread * thrd ) {
+	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verify( thrd );
 	/* paranoid */ verify( thrd->state != Halted );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	/* paranoid */ verify( kernelTLS.this_proc_id );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
 	/* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
 	/* paranoid */ 	if( thrd->state == Blocked || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
@@ -380,11 +380,11 @@
 	ready_schedule_unlock();
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
 // KERNEL ONLY
 static inline $thread * __next_thread(cluster * this) with( *this ) {
-	/* paranoid */ verify( kernelTLS.this_proc_id );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
 
 	ready_schedule_lock();
@@ -392,6 +392,6 @@
 	ready_schedule_unlock();
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	/* paranoid */ verify( kernelTLS.this_proc_id );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	return thrd;
 }
@@ -399,6 +399,6 @@
 // KERNEL ONLY
 static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
-	/* paranoid */ verify( kernelTLS.this_proc_id );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
 
 	ready_schedule_lock();
@@ -406,6 +406,6 @@
 	ready_schedule_unlock();
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	/* paranoid */ verify( kernelTLS.this_proc_id );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	return thrd;
 }
@@ -414,9 +414,4 @@
 	if( !thrd ) return;
 
-	/* paranoid */ verify( kernelTLS.this_proc_id );
-	bool full = kernelTLS.this_proc_id->full_proc;
-	if(full) disable_interrupts();
-
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 	int old_ticket = __atomic_fetch_add(&thrd->ticket, 1, __ATOMIC_SEQ_CST);
 	switch(old_ticket) {
@@ -428,6 +423,20 @@
 			/* paranoid */ verify( thrd->state == Blocked );
 
-			// Wake lost the race,
-			__schedule_thread( thrd );
+			{
+				/* paranoid */ verify( publicTLS_get(this_proc_id) );
+				bool full = publicTLS_get(this_proc_id)->full_proc;
+				if(full) disable_interrupts();
+
+				/* paranoid */ verify( ! __preemption_enabled() );
+
+				// Wake lost the race,
+				__schedule_thread( thrd );
+
+				/* paranoid */ verify( ! __preemption_enabled() );
+
+				if(full) enable_interrupts( __cfaabi_dbg_ctx );
+				/* paranoid */ verify( publicTLS_get(this_proc_id) );
+			}
+
 			break;
 		default:
@@ -435,21 +444,17 @@
 			abort("Thread %p (%s) has mismatch park/unpark\n", thrd, thrd->self_cor.name);
 	}
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-
-	if(full) enable_interrupts( __cfaabi_dbg_ctx );
-	/* paranoid */ verify( kernelTLS.this_proc_id );
 }
 
 void park( void ) {
-	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( __preemption_enabled() ); // park() must be entered with preemption enabled
 	disable_interrupts();
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	/* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
+	/* paranoid */ verify( ! __preemption_enabled() ); // expected to be off once disable_interrupts() has run
+	/* paranoid */ verify( kernelTLS().this_thread->preempted == __NO_PREEMPTION ); // the parking thread must not be marked as preempted
 
 	returnToKernel();
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() ); // still expected to be off when returnToKernel() comes back
 	enable_interrupts( __cfaabi_dbg_ctx );
-	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( __preemption_enabled() ); // and expected to be back on before returning to the caller
 
 }
@@ -460,5 +465,5 @@
 	// Should never return
 	void __cfactx_thrd_leave() {
-		$thread * thrd = TL_GET( this_thread );
+		$thread * thrd = active_thread();
 		$monitor * this = &thrd->self_mon;
 
@@ -473,5 +478,5 @@
 
 		// Leave the thread
-		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+		/* paranoid */ verify( ! __preemption_enabled() );
 		returnToKernel();
 		abort();
@@ -483,9 +488,9 @@
 // KERNEL ONLY
 bool force_yield( __Preemption_Reason reason ) {
-	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( __preemption_enabled() );
 	disable_interrupts();
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-
-	$thread * thrd = kernelTLS.this_thread;
+	/* paranoid */ verify( ! __preemption_enabled() );
+
+	$thread * thrd = kernelTLS().this_thread;
 	/* paranoid */ verify(thrd->state == Active);
 
@@ -501,7 +506,7 @@
 	}
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	enable_interrupts_noPoll();
-	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( __preemption_enabled() );
 
 	return preempted;
@@ -513,5 +518,5 @@
 // Wake a thread from the front if there are any
 static void __wake_one(cluster * this) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verify( ready_schedule_islocked() );
 
@@ -533,5 +538,5 @@
 
 	/* paranoid */ verify( ready_schedule_islocked() );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 
 	return;
@@ -543,5 +548,5 @@
 
 	disable_interrupts();
-		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+		/* paranoid */ verify( ! __preemption_enabled() );
 		post( this->idle );
 	enable_interrupts( __cfaabi_dbg_ctx );
@@ -549,5 +554,5 @@
 
 static void push  (__cluster_idles & this, processor & proc) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	lock( this );
 		this.idle++;
@@ -556,9 +561,9 @@
 		insert_first(this.list, proc);
 	unlock( this );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
 static void remove(__cluster_idles & this, processor & proc) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	lock( this );
 		this.idle--;
@@ -567,5 +572,5 @@
 		remove(proc);
 	unlock( this );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
@@ -611,5 +616,5 @@
 	}
 
-	return kernelTLS.this_thread;
+	return __cfaabi_tls.this_thread;
 }
 
@@ -636,5 +641,5 @@
 
 int kernel_abort_lastframe( void ) __attribute__ ((__nothrow__)) {
-	return get_coroutine(kernelTLS.this_thread) == get_coroutine(mainThread) ? 4 : 2;
+	return get_coroutine(kernelTLS().this_thread) == get_coroutine(mainThread) ? 4 : 2; // 4 when the current thread's coroutine is mainThread's, otherwise 2; kernelTLS() asserts preemption is disabled
 }
 
@@ -668,5 +673,5 @@
 	if ( count < 0 ) {
 		// queue current task
-		append( waiting, kernelTLS.this_thread );
+		append( waiting, active_thread() );
 
 		// atomically release spin lock and block
@@ -718,5 +723,5 @@
 		void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]) {
 			this.prev_name = prev_name;
-			this.prev_thrd = kernelTLS.this_thread;
+			this.prev_thrd = kernelTLS().this_thread; // store the thread recorded in TLS at the time of the call; kernelTLS() asserts preemption is disabled
 		}
 	}
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision be5e34b191df89bc3ee8bb1fb45fe9e4d11148d8)
+++ libcfa/src/concurrency/kernel.hfa	(revision 8fc652e01c609dd8c3f392cd9de90efc31e0d6b2)
@@ -275,8 +275,10 @@
 static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
 
-static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
-static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
+static inline struct processor * active_processor() { return publicTLS_get( this_processor ); } // UNSAFE (presumably the thread can move to another processor after this read — TODO confirm)
+static inline struct cluster   * active_cluster  () { return publicTLS_get( this_processor )->cltr; } // cluster of the processor currently recorded in TLS
 
 #if !defined(__CFA_NO_STATISTICS__)
+	void print_stats_now( cluster & this, int flags );
+
 	static inline void print_stats_at_exit( cluster & this, int flags ) {
 		this.print_stats |= flags;
Index: libcfa/src/concurrency/kernel/fwd.hfa
===================================================================
--- libcfa/src/concurrency/kernel/fwd.hfa	(revision be5e34b191df89bc3ee8bb1fb45fe9e4d11148d8)
+++ libcfa/src/concurrency/kernel/fwd.hfa	(revision 8fc652e01c609dd8c3f392cd9de90efc31e0d6b2)
@@ -55,13 +55,24 @@
 				uint64_t bck_seed;
 			} ready_rng;
-		} kernelTLS __attribute__ ((tls_model ( "initial-exec" )));
+		} __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" )));
 
+		extern bool __preemption_enabled();
 
+		static inline KernelThreadData & kernelTLS( void ) { // checked accessor: returns a reference to the thread-local __cfaabi_tls block
+			/* paranoid */ verify( ! __preemption_enabled() ); // asserts that preemption is disabled at the call site
+			return __cfaabi_tls;
+		}
+
+		extern uintptr_t __cfatls_get( unsigned long int member ); // declared for the offset-based accessor that is commented out on the next line
+		// #define publicTLS_get( member ) ((typeof(__cfaabi_tls.member))__cfatls_get( __builtin_offsetof(KernelThreadData, member) ))
+		#define publicTLS_get( member ) (__cfaabi_tls.member) // direct read of the TLS member; unlike kernelTLS(), no preemption check
+		// extern forall(otype T) T __cfatls_set( T * member, T value );
+		// #define publicTLS_set( member, value ) __cfatls_set( (typeof(member)*)__builtin_offsetof(KernelThreadData, member), value );
 
 		static inline uint64_t __tls_rand() {
 			#if defined(__SIZEOF_INT128__)
-				return __lehmer64( kernelTLS.rand_seed );
+				return __lehmer64( kernelTLS().rand_seed ); // __SIZEOF_INT128__ is defined: use __lehmer64 on the per-thread seed
 			#else
-				return __xorshift64( kernelTLS.rand_seed );
+				return __xorshift64( kernelTLS().rand_seed ); // otherwise: use __xorshift64 on the same per-thread seed
 			#endif
 		}
@@ -75,11 +86,11 @@
 		static inline unsigned __tls_rand_fwd() {
 
-			kernelTLS.ready_rng.fwd_seed = (A * kernelTLS.ready_rng.fwd_seed + C) & (M - 1);
-			return kernelTLS.ready_rng.fwd_seed >> D;
+			kernelTLS().ready_rng.fwd_seed = (A * kernelTLS().ready_rng.fwd_seed + C) & (M - 1); // step the per-thread forward seed: multiply by A, add C, mask with (M - 1)
+			return kernelTLS().ready_rng.fwd_seed >> D; // the result is the updated seed shifted right by D bits
 		}
 
 		static inline unsigned __tls_rand_bck() {
-			unsigned int r = kernelTLS.ready_rng.bck_seed >> D;
-			kernelTLS.ready_rng.bck_seed = AI * (kernelTLS.ready_rng.bck_seed - C) & (M - 1);
+			unsigned int r = kernelTLS().ready_rng.bck_seed >> D; // the result is taken from bck_seed before it is stepped
+			kernelTLS().ready_rng.bck_seed = AI * (kernelTLS().ready_rng.bck_seed - C) & (M - 1); // subtract C, multiply by AI, mask with (M - 1); presumably the inverse of the forward step — TODO confirm
 			return r;
 		}
@@ -92,25 +103,9 @@
 
 		static inline void __tls_rand_advance_bck(void) {
-			kernelTLS.ready_rng.bck_seed = kernelTLS.ready_rng.fwd_seed;
+			kernelTLS().ready_rng.bck_seed = kernelTLS().ready_rng.fwd_seed; // restart the backward sequence from the current forward seed
 		}
 	}
 
-	#if 0 // def __ARM_ARCH
-		// function prototypes are only really used by these macros on ARM
-		void disable_global_interrupts();
-		void enable_global_interrupts();
 
-		#define TL_GET( member ) ( { __typeof__( kernelTLS.member ) target; \
-			disable_global_interrupts(); \
-			target = kernelTLS.member; \
-			enable_global_interrupts(); \
-			target; } )
-		#define TL_SET( member, value ) disable_global_interrupts(); \
-			kernelTLS.member = value; \
-			enable_global_interrupts();
-	#else
-		#define TL_GET( member ) kernelTLS.member
-		#define TL_SET( member, value ) kernelTLS.member = value;
-	#endif
 
 	extern void disable_interrupts();
@@ -121,5 +116,9 @@
 		extern void park( void );
 		extern void unpark( struct $thread * this );
-		static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
+		static inline struct $thread * active_thread () { // returns the thread currently recorded in thread-local storage
+			struct $thread * t = publicTLS_get( this_thread ); // unchecked read: no preemption assertion on this path
+			/* paranoid */ verify( t ); // a null this_thread is treated as a bug here
+			return t;
+		}
 
 		extern bool force_yield( enum __Preemption_Reason );
@@ -140,7 +139,7 @@
 		#if !defined(__CFA_NO_STATISTICS__)
 			static inline struct __stats_t * __tls_stats() {
-				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-				/* paranoid */ verify( kernelTLS.this_stats );
-				return kernelTLS.this_stats;
+				/* paranoid */ verify( ! __preemption_enabled() ); // the per-thread stats pointer is only read with preemption disabled
+				/* paranoid */ verify( kernelTLS().this_stats ); // this_stats must already be set for this kernel thread
+				return kernelTLS().this_stats;
 			}
 
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision be5e34b191df89bc3ee8bb1fb45fe9e4d11148d8)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 8fc652e01c609dd8c3f392cd9de90efc31e0d6b2)
@@ -118,5 +118,5 @@
 //-----------------------------------------------------------------------------
 // Global state
-thread_local struct KernelThreadData kernelTLS __attribute__ ((tls_model ( "initial-exec" ))) @= {
+thread_local struct KernelThreadData __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" ))) @= {
 	NULL,												// cannot use 0p
 	NULL,
@@ -156,5 +156,5 @@
 // Kernel boot procedures
 static void __kernel_startup(void) {
-	verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	__cfadbg_print_safe(runtime_core, "Kernel : Starting\n");
 
@@ -212,11 +212,11 @@
 
 	//initialize the global state variables
-	kernelTLS.this_processor = mainProcessor;
-	kernelTLS.this_proc_id   = (__processor_id_t*)mainProcessor;
-	kernelTLS.this_thread    = mainThread;
+	__cfaabi_tls.this_processor = mainProcessor;
+	__cfaabi_tls.this_proc_id   = (__processor_id_t*)mainProcessor;
+	__cfaabi_tls.this_thread    = mainThread;
 
 	#if !defined( __CFA_NO_STATISTICS__ )
-		kernelTLS.this_stats = (__stats_t *)& storage_mainProcStats;
-		__init_stats( kernelTLS.this_stats );
+		__cfaabi_tls.this_stats = (__stats_t *)& storage_mainProcStats;
+		__init_stats( __cfaabi_tls.this_stats );
 	#endif
 
@@ -234,5 +234,5 @@
 	// context. Hence, the main thread does not begin through __cfactx_invoke_thread, like all other threads. The trick here is that
 	// mainThread is on the ready queue when this call is made.
-	__kernel_first_resume( kernelTLS.this_processor );
+	__kernel_first_resume( __cfaabi_tls.this_processor );
 
 
@@ -251,7 +251,8 @@
 	__cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n");
 
-	verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	enable_interrupts( __cfaabi_dbg_ctx );
-	verify( TL_GET( preemption_state.enabled ) );
+	/* paranoid */ verify( __preemption_enabled() );
+
 }
 
@@ -262,7 +263,7 @@
 	mainCluster->io.ctxs = 0p;
 
-	/* paranoid */ verify( TL_GET( preemption_state.enabled ) );
+	/* paranoid */ verify( __preemption_enabled() );
 	disable_interrupts();
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 
 	__cfadbg_print_safe(runtime_core, "\n--------------------------------------------------\nKernel : Shutting down\n");
@@ -272,5 +273,5 @@
 	// which is currently here
 	__atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
-	__kernel_last_resume( kernelTLS.this_processor );
+	__kernel_last_resume( __cfaabi_tls.this_processor );
 	mainThread->self_cor.state = Halted;
 
@@ -321,12 +322,12 @@
 		__stats_t local_stats;
 		__init_stats( &local_stats );
-		kernelTLS.this_stats = &local_stats;
+		__cfaabi_tls.this_stats = &local_stats;
 	#endif
 
 	processor * proc = (processor *) arg;
-	kernelTLS.this_processor = proc;
-	kernelTLS.this_proc_id   = (__processor_id_t*)proc;
-	kernelTLS.this_thread    = 0p;
-	kernelTLS.preemption_state.[enabled, disable_count] = [false, 1];
+	__cfaabi_tls.this_processor = proc;
+	__cfaabi_tls.this_proc_id   = (__processor_id_t*)proc;
+	__cfaabi_tls.this_thread    = 0p;
+	__cfaabi_tls.preemption_state.[enabled, disable_count] = [false, 1];
 	// SKULLDUGGERY: We want to create a context for the processor coroutine
 	// which is needed for the 2-step context switch. However, there is no reason
@@ -340,5 +341,5 @@
 
 	//Set global state
-	kernelTLS.this_thread = 0p;
+	__cfaabi_tls.this_thread = 0p;
 
 	//We now have a proper context from which to schedule threads
@@ -370,11 +371,11 @@
 	$coroutine * dst = get_coroutine(this->runner);
 
-	verify( ! kernelTLS.preemption_state.enabled );
-
-	kernelTLS.this_thread->curr_cor = dst;
+	/* paranoid */ verify( ! __preemption_enabled() );
+
+	__cfaabi_tls.this_thread->curr_cor = dst;
 	__stack_prepare( &dst->stack, 65000 );
 	__cfactx_start(main, dst, this->runner, __cfactx_invoke_coroutine);
 
-	verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 
 	dst->last = &src->self_cor;
@@ -394,5 +395,5 @@
 	/* paranoid */ verify(src->state == Active);
 
-	verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
@@ -402,7 +403,7 @@
 	$coroutine * dst = get_coroutine(this->runner);
 
-	verify( ! kernelTLS.preemption_state.enabled );
-	verify( dst->starter == src );
-	verify( dst->context.SP );
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( dst->starter == src );
+	/* paranoid */ verify( dst->context.SP );
 
 	// SKULLDUGGERY in debug the processors check that the
@@ -546,5 +547,5 @@
 
 		P( terminated );
-		verify( kernelTLS.this_processor != &this);
+		/* paranoid */ verify( active_processor() != &this);
 	}
 
@@ -696,5 +697,5 @@
 #if defined(__CFA_WITH_VERIFY__)
 static bool verify_fwd_bck_rng(void) {
-	kernelTLS.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&verify_fwd_bck_rng);
+	__cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&verify_fwd_bck_rng);
 
 	unsigned values[10];
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision be5e34b191df89bc3ee8bb1fb45fe9e4d11148d8)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 8fc652e01c609dd8c3f392cd9de90efc31e0d6b2)
@@ -38,4 +38,6 @@
 #endif
 ;
+
+extern bool __preemption_enabled();
 
 //release/wake-up the following resources
@@ -181,8 +183,9 @@
 //  creating/destroying queues
 static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
-	/*paranoid*/ verify( kernelTLS.this_proc_id );
-
-	unsigned iproc = kernelTLS.this_proc_id->id;
-	/*paranoid*/ verify(data[iproc].handle == kernelTLS.this_proc_id);
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+
+	unsigned iproc = kernelTLS().this_proc_id->id;
+	/*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
 	/*paranoid*/ verify(iproc < ready);
 
@@ -207,8 +210,9 @@
 
 static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
-	/*paranoid*/ verify( kernelTLS.this_proc_id );
-
-	unsigned iproc = kernelTLS.this_proc_id->id;
-	/*paranoid*/ verify(data[iproc].handle == kernelTLS.this_proc_id);
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+
+	unsigned iproc = kernelTLS().this_proc_id->id;
+	/*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
 	/*paranoid*/ verify(iproc < ready);
 	/*paranoid*/ verify(data[iproc].lock);
@@ -223,6 +227,7 @@
 #ifdef __CFA_WITH_VERIFY__
 	static inline bool ready_schedule_islocked(void) {
-		/*paranoid*/ verify( kernelTLS.this_proc_id );
-		__processor_id_t * proc = kernelTLS.this_proc_id;
+		/* paranoid */ verify( ! __preemption_enabled() ); // this_proc_id is read through kernelTLS(), which requires preemption to be disabled
+		/* paranoid */ verify( kernelTLS().this_proc_id ); // a processor id must be set for this kernel thread
+		__processor_id_t * proc = kernelTLS().this_proc_id; // its id indexes the scheduler lock's data array in the return below
 		return __scheduler_lock->data[proc->id].owned;
 	}
Index: libcfa/src/concurrency/monitor.cfa
===================================================================
--- libcfa/src/concurrency/monitor.cfa	(revision be5e34b191df89bc3ee8bb1fb45fe9e4d11148d8)
+++ libcfa/src/concurrency/monitor.cfa	(revision 8fc652e01c609dd8c3f392cd9de90efc31e0d6b2)
@@ -82,8 +82,8 @@
 // Enter single monitor
 static void __enter( $monitor * this, const __monitor_group_t & group ) {
+	$thread * thrd = active_thread();
+
 	// Lock the monitor spinlock
 	lock( this->lock __cfaabi_dbg_ctx2 );
-	// Interrupts disable inside critical section
-	$thread * thrd = kernelTLS.this_thread;
 
 	__cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
@@ -126,5 +126,5 @@
 		__cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
 
-		/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 		return;
 	}
@@ -132,5 +132,5 @@
 	__cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
 
-	/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+	/* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 	/* paranoid */ verify( this->lock.lock );
 
@@ -141,8 +141,8 @@
 
 static void __dtor_enter( $monitor * this, fptr_t func, bool join ) {
+	$thread * thrd = active_thread();
+
 	// Lock the monitor spinlock
 	lock( this->lock __cfaabi_dbg_ctx2 );
-	// Interrupts disable inside critical section
-	$thread * thrd = kernelTLS.this_thread;
 
 	__cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
@@ -155,5 +155,5 @@
 		__set_owner( this, thrd );
 
-		verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 
 		unlock( this->lock );
@@ -174,5 +174,5 @@
 		this->owner = thrd;
 
-		verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 
 		unlock( this->lock );
@@ -200,5 +200,5 @@
 
 		// Release the next thread
-		/* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 		unpark( urgent->owner->waiting_thread );
 
@@ -207,5 +207,5 @@
 
 		// Some one was waiting for us, enter
-		/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 	}
 	else {
@@ -224,5 +224,5 @@
 		park();
 
-		/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 		return;
 	}
@@ -237,7 +237,7 @@
 	lock( this->lock __cfaabi_dbg_ctx2 );
 
-	__cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
-
-	/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+	__cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", active_thread(), this, this->owner);
+
+	/* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 
 	// Leaving a recursion level, decrement the counter
@@ -270,6 +270,6 @@
 void __dtor_leave( $monitor * this, bool join ) {
 	__cfaabi_dbg_debug_do(
-		if( TL_GET( this_thread ) != this->owner ) {
-			abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
+		if( active_thread() != this->owner ) {
+			abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, active_thread(), this->owner);
 		}
 		if( this->recursion != 1  && !join ) {
@@ -287,5 +287,5 @@
 	/* paranoid */ verify( this->lock.lock );
 	/* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verify( thrd->state == Halted );
 	/* paranoid */ verify( this->recursion == 1 );
@@ -303,5 +303,5 @@
 	// Unpark the next owner if needed
 	/* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verify( thrd->state == Halted );
 	unpark( new_owner );
@@ -327,5 +327,5 @@
 // Sorts monitors before entering
 void ?{}( monitor_guard_t & this, $monitor * m [], __lock_size_t count, fptr_t func ) {
-	$thread * thrd = TL_GET( this_thread );
+	$thread * thrd = active_thread();
 
 	// Store current array
@@ -362,5 +362,5 @@
 
 	// Restore thread context
-	TL_GET( this_thread )->monitors = this.prev;
+	active_thread()->monitors = this.prev;
 }
 
@@ -369,5 +369,5 @@
 void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func, bool join ) {
 	// optimization
-	$thread * thrd = TL_GET( this_thread );
+	$thread * thrd = active_thread();
 
 	// Store current array
@@ -392,5 +392,5 @@
 
 	// Restore thread context
-	TL_GET( this_thread )->monitors = this.prev;
+	active_thread()->monitors = this.prev;
 }
 
@@ -432,5 +432,5 @@
 
 	// Create the node specific to this wait operation
-	wait_ctx( TL_GET( this_thread ), user_info );
+	wait_ctx( active_thread(), user_info );
 
 	// Append the current wait operation to the ones already queued on the condition
@@ -483,5 +483,5 @@
 	//Some more checking in debug
 	__cfaabi_dbg_debug_do(
-		$thread * this_thrd = TL_GET( this_thread );
+		$thread * this_thrd = active_thread();
 		if ( this.monitor_count != this_thrd->monitors.size ) {
 			abort( "Signal on condition %p made with different number of monitor(s), expected %zi got %zi", &this, this.monitor_count, this_thrd->monitors.size );
@@ -531,5 +531,5 @@
 
 	// Create the node specific to this wait operation
-	wait_ctx_primed( kernelTLS.this_thread, 0 )
+	wait_ctx_primed( active_thread(), 0 )
 
 	//save contexts
@@ -630,5 +630,5 @@
 
 				// Create the node specific to this wait operation
-				wait_ctx_primed( kernelTLS.this_thread, 0 );
+				wait_ctx_primed( active_thread(), 0 );
 
 				// Save monitor states
@@ -682,5 +682,5 @@
 
 	// Create the node specific to this wait operation
-	wait_ctx_primed( kernelTLS.this_thread, 0 );
+	wait_ctx_primed( active_thread(), 0 );
 
 	monitor_save;
@@ -688,5 +688,5 @@
 
 	for( __lock_size_t i = 0; i < count; i++) {
-		verify( monitors[i]->owner == kernelTLS.this_thread );
+		verify( monitors[i]->owner == active_thread() );
 	}
 
@@ -724,10 +724,10 @@
 static inline void __set_owner( $monitor * monitors [], __lock_size_t count, $thread * owner ) {
 	/* paranoid */ verify ( monitors[0]->lock.lock );
-	/* paranoid */ verifyf( monitors[0]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[0]->owner, monitors[0]->recursion, monitors[0] );
+	/* paranoid */ verifyf( monitors[0]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[0]->owner, monitors[0]->recursion, monitors[0] );
 	monitors[0]->owner        = owner;
 	monitors[0]->recursion    = 1;
 	for( __lock_size_t i = 1; i < count; i++ ) {
 		/* paranoid */ verify ( monitors[i]->lock.lock );
-		/* paranoid */ verifyf( monitors[i]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[i]->owner, monitors[i]->recursion, monitors[i] );
+		/* paranoid */ verifyf( monitors[i]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[i]->owner, monitors[i]->recursion, monitors[i] );
 		monitors[i]->owner        = owner;
 		monitors[i]->recursion    = 0;
@@ -755,5 +755,5 @@
 		//regardless of if we are ready to baton pass,
 		//we need to set the monitor as in use
-		/* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 		__set_owner( this,  urgent->owner->waiting_thread );
 
@@ -764,5 +764,5 @@
 	// Get the next thread in the entry_queue
 	$thread * new_owner = pop_head( this->entry_queue );
-	/* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+	/* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 	/* paranoid */ verify( !new_owner || new_owner->link.next == 0p );
 	__set_owner( this, new_owner );
@@ -892,5 +892,5 @@
 
 static inline void brand_condition( condition & this ) {
-	$thread * thrd = TL_GET( this_thread );
+	$thread * thrd = active_thread();
 	if( !this.monitors ) {
 		// __cfaabi_dbg_print_safe( "Branding\n" );
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision be5e34b191df89bc3ee8bb1fb45fe9e4d11148d8)
+++ libcfa/src/concurrency/preemption.cfa	(revision 8fc652e01c609dd8c3f392cd9de90efc31e0d6b2)
@@ -164,4 +164,72 @@
 //=============================================================================================
 
+//----------
+// Report whether preemption is currently enabled for this kernel thread (special-cased since it is used often)
+bool __preemption_enabled() {
+	// create an assembler label before the TLS access (sigHandler_ctxSwitch refuses to preempt between the two labels)
+	// the "memory" clobber is there to avoid the access being moved across the label
+	asm volatile("__cfaasm_check_before:":::"memory");
+
+	// access tls as normal
+	bool enabled = __cfaabi_tls.preemption_state.enabled;
+
+	// create an assembler label after the TLS access, closing the region opened above
+	// the "memory" clobber is there to avoid the access being moved across the label
+	asm volatile("__cfaasm_check_after:":::"memory");
+	return enabled;
+}
+
+//----------
+// Get data from the TLS block: returns the uintptr_t-sized word found `offset` bytes past the start of __cfaabi_tls
+uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); // noinline: presumably so the labels below are emitted only once -- TODO confirm
+uintptr_t __cfatls_get( unsigned long int offset ) {
+	// create an assembler label before the TLS access (sigHandler_ctxSwitch refuses to preempt between the two labels)
+	// the "memory" clobber is there to avoid the access being moved across the label
+	asm volatile("__cfaasm_get_before:":::"memory");
+
+	// access tls as normal (except for pointer arithmetic)
+	uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
+
+	// create an assembler label after the TLS access, closing the region opened above
+	// the "memory" clobber is there to avoid the access being moved across the label
+	asm volatile("__cfaasm_get_after:":::"memory");
+	return val;
+}
+
+// //----------
+// // Write data to the TLS block
+// // sadly it loses the type information and can only write 1 word at a time
+// // use with __builtin_offsetof
+// void __cfatls_set(uintptr_t offset, void * value) __attribute__((__noinline__));
+// void __cfatls_set(uintptr_t offset, void * value) {
+//     // create a assembler label before
+//     // marked as clobber all to avoid movement
+//     asm volatile("__cfaasm_set_before:":::"memory");
+
+//     // access tls as normal (except for type information)
+//     *(void**)(offset + (uintptr_t)&my_tls) = value;
+
+//     // create a assembler label after
+//     // marked as clobber all to avoid movement
+//     asm volatile("__cfaasm_set_after:":::"memory");
+// }
+
+// //----------
+// #include <stdio.h>
+// int main() {
+//     // Get the information
+//     // Must use inline assembly to get access to label
+//     // C is annoying here because this could easily be a static const but "initializer element is not a compile-time constant"
+//     // The big advantage of this approach is that there is 0 overhead for the read and write functions
+//     void * __cfaasm_addr_get_before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; });
+//     void * __cfaasm_addr_get_after  = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; });
+//     void * __cfaasm_addr_set_before = ({ void * value; asm("movq $__cfaasm_set_before, %[v]\n\t" : [v]"=r"(value) ); value; });
+//     void * __cfaasm_addr_set_after  = ({ void * value; asm("movq $__cfaasm_set_after , %[v]\n\t" : [v]"=r"(value) ); value; });
+
+//     printf("%p to %p\n", __cfaasm_addr_get_before, __cfaasm_addr_get_after);
+//     printf("%p to %p\n", __cfaasm_addr_set_before, __cfaasm_addr_set_after);
+//     return 0;
+// }
+
 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
 
@@ -169,5 +237,9 @@
 	// Disable interrupts by incrementing the counter
 	void disable_interrupts() {
-		with( kernelTLS.preemption_state ) {
+		// create an assembler label before the TLS accesses
+		// marked with a "memory" clobber to avoid movement across the label
+		asm volatile("__cfaasm_disable_before:":::"memory");
+
+		with( __cfaabi_tls.preemption_state ) {
 			#if GCC_VERSION > 50000
 			static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
@@ -186,4 +258,8 @@
 			verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
 		}
+
+		// create an assembler label after the TLS accesses
+		// marked with a "memory" clobber to avoid movement across the label
+		asm volatile("__cfaasm_disable_after:":::"memory");
 	}
 
@@ -191,8 +267,12 @@
 	// If counter reaches 0, execute any pending __cfactx_switch
 	void enable_interrupts( __cfaabi_dbg_ctx_param ) {
-		processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
+		// create an assembler label before the TLS accesses
+		// marked with a "memory" clobber to avoid movement across the label
+		asm volatile("__cfaasm_enable_before:":::"memory");
+
+		processor   * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
 		/* paranoid */ verify( proc );
 
-		with( kernelTLS.preemption_state ){
+		with( __cfaabi_tls.preemption_state ){
 			unsigned short prev = disable_count;
 			disable_count -= 1;
@@ -221,4 +301,8 @@
 		// For debugging purposes : keep track of the last person to enable the interrupts
 		__cfaabi_dbg_debug_do( proc->last_enable = caller; )
+
+		// create an assembler label after the TLS accesses
+		// marked with a "memory" clobber to avoid movement across the label
+		asm volatile("__cfaasm_enable_after:":::"memory");
 	}
 
@@ -226,19 +310,27 @@
 	// Don't execute any pending __cfactx_switch even if counter reaches 0
 	void enable_interrupts_noPoll() {
-		unsigned short prev = kernelTLS.preemption_state.disable_count;
-		kernelTLS.preemption_state.disable_count -= 1;
+		// create an assembler label before the TLS accesses (sigHandler_ctxSwitch refuses to preempt between the two labels)
+		// the "memory" clobber is there to avoid the accesses being moved across the label
+		asm volatile("__cfaasm_nopoll_before:":::"memory");
+
+		unsigned short prev = __cfaabi_tls.preemption_state.disable_count;
+		__cfaabi_tls.preemption_state.disable_count -= 1;
 		verifyf( prev != 0u, "Incremented from %u\n", prev );                     // If this triggers someone is enabled already enabled interrupts
 		if( prev == 1 ) {
 			#if GCC_VERSION > 50000
-			static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
+			static_assert(__atomic_always_lock_free(sizeof(__cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");
 			#endif
 			// Set enabled flag to true
 			// should be atomic to avoid preemption in the middle of the operation.
 			// use memory order RELAXED since there is no inter-thread on this variable requirements
-			__atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);
+			__atomic_store_n(&__cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);
 
 			// Signal the compiler that a fence is needed but only for signal handlers
 			__atomic_signal_fence(__ATOMIC_RELEASE);
 		}
+
+		// create an assembler label after the TLS accesses, closing the region opened above
+		// the "memory" clobber is there to avoid the accesses being moved across the label
+		asm volatile("__cfaasm_nopoll_after:":::"memory");
 	}
 }
@@ -275,5 +367,5 @@
 static void timeout( $thread * this ) {
 	#if !defined( __CFA_NO_STATISTICS__ )
-		kernelTLS.this_stats = this->curr_cluster->stats;
+		kernelTLS().this_stats = this->curr_cluster->stats;
 	#endif
 	unpark( this );
@@ -286,8 +378,8 @@
 static inline bool preemption_ready() {
 	// Check if preemption is safe
-	bool ready = kernelTLS.preemption_state.enabled && ! kernelTLS.preemption_state.in_progress;
+	bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress;
 
 	// Adjust the pending flag accordingly
-	kernelTLS.this_processor->pending_preemption = !ready;
+	__cfaabi_tls.this_processor->pending_preemption = !ready;
 	return ready;
 }
@@ -303,6 +395,6 @@
 
 	// Start with preemption disabled until ready
-	kernelTLS.preemption_state.enabled = false;
-	kernelTLS.preemption_state.disable_count = 1;
+	__cfaabi_tls.preemption_state.enabled = false;
+	__cfaabi_tls.preemption_state.disable_count = 1;
 
 	// Initialize the event kernel
@@ -362,9 +454,49 @@
 // Kernel Signal Handlers
 //=============================================================================================
+struct asm_region { // pair of code addresses delimiting a labelled region, as produced by __cfaasm_label
+	void * before; // address of the __cfaasm_<label>_before label
+	void * after;  // address of the __cfaasm_<label>_after label
+};
+
+//-----------------------------------------------------------------------------
+// Some assembly required: __cfaasm_label( x ) evaluates to an asm_region holding the addresses of the __cfaasm_x_before / __cfaasm_x_after labels
+#if defined( __i386 ) // 32-bit x86: load both label addresses with movl
+	#define __cfaasm_label( label ) \
+		({ \
+			struct asm_region region; \
+			asm( \
+				"movl $__cfaasm_" #label "_before, %[vb]\n\t" \
+				"movl $__cfaasm_" #label "_after , %[va]\n\t" \
+				 : [vb]"=r"(region.before), [va]"=r"(region.after) \
+			); \
+			region; \
+		});
+#elif defined( __x86_64 ) // 64-bit x86: load both label addresses with movq
+	#ifdef __PIC__ // position-independent builds append @PLT to the label references
+		#define PLT "@PLT"
+	#else
+		#define PLT ""
+	#endif
+	#define __cfaasm_label( label ) \
+		({ \
+			struct asm_region region; \
+			asm( \
+				"movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \
+				"movq $__cfaasm_" #label "_after"  PLT ", %[va]\n\t" \
+				 : [vb]"=r"(region.before), [va]"=r"(region.after) \
+			); \
+			region; \
+		});
+#elif defined( __aarch64__ ) // no label lookup implemented for arm yet
+	#error __cfaasm_label undefined for arm
+#else
+	#error unknown hardware architecture
+#endif
 
 // Context switch signal handler
 // Receives SIGUSR1 signal and causes the current thread to yield
 static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
-	__cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )
+	void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP);
+	__cfaabi_dbg_debug_do( last_interrupt = ip; )
 
 	// SKULLDUGGERY: if a thread creates a processor and the immediately deletes it,
@@ -372,9 +504,9 @@
 	// before the kernel thread has even started running. When that happens, an interrupt
 	// with a null 'this_processor' will be caught, just ignore it.
-	if(! kernelTLS.this_processor ) return;
+	if(! __cfaabi_tls.this_processor ) return;
 
 	choose(sfp->si_value.sival_int) {
 		case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
-		case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
+		case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
 		default:
 			abort( "internal error, signal value is %d", sfp->si_value.sival_int );
@@ -384,8 +516,15 @@
 	if( !preemption_ready() ) { return; }
 
-	__cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
+	struct asm_region region;
+	region = __cfaasm_label( get     ); if( ip >= region.before && ip <= region.after ) return;
+	region = __cfaasm_label( check   ); if( ip >= region.before && ip <= region.after ) return;
+	region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return;
+	region = __cfaasm_label( enable  ); if( ip >= region.before && ip <= region.after ) return;
+	region = __cfaasm_label( nopoll  ); if( ip >= region.before && ip <= region.after ) return;
+
+	__cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
 
 	// Sync flag : prevent recursive calls to the signal handler
-	kernelTLS.preemption_state.in_progress = true;
+	__cfaabi_tls.preemption_state.in_progress = true;
 
 	// Clear sighandler mask before context switching.
@@ -397,7 +536,6 @@
 	}
 
-	// TODO: this should go in finish action
 	// Clear the in progress flag
-	kernelTLS.preemption_state.in_progress = false;
+	__cfaabi_tls.preemption_state.in_progress = false;
 
 	// Preemption can occur here
@@ -416,5 +554,5 @@
 	id.full_proc = false;
 	id.id = doregister(&id);
-	kernelTLS.this_proc_id = &id;
+	__cfaabi_tls.this_proc_id = &id;
 
 	// Block sigalrms to control when they arrive
@@ -484,5 +622,5 @@
 
 void __cfaabi_check_preemption() {
-	bool ready = kernelTLS.preemption_state.enabled;
+	bool ready = __preemption_enabled();
 	if(!ready) { abort("Preemption should be ready"); }
 
@@ -507,5 +645,5 @@
 #ifdef __CFA_WITH_VERIFY__
 bool __cfaabi_dbg_in_kernel() {
-	return !kernelTLS.preemption_state.enabled;
+	return !__preemption_enabled();
 }
 #endif
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision be5e34b191df89bc3ee8bb1fb45fe9e4d11148d8)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision 8fc652e01c609dd8c3f392cd9de90efc31e0d6b2)
@@ -150,5 +150,5 @@
 //  queues or removing them.
 uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 
 	// Step 1 : lock global lock
@@ -166,10 +166,10 @@
 	}
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	return s;
 }
 
 void ready_mutate_unlock( uint_fast32_t last_s ) with(*__scheduler_lock) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 
 	// Step 1 : release local locks
@@ -188,5 +188,5 @@
 	__atomic_store_n(&lock, (bool)false, __ATOMIC_RELEASE);
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
@@ -252,5 +252,5 @@
 		preferred =
 			//*
-			kernelTLS.this_processor ? kernelTLS.this_processor->id * 4 : -1;
+			kernelTLS().this_processor ? kernelTLS().this_processor->id * 4 : -1;
 			/*/
 			thrd->link.preferred * 4;
@@ -331,5 +331,5 @@
 		// Don't bother trying locally too much
 		int local_tries = 8;
-		preferred = kernelTLS.this_processor->id * 4;
+		preferred = kernelTLS().this_processor->id * 4;
 	#endif