Index: libcfa/src/concurrency/alarm.cfa
===================================================================
--- libcfa/src/concurrency/alarm.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/alarm.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -45,10 +45,10 @@
 //=============================================================================================
 
-void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period ) with( this ) {
+void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period) with( this ) {	// constructor for an alarm node attached to thread thrd
 	this.thrd = thrd;
 	this.alarm = alarm;
 	this.period = period;
 	set = false;
-	kernel_alarm = false;
+	type = User;	// enum tag replacing the former kernel_alarm = false
 }
 
@@ -58,5 +58,13 @@
 	this.period = period;
 	set = false;
-	kernel_alarm = true;
+	type = Kernel;
+}
+void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period, Alarm_Callback callback ) with( this ) {	// constructor for an alarm node that carries a callback
+	this.thrd = thrd;
+	this.alarm = alarm;
+	this.period = period;
+	this.callback = callback;	// only stored here; this constructor never invokes it
+	set = false;			// node starts out unregistered; `set` and `type` are fields of `this` opened by with( this )
+	type = Callback;		// distinguishes this node from the User and Kernel alarm kinds
+}
 
Index: libcfa/src/concurrency/alarm.hfa
===================================================================
--- libcfa/src/concurrency/alarm.hfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/alarm.hfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -39,4 +39,10 @@
 //=============================================================================================
 
+enum alarm_type{ Kernel = 0, User = 1, Callback = 2 };	// kind of alarm node; stored in alarm_node_t.type by the constructors
+
+struct alarm_node_t;	// forward declaration so Alarm_Callback can name the node type before it is defined
+
+typedef void (*Alarm_Callback)(alarm_node_t & );	// pointer to a function that receives the alarm node by reference
+
 struct alarm_node_t {
 	Time alarm;				// time when alarm goes off
@@ -50,6 +56,8 @@
 	};
 
+	Alarm_Callback callback;
+
 	bool set		:1;		// whether or not the alarm has be registered
-	bool kernel_alarm	:1;		// true if this is not a user defined alarm
+	enum alarm_type type;		// kind of alarm: Kernel, User or Callback (assigned by the constructors)
 };
 DLISTED_MGD_IMPL_OUT(alarm_node_t)
@@ -57,4 +65,5 @@
 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period );
 void ?{}( alarm_node_t & this, processor   * proc, Time alarm, Duration period );
+void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period, Alarm_Callback callback );
 void ^?{}( alarm_node_t & this );
 
Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/coroutine.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -134,5 +134,5 @@
 void ^?{}($coroutine& this) {
 	if(this.state != Halted && this.state != Start && this.state != Primed) {
-		$coroutine * src = TL_GET( this_thread )->curr_cor;
+		$coroutine * src = active_coroutine();
 		$coroutine * dst = &this;
 
@@ -240,5 +240,5 @@
 
 	struct $coroutine * __cfactx_cor_finish(void) {
-		struct $coroutine * cor = kernelTLS.this_thread->curr_cor;
+		struct $coroutine * cor = active_coroutine();
 
 		if(cor->state == Primed) {
Index: libcfa/src/concurrency/coroutine.hfa
===================================================================
--- libcfa/src/concurrency/coroutine.hfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/coroutine.hfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -63,5 +63,5 @@
 void prime(T & cor);
 
-static inline struct $coroutine * active_coroutine() { return TL_GET( this_thread )->curr_cor; }
+static inline struct $coroutine * active_coroutine() { return active_thread()->curr_cor; }
 
 //-----------------------------------------------------------------------------
@@ -87,5 +87,5 @@
 
 	// set new coroutine that task is executing
-	TL_GET( this_thread )->curr_cor = dst;
+	active_thread()->curr_cor = dst;
 
 	// context switch to specified coroutine
@@ -112,5 +112,5 @@
 		// will also migrate which means this value will
 		// stay in syn with the TLS
-		$coroutine * src = TL_GET( this_thread )->curr_cor;
+		$coroutine * src = active_coroutine();
 
 		assertf( src->last != 0,
@@ -138,12 +138,12 @@
 	// will also migrate which means this value will
 	// stay in syn with the TLS
-	$coroutine * src = TL_GET( this_thread )->curr_cor;
+	$coroutine * src = active_coroutine();
 	$coroutine * dst = get_coroutine(cor);
 
 	if( unlikely(dst->context.SP == 0p) ) {
-		TL_GET( this_thread )->curr_cor = dst;
+		active_thread()->curr_cor = dst;
 		__stack_prepare(&dst->stack, 65000);
 		__cfactx_start(main, dst, cor, __cfactx_invoke_coroutine);
-		TL_GET( this_thread )->curr_cor = src;
+		active_thread()->curr_cor = src;
 	}
 
@@ -175,5 +175,5 @@
 	// will also migrate which means this value will
 	// stay in syn with the TLS
-	$coroutine * src = TL_GET( this_thread )->curr_cor;
+	$coroutine * src = active_coroutine();
 
 	// not resuming self ?
Index: libcfa/src/concurrency/exception.cfa
===================================================================
--- libcfa/src/concurrency/exception.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/exception.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -72,5 +72,5 @@
 	void * stop_param;
 
-	struct $thread * this_thread = TL_GET( this_thread );
+	struct $thread * this_thread = active_thread();
 	if ( &this_thread->self_cor != this_thread->curr_cor ) {
 		struct $coroutine * cor = this_thread->curr_cor;
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/io.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -76,5 +76,5 @@
 
 	static inline bool next( __leaderlock_t & this ) {
-		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+		/* paranoid */ verify( ! __preemption_enabled() );
 		struct $thread * nextt;
 		for() {
@@ -168,5 +168,5 @@
 	// This is NOT thread-safe
 	static [int, bool] __drain_io( & struct __io_data ring ) {
-		/* paranoid */ verify( !kernelTLS.preemption_state.enabled );
+		/* paranoid */ verify( ! __preemption_enabled() );
 
 		unsigned to_submit = 0;
@@ -404,5 +404,5 @@
 					return;
 				}
-				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+				/* paranoid */ verify( ! __preemption_enabled() );
 				__STATS__( true,
 					io.submit_q.leader += 1;
@@ -442,5 +442,5 @@
 
 			#if defined(LEADER_LOCK)
-				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+				/* paranoid */ verify( ! __preemption_enabled() );
 				next(ring.submit_q.submit_lock);
 			#else
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -149,4 +149,5 @@
 		id.full_proc = false;
 		id.id = doregister(&id);
+		__cfaabi_tls.this_proc_id = &id;
 		__cfaabi_dbg_print_safe( "Kernel : IO poller thread starting\n" );
 
@@ -178,7 +179,7 @@
 				__cfadbg_print_safe(io_core, "Kernel I/O : Unparking io poller %p\n", io_ctx);
 				#if !defined( __CFA_NO_STATISTICS__ )
-					kernelTLS.this_stats = io_ctx->self.curr_cluster->stats;
+					__cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats;
 				#endif
-				__post( io_ctx->sem, &id );
+				post( io_ctx->sem );
 			}
 		}
@@ -235,5 +236,5 @@
 			if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) {
 
-				ready_schedule_lock( (struct __processor_id_t *)active_processor() );
+				ready_schedule_lock();
 
 					// This is the tricky case
@@ -253,5 +254,5 @@
 					thrd.preempted = __NO_PREEMPTION;
 
-				ready_schedule_unlock( (struct __processor_id_t *)active_processor() );
+				ready_schedule_unlock();
 
 				// Pretend like the thread was blocked all along
@@ -275,5 +276,5 @@
 			}
 		} else {
-			unpark( &thrd );
+			post( this.thrd.sem );
 		}
 
Index: libcfa/src/concurrency/iocall.cfa
===================================================================
--- libcfa/src/concurrency/iocall.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ 	(revision )
@@ -1,621 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// iocall.cfa --
-//
-// Author           : Thierry Delisle
-// Created On       : Wed Jul  1 14:51:00 2020
-// Last Modified By :
-// Last Modified On :
-// Update Count     :
-//
-
-#define __cforall_thread__
-
-#include "bits/defs.hfa"
-#include "kernel.hfa"
-
-//=============================================================================================
-// I/O uring backend
-//=============================================================================================
-
-#if defined(CFA_HAVE_LINUX_IO_URING_H)
-	#include <assert.h>
-	#include <stdint.h>
-	#include <errno.h>
-	#include <linux/io_uring.h>
-
-	#include "kernel/fwd.hfa"
-	#include "io/types.hfa"
-
-	extern [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data );
-	extern void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1)));
-
-	static inline void ?{}(struct io_uring_sqe & this, __u8 opcode, int fd) {
-		this.opcode = opcode;
-		#if !defined(IOSQE_ASYNC)
-			this.flags = 0;
-		#else
-			this.flags = IOSQE_ASYNC;
-		#endif
-		this.ioprio = 0;
-		this.fd = fd;
-		this.off = 0;
-		this.addr = 0;
-		this.len = 0;
-		this.rw_flags = 0;
-		this.__pad2[0] = this.__pad2[1] = this.__pad2[2] = 0;
-	}
-
-	static inline void ?{}(struct io_uring_sqe & this, __u8 opcode, int fd, void * addr, __u32 len, __u64 off ) {
-		(this){ opcode, fd };
-		this.off = off;
-		this.addr = (__u64)(uintptr_t)addr;
-		this.len = len;
-	}
-
-	static inline io_context * __get_io_context( void ) {
-		cluster * cltr = active_cluster();
-		/* paranoid */ verifyf( cltr, "No active cluster for io operation\n");
-		assertf( cltr->io.cnt > 0, "Cluster %p has no default io contexts and no context was specified\n", cltr );
-		/* paranoid */ verifyf( cltr->io.ctxs, "default io contexts for cluster %p are missing\n", cltr);
-		return &cltr->io.ctxs[ __tls_rand() % cltr->io.cnt ];
-	}
-
-
-	#if defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_IO_DRAIN) && defined(CFA_HAVE_IOSQE_ASYNC)
-		#define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_DRAIN | IOSQE_ASYNC)
-	#elif defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_ASYNC)
-		#define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_ASYNC)
-	#elif defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_IO_DRAIN)
-		#define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_DRAIN)
-	#elif defined(CFA_HAVE_IOSQE_IO_DRAIN) && defined(CFA_HAVE_IOSQE_ASYNC)
-		#define REGULAR_FLAGS (IOSQE_IO_DRAIN | IOSQE_ASYNC)
-	#elif defined(CFA_HAVE_IOSQE_FIXED_FILE)
-		#define REGULAR_FLAGS (IOSQE_FIXED_FILE)
-	#elif defined(CFA_HAVE_IOSQE_IO_DRAIN)
-		#define REGULAR_FLAGS (IOSQE_IO_DRAIN)
-	#elif defined(CFA_HAVE_IOSQE_ASYNC)
-		#define REGULAR_FLAGS (IOSQE_ASYNC)
-	#else
-		#define REGULAR_FLAGS (0)
-	#endif
-
-	#if defined(CFA_HAVE_IOSQE_IO_LINK) && defined(CFA_HAVE_IOSQE_IO_HARDLINK)
-		#define LINK_FLAGS (IOSQE_IO_LINK | IOSQE_IO_HARDLINK)
-	#elif defined(CFA_HAVE_IOSQE_IO_LINK)
-		#define LINK_FLAGS (IOSQE_IO_LINK)
-	#elif defined(CFA_HAVE_IOSQE_IO_HARDLINK)
-		#define LINK_FLAGS (IOSQE_IO_HARDLINK)
-	#else
-		#define LINK_FLAGS (0)
-	#endif
-
-	#if defined(CFA_HAVE_SPLICE_F_FD_IN_FIXED)
-		#define SPLICE_FLAGS (SPLICE_F_FD_IN_FIXED)
-	#else
-		#define SPLICE_FLAGS (0)
-	#endif
-
-	#define __submit_prelude \
-		if( 0 != (submit_flags & LINK_FLAGS) ) { errno = ENOTSUP; return -1; } \
-		(void)timeout; (void)cancellation; \
-		if( !context ) context = __get_io_context(); \
-		__io_user_data_t data = { 0 }; \
-		struct __io_data & ring = *context->thrd.ring; \
-		struct io_uring_sqe * sqe; \
-		__u32 idx; \
-		__u8 sflags = REGULAR_FLAGS & submit_flags; \
-		[sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&data ); \
-		sqe->flags = sflags;
-
-	#define __submit_wait \
-		/*__cfaabi_bits_print_safe( STDERR_FILENO, "Preparing user data %p for %p\n", &data, data.thrd );*/ \
-		verify( sqe->user_data == (__u64)(uintptr_t)&data ); \
-		__submit( context, idx ); \
-		wait( data.sem ); \
-		if( data.result < 0 ) { \
-			errno = -data.result; \
-			return -1; \
-		} \
-		return data.result;
-#endif
-
-//=============================================================================================
-// I/O Forwards
-//=============================================================================================
-#include <time.hfa>
-
-// Some forward declarations
-#include <errno.h>
-#include <unistd.h>
-
-extern "C" {
-	#include <sys/types.h>
-	#include <sys/socket.h>
-	#include <sys/syscall.h>
-
-#if defined(HAVE_PREADV2)
-	struct iovec;
-	extern ssize_t preadv2 (int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
-#endif
-#if defined(HAVE_PWRITEV2)
-	struct iovec;
-	extern ssize_t pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
-#endif
-
-	extern int fsync(int fd);
-
-	#if __OFF_T_MATCHES_OFF64_T
-		typedef __off64_t off_t;
-	#else
-		typedef __off_t off_t;
-	#endif
-	typedef __off64_t off64_t;
-	extern int sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags);
-
-	struct msghdr;
-	struct sockaddr;
-	extern ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags);
-	extern ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags);
-	extern ssize_t send(int sockfd, const void *buf, size_t len, int flags);
-	extern ssize_t recv(int sockfd, void *buf, size_t len, int flags);
-	extern int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags);
-	extern int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen);
-
-	extern int fallocate(int fd, int mode, off_t offset, off_t len);
-	extern int posix_fadvise(int fd, off_t offset, off_t len, int advice);
-	extern int madvise(void *addr, size_t length, int advice);
-
-	extern int openat(int dirfd, const char *pathname, int flags, mode_t mode);
-	extern int close(int fd);
-
-	extern ssize_t read (int fd, void *buf, size_t count);
-
-	extern ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags);
-	extern ssize_t tee(int fd_in, int fd_out, size_t len, unsigned int flags);
-}
-
-//=============================================================================================
-// I/O Interface
-//=============================================================================================
-
-//-----------------------------------------------------------------------------
-// Asynchronous operations
-#if defined(HAVE_PREADV2)
-	ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-		#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_READV)
-			return preadv2(fd, iov, iovcnt, offset, flags);
-		#else
-			__submit_prelude
-
-			sqe->opcode = IORING_OP_READV;
-			sqe->ioprio = 0;
-			sqe->fd = fd;
-			sqe->off = offset;
-			sqe->addr = (__u64)iov;
-			sqe->len = iovcnt;
-			sqe->rw_flags = 0;
-			sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
-
-			__submit_wait
-		#endif
-	}
-#endif
-
-#if defined(HAVE_PWRITEV2)
-	ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-		#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_WRITEV)
-			return pwritev2(fd, iov, iovcnt, offset, flags);
-		#else
-			__submit_prelude
-
-			sqe->opcode = IORING_OP_WRITEV;
-			sqe->ioprio = 0;
-			sqe->fd = fd;
-			sqe->off = offset;
-			sqe->addr = (__u64)iov;
-			sqe->len = iovcnt;
-			sqe->rw_flags = 0;
-			sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
-
-			__submit_wait
-		#endif
-	}
-#endif
-
-int cfa_fsync(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_FSYNC)
-		return fsync(fd);
-	#else
-		__submit_prelude
-
-		sqe->opcode = IORING_OP_FSYNC;
-		sqe->ioprio = 0;
-		sqe->fd = fd;
-		sqe->off = 0;
-		sqe->addr = 0;
-		sqe->len = 0;
-		sqe->rw_flags = 0;
-		sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
-
-		__submit_wait
-	#endif
-}
-
-int cfa_sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_SYNC_FILE_RANGE)
-		return sync_file_range(fd, offset, nbytes, flags);
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_SYNC_FILE_RANGE, fd };
-		sqe->off = offset;
-		sqe->len = nbytes;
-		sqe->sync_range_flags = flags;
-
-		__submit_wait
-	#endif
-}
-
-
-ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_SENDMSG)
-		return sendmsg(sockfd, msg, flags);
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_SENDMSG, sockfd, msg, 1, 0 };
-		sqe->msg_flags = flags;
-
-		__submit_wait
-	#endif
-}
-
-ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_RECVMSG)
-		return recvmsg(sockfd, msg, flags);
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_RECVMSG, sockfd, msg, 1, 0 };
-		sqe->msg_flags = flags;
-
-		__submit_wait
-	#endif
-}
-
-ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_SEND)
-		return send( sockfd, buf, len, flags );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_SEND, sockfd };
-		sqe->addr = (__u64)buf;
-		sqe->len = len;
-		sqe->msg_flags = flags;
-
-		__submit_wait
-	#endif
-}
-
-ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_RECV)
-		return recv( sockfd, buf, len, flags );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_RECV, sockfd };
-		sqe->addr = (__u64)buf;
-		sqe->len = len;
-		sqe->msg_flags = flags;
-
-		__submit_wait
-	#endif
-}
-
-int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_ACCEPT)
-		return accept4( sockfd, addr, addrlen, flags );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_ACCEPT, sockfd };
-		sqe->addr  = (__u64)addr;
-		sqe->addr2 = (__u64)addrlen;
-		sqe->accept_flags = flags;
-
-		__submit_wait
-	#endif
-}
-
-int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_CONNECT)
-		return connect( sockfd, addr, addrlen );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_CONNECT, sockfd };
-		sqe->addr = (__u64)addr;
-		sqe->off  = (__u64)addrlen;
-
-		__submit_wait
-	#endif
-}
-
-int cfa_fallocate(int fd, int mode, off_t offset, off_t len, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_FALLOCATE)
-		return fallocate( fd, mode, offset, len );
-	#else
-		__submit_prelude
-
-		#warning FALLOCATE documentation for linux 5.7 is incorrect, and does not handle mode
-
-		(*sqe){ IORING_OP_FALLOCATE, fd };
-		sqe->off = offset;
-		sqe->len = mode;
-		sqe->addr = len;
-
-		__submit_wait
-	#endif
-}
-
-int cfa_fadvise(int fd, off_t offset, off_t len, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_FADVISE)
-		return posix_fadvise( fd, offset, len, advice );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_FADVISE, fd };
-		sqe->off = (__u64)offset;
-		sqe->len = len;
-		sqe->fadvise_advice = advice;
-
-		__submit_wait
-	#endif
-}
-
-int cfa_madvise(void *addr, size_t length, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_MADVISE)
-		return madvise( addr, length, advice );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_MADVISE, 0 };
-		sqe->addr = (__u64)addr;
-		sqe->len = length;
-		sqe->fadvise_advice = advice;
-
-		__submit_wait
-	#endif
-}
-
-int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_OPENAT)
-		return openat( dirfd, pathname, flags, mode );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_OPENAT, dirfd };
-		sqe->addr = (__u64)pathname;
-		sqe->open_flags = flags;
-		sqe->len = mode;
-
-		__submit_wait
-	#endif
-}
-
-int cfa_close(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_CLOSE)
-		return close( fd );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_CLOSE, fd };
-
-		__submit_wait
-	#endif
-}
-
-// Forward declare in case it is not supported
-struct statx;
-int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_STATX)
-		#if defined(__NR_statx)
-			return syscall( __NR_statx, dirfd, pathname, flags, mask, statxbuf );
-		#else
-			errno = ENOTSUP;
-			return -1;
-		#endif
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_STATX, dirfd, pathname, mask, (__u64)statxbuf };
-		sqe->statx_flags = flags;
-
-		__submit_wait
-	#endif
-}
-
-ssize_t cfa_read(int fd, void *buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_READ)
-		return read( fd, buf, count );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_READ, fd, buf, count, 0 };
-
-		__submit_wait
-	#endif
-}
-
-ssize_t cfa_write(int fd, void *buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_WRITE)
-		return read( fd, buf, count );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_WRITE, fd, buf, count, 0 };
-
-		__submit_wait
-	#endif
-}
-
-ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_SPLICE)
-		return splice( fd_in, off_in, fd_out, off_out, len, flags );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_SPLICE, fd_out };
-		if( off_out ) {
-			sqe->off = *off_out;
-		}
-		else {
-			sqe->off = (__u64)-1;
-		}
-		sqe->len = len;
-		sqe->splice_fd_in  = fd_in;
-		if( off_in ) {
-			sqe->splice_off_in = *off_in;
-		}
-		else {
-			sqe->splice_off_in = (__u64)-1;
-		}
-		sqe->splice_flags  = flags | (SPLICE_FLAGS & submit_flags);
-
-		__submit_wait
-	#endif
-}
-
-ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_TEE)
-		return tee( fd_in, fd_out, len, flags );
-	#else
-		__submit_prelude
-
-		(*sqe){ IORING_OP_TEE, fd_out, 0p, len, 0 };
-		sqe->splice_fd_in = fd_in;
-		sqe->splice_flags  = flags | (SPLICE_FLAGS & submit_flags);
-
-		__submit_wait
-	#endif
-}
-
-//-----------------------------------------------------------------------------
-// Check if a function is asynchronous
-
-// Macro magic to reduce the size of the following switch case
-#define IS_DEFINED_APPLY(f, ...) f(__VA_ARGS__)
-#define IS_DEFINED_SECOND(first, second, ...) second
-#define IS_DEFINED_TEST(expansion) _CFA_IO_FEATURE_##expansion
-#define IS_DEFINED(macro) IS_DEFINED_APPLY( IS_DEFINED_SECOND,IS_DEFINED_TEST(macro) false, true)
-
-bool has_user_level_blocking( fptr_t func ) {
-	#if defined(CFA_HAVE_LINUX_IO_URING_H)
-		#if defined(HAVE_PREADV2)
-			if( /*func == (fptr_t)preadv2 || */
-				func == (fptr_t)cfa_preadv2 )
-				#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_READV ,
-				return IS_DEFINED(CFA_HAVE_IORING_OP_READV);
-		#endif
-
-		#if defined(HAVE_PWRITEV2)
-			if( /*func == (fptr_t)pwritev2 || */
-				func == (fptr_t)cfa_pwritev2 )
-				#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_WRITEV ,
-				return IS_DEFINED(CFA_HAVE_IORING_OP_WRITEV);
-		#endif
-
-		if( /*func == (fptr_t)fsync || */
-			func == (fptr_t)cfa_fsync )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_FSYNC ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_FSYNC);
-
-		if( /*func == (fptr_t)ync_file_range || */
-			func == (fptr_t)cfa_sync_file_range )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_SYNC_FILE_RANGE ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_SYNC_FILE_RANGE);
-
-		if( /*func == (fptr_t)sendmsg || */
-			func == (fptr_t)cfa_sendmsg )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_SENDMSG ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_SENDMSG);
-
-		if( /*func == (fptr_t)recvmsg || */
-			func == (fptr_t)cfa_recvmsg )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_RECVMSG ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_RECVMSG);
-
-		if( /*func == (fptr_t)send || */
-			func == (fptr_t)cfa_send )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_SEND ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_SEND);
-
-		if( /*func == (fptr_t)recv || */
-			func == (fptr_t)cfa_recv )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_RECV ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_RECV);
-
-		if( /*func == (fptr_t)accept4 || */
-			func == (fptr_t)cfa_accept4 )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_ACCEPT ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_ACCEPT);
-
-		if( /*func == (fptr_t)connect || */
-			func == (fptr_t)cfa_connect )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_CONNECT ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_CONNECT);
-
-		if( /*func == (fptr_t)fallocate || */
-			func == (fptr_t)cfa_fallocate )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_FALLOCATE ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_FALLOCATE);
-
-		if( /*func == (fptr_t)posix_fadvise || */
-			func == (fptr_t)cfa_fadvise )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_FADVISE ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_FADVISE);
-
-		if( /*func == (fptr_t)madvise || */
-			func == (fptr_t)cfa_madvise )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_MADVISE ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_MADVISE);
-
-		if( /*func == (fptr_t)openat || */
-			func == (fptr_t)cfa_openat )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_OPENAT ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_OPENAT);
-
-		if( /*func == (fptr_t)close || */
-			func == (fptr_t)cfa_close )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_CLOSE ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_CLOSE);
-
-		if( /*func == (fptr_t)read || */
-			func == (fptr_t)cfa_read )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_READ ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_READ);
-
-		if( /*func == (fptr_t)write || */
-			func == (fptr_t)cfa_write )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_WRITE ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_WRITE);
-
-		if( /*func == (fptr_t)splice || */
-			func == (fptr_t)cfa_splice )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_SPLICE ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_SPLICE);
-
-		if( /*func == (fptr_t)tee || */
-			func == (fptr_t)cfa_tee )
-			#define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_TEE ,
-			return IS_DEFINED(CFA_HAVE_IORING_OP_TEE);
-	#endif
-
-	return false;
-}
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/kernel.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -108,5 +108,5 @@
 static $thread * __next_thread_slow(cluster * this);
 static void __run_thread(processor * this, $thread * dst);
-static void __wake_one(struct __processor_id_t * id, cluster * cltr);
+static void __wake_one(cluster * cltr);
 
 static void push  (__cluster_idles & idles, processor & proc);
@@ -122,6 +122,6 @@
 	// Because of a bug, we couldn't initialized the seed on construction
 	// Do it here
-	kernelTLS.rand_seed ^= rdtscl();
-	kernelTLS.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
+	__cfaabi_tls.rand_seed ^= rdtscl();
+	__cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
 	__tls_rand_advance_bck();
 
@@ -217,5 +217,5 @@
 		// and it make sense for it to be set in all other cases except here
 		// fake it
-		kernelTLS.this_thread = mainThread;
+		__cfaabi_tls.this_thread = mainThread;
 	}
 
@@ -230,5 +230,5 @@
 // from the processor coroutine to the target thread
 static void __run_thread(processor * this, $thread * thrd_dst) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verifyf( thrd_dst->state == Ready || thrd_dst->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", thrd_dst->state, thrd_dst->preempted);
 	/* paranoid */ verifyf( thrd_dst->link.next == 0p, "Expected null got %p", thrd_dst->link.next );
@@ -247,8 +247,8 @@
 
 		// Update global state
-		kernelTLS.this_thread = thrd_dst;
-
-		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-		/* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
+		kernelTLS().this_thread = thrd_dst;
+
+		/* paranoid */ verify( ! __preemption_enabled() );
+		/* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
 		/* paranoid */ verify( thrd_dst->context.SP );
 		/* paranoid */ verify( thrd_dst->state != Halted );
@@ -267,9 +267,9 @@
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
 		/* paranoid */ verify( thrd_dst->context.SP );
-		/* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
-		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+		/* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
+		/* paranoid */ verify( ! __preemption_enabled() );
 
 		// Reset global state
-		kernelTLS.this_thread = 0p;
+		kernelTLS().this_thread = 0p;
 
 		// We just finished running a thread, there are a few things that could have happened.
@@ -282,5 +282,5 @@
 		if(unlikely(thrd_dst->preempted != __NO_PREEMPTION)) {
 			// The thread was preempted, reschedule it and reset the flag
-			__schedule_thread( (__processor_id_t*)this, thrd_dst );
+			__schedule_thread( thrd_dst );
 			break RUNNING;
 		}
@@ -315,15 +315,15 @@
 	proc_cor->state = Active;
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
 // KERNEL_ONLY
 void returnToKernel() {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	$coroutine * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
-	$thread * thrd_src = kernelTLS.this_thread;
+	/* paranoid */ verify( ! __preemption_enabled() );
+	$coroutine * proc_cor = get_coroutine(kernelTLS().this_processor->runner);
+	$thread * thrd_src = kernelTLS().this_thread;
 
 	#if !defined(__CFA_NO_STATISTICS__)
-		struct processor * last_proc = kernelTLS.this_processor;
+		struct processor * last_proc = kernelTLS().this_processor;
 	#endif
 
@@ -345,10 +345,10 @@
 
 	#if !defined(__CFA_NO_STATISTICS__)
-		if(last_proc != kernelTLS.this_processor) {
+		if(last_proc != kernelTLS().this_processor) {
 			__tls_stats()->ready.threads.migration++;
 		}
 	#endif
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) < ((uintptr_t)__get_stack(thrd_src->curr_cor)->base ), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too small.\n", thrd_src );
 	/* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) > ((uintptr_t)__get_stack(thrd_src->curr_cor)->limit), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too large.\n", thrd_src );
@@ -358,8 +358,9 @@
 // Scheduler routines
 // KERNEL ONLY
-void __schedule_thread( struct __processor_id_t * id, $thread * thrd ) {
+void __schedule_thread( $thread * thrd ) {
+	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verify( thrd );
 	/* paranoid */ verify( thrd->state != Halted );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
 	/* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
 	/* paranoid */ 	if( thrd->state == Blocked || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
@@ -374,21 +375,23 @@
 	if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
 
-	ready_schedule_lock  ( id );
+	ready_schedule_lock();
 		push( thrd->curr_cluster, thrd );
-		__wake_one(id, thrd->curr_cluster);
-	ready_schedule_unlock( id );
-
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+		__wake_one(thrd->curr_cluster);
+	ready_schedule_unlock();
+
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
 // KERNEL ONLY
 static inline $thread * __next_thread(cluster * this) with( *this ) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-
-	ready_schedule_lock  ( (__processor_id_t*)kernelTLS.this_processor );
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+
+	ready_schedule_lock();
 		$thread * thrd = pop( this );
-	ready_schedule_unlock( (__processor_id_t*)kernelTLS.this_processor );
-
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	ready_schedule_unlock();
+
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	return thrd;
 }
@@ -396,16 +399,19 @@
 // KERNEL ONLY
 static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-
-	ready_schedule_lock  ( (__processor_id_t*)kernelTLS.this_processor );
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+
+	ready_schedule_lock();
 		$thread * thrd = pop_slow( this );
-	ready_schedule_unlock( (__processor_id_t*)kernelTLS.this_processor );
-
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	ready_schedule_unlock();
+
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	return thrd;
 }
 
-// KERNEL ONLY unpark with out disabling interrupts
-void __unpark(  struct __processor_id_t * id, $thread * thrd ) {
+void unpark( $thread * thrd ) {
+	if( !thrd ) return;
+
 	int old_ticket = __atomic_fetch_add(&thrd->ticket, 1, __ATOMIC_SEQ_CST);
 	switch(old_ticket) {
@@ -417,6 +423,20 @@
 			/* paranoid */ verify( thrd->state == Blocked );
 
-			// Wake lost the race,
-			__schedule_thread( id, thrd );
+			{
+				/* paranoid */ verify( publicTLS_get(this_proc_id) );
+				bool full = publicTLS_get(this_proc_id)->full_proc;
+				if(full) disable_interrupts();
+
+				/* paranoid */ verify( ! __preemption_enabled() );
+
+				// Wake lost the race,
+				__schedule_thread( thrd );
+
+				/* paranoid */ verify( ! __preemption_enabled() );
+
+				if(full) enable_interrupts( __cfaabi_dbg_ctx );
+				/* paranoid */ verify( publicTLS_get(this_proc_id) );
+			}
+
 			break;
 		default:
@@ -426,23 +446,15 @@
 }
 
-void unpark( $thread * thrd ) {
-	if( !thrd ) return;
-
+void park( void ) {
+	/* paranoid */ verify( __preemption_enabled() );
 	disable_interrupts();
-	__unpark( (__processor_id_t*)kernelTLS.this_processor, thrd );
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( kernelTLS().this_thread->preempted == __NO_PREEMPTION );
+
+	returnToKernel();
+
+	/* paranoid */ verify( ! __preemption_enabled() );
 	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
-void park( void ) {
-	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
-	disable_interrupts();
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	/* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
-
-	returnToKernel();
-
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	enable_interrupts( __cfaabi_dbg_ctx );
-	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( __preemption_enabled() );
 
 }
@@ -453,5 +465,5 @@
 	// Should never return
 	void __cfactx_thrd_leave() {
-		$thread * thrd = TL_GET( this_thread );
+		$thread * thrd = active_thread();
 		$monitor * this = &thrd->self_mon;
 
@@ -462,9 +474,9 @@
 
 		thrd->state = Halted;
-
+		if( TICKET_RUNNING != thrd->ticket ) { abort( "Thread terminated with pending unpark" ); }
 		if( thrd != this->owner || this->recursion != 1) { abort( "Thread internal monitor has unbalanced recursion" ); }
 
 		// Leave the thread
-		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+		/* paranoid */ verify( ! __preemption_enabled() );
 		returnToKernel();
 		abort();
@@ -476,9 +488,9 @@
 // KERNEL ONLY
 bool force_yield( __Preemption_Reason reason ) {
-	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( __preemption_enabled() );
 	disable_interrupts();
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-
-	$thread * thrd = kernelTLS.this_thread;
+	/* paranoid */ verify( ! __preemption_enabled() );
+
+	$thread * thrd = kernelTLS().this_thread;
 	/* paranoid */ verify(thrd->state == Active);
 
@@ -494,7 +506,7 @@
 	}
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	enable_interrupts_noPoll();
-	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( __preemption_enabled() );
 
 	return preempted;
@@ -505,7 +517,7 @@
 //=============================================================================================
 // Wake a thread from the front if there are any
-static void __wake_one(struct __processor_id_t * id, cluster * this) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-	/* paranoid */ verify( ready_schedule_islocked( id ) );
+static void __wake_one(cluster * this) {
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( ready_schedule_islocked() );
 
 	// Check if there is a sleeping processor
@@ -525,6 +537,6 @@
 	#endif
 
-	/* paranoid */ verify( ready_schedule_islocked( id ) );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ready_schedule_islocked() );
+	/* paranoid */ verify( ! __preemption_enabled() );
 
 	return;
@@ -536,5 +548,5 @@
 
 	disable_interrupts();
-		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+		/* paranoid */ verify( ! __preemption_enabled() );
 		post( this->idle );
 	enable_interrupts( __cfaabi_dbg_ctx );
@@ -542,5 +554,5 @@
 
 static void push  (__cluster_idles & this, processor & proc) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	lock( this );
 		this.idle++;
@@ -549,9 +561,9 @@
 		insert_first(this.list, proc);
 	unlock( this );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
 static void remove(__cluster_idles & this, processor & proc) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	lock( this );
 		this.idle--;
@@ -560,5 +572,5 @@
 		remove(proc);
 	unlock( this );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
@@ -604,5 +616,5 @@
 	}
 
-	return kernelTLS.this_thread;
+	return __cfaabi_tls.this_thread;
 }
 
@@ -629,5 +641,5 @@
 
 int kernel_abort_lastframe( void ) __attribute__ ((__nothrow__)) {
-	return get_coroutine(kernelTLS.this_thread) == get_coroutine(mainThread) ? 4 : 2;
+	return get_coroutine(kernelTLS().this_thread) == get_coroutine(mainThread) ? 4 : 2;
 }
 
@@ -661,5 +673,5 @@
 	if ( count < 0 ) {
 		// queue current task
-		append( waiting, kernelTLS.this_thread );
+		append( waiting, active_thread() );
 
 		// atomically release spin lock and block
@@ -711,5 +723,5 @@
 		void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]) {
 			this.prev_name = prev_name;
-			this.prev_thrd = kernelTLS.this_thread;
+			this.prev_thrd = kernelTLS().this_thread;
 		}
 	}
@@ -728,4 +740,8 @@
 		this.print_halts = true;
 	}
+
+	void print_stats_now( cluster & this, int flags ) {	// print this cluster's statistics immediately
+		__print_stats( this.stats, flags, true, this.name, (void*)&this );	// use the caller's flags, not the at-exit mask in this.print_stats
+	}
 #endif
 // Local Variables: //
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/kernel.hfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -275,8 +275,10 @@
 static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
 
-static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
-static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
+static inline struct processor * active_processor() { return publicTLS_get( this_processor ); } // UNSAFE
+static inline struct cluster   * active_cluster  () { return publicTLS_get( this_processor )->cltr; }
 
 #if !defined(__CFA_NO_STATISTICS__)
+	void print_stats_now( cluster & this, int flags );
+
 	static inline void print_stats_at_exit( cluster & this, int flags ) {
 		this.print_stats |= flags;
Index: libcfa/src/concurrency/kernel/fwd.hfa
===================================================================
--- libcfa/src/concurrency/kernel/fwd.hfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/kernel/fwd.hfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -35,7 +35,8 @@
 	extern "Cforall" {
 		extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
-			struct $thread    * volatile this_thread;
-			struct processor  * volatile this_processor;
-			struct __stats_t  * volatile this_stats;
+			struct $thread          * volatile this_thread;
+			struct processor        * volatile this_processor;
+			struct __processor_id_t * volatile this_proc_id;
+			struct __stats_t        * volatile this_stats;
 
 			struct {
@@ -54,13 +55,24 @@
 				uint64_t bck_seed;
 			} ready_rng;
-		} kernelTLS __attribute__ ((tls_model ( "initial-exec" )));
+		} __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" )));
 
+		extern bool __preemption_enabled();
 
+		static inline KernelThreadData & kernelTLS( void ) {	// checked accessor: returns a reference to __cfaabi_tls
+			/* paranoid */ verify( ! __preemption_enabled() );	// callers are expected to have preemption disabled already
+			return __cfaabi_tls;
+		}
+
+		extern uintptr_t __cfatls_get( unsigned long int member );
+		// #define publicTLS_get( member ) ((typeof(__cfaabi_tls.member))__cfatls_get( __builtin_offsetof(KernelThreadData, member) ))
+		#define publicTLS_get( member ) (__cfaabi_tls.member)
+		// extern forall(otype T) T __cfatls_get( T * member, T value );
+		// #define publicTLS_set( member, value ) __cfatls_set( (typeof(member)*)__builtin_offsetof(KernelThreadData, member), value );
 
 		static inline uint64_t __tls_rand() {
 			#if defined(__SIZEOF_INT128__)
-				return __lehmer64( kernelTLS.rand_seed );
+				return __lehmer64( kernelTLS().rand_seed );
 			#else
-				return __xorshift64( kernelTLS.rand_seed );
+				return __xorshift64( kernelTLS().rand_seed );
 			#endif
 		}
@@ -74,11 +86,11 @@
 		static inline unsigned __tls_rand_fwd() {
 
-			kernelTLS.ready_rng.fwd_seed = (A * kernelTLS.ready_rng.fwd_seed + C) & (M - 1);
-			return kernelTLS.ready_rng.fwd_seed >> D;
+			kernelTLS().ready_rng.fwd_seed = (A * kernelTLS().ready_rng.fwd_seed + C) & (M - 1);
+			return kernelTLS().ready_rng.fwd_seed >> D;
 		}
 
 		static inline unsigned __tls_rand_bck() {
-			unsigned int r = kernelTLS.ready_rng.bck_seed >> D;
-			kernelTLS.ready_rng.bck_seed = AI * (kernelTLS.ready_rng.bck_seed - C) & (M - 1);
+			unsigned int r = kernelTLS().ready_rng.bck_seed >> D;
+			kernelTLS().ready_rng.bck_seed = AI * (kernelTLS().ready_rng.bck_seed - C) & (M - 1);
 			return r;
 		}
@@ -91,25 +103,9 @@
 
 		static inline void __tls_rand_advance_bck(void) {
-			kernelTLS.ready_rng.bck_seed = kernelTLS.ready_rng.fwd_seed;
+			kernelTLS().ready_rng.bck_seed = kernelTLS().ready_rng.fwd_seed;
 		}
 	}
 
-	#if 0 // def __ARM_ARCH
-		// function prototypes are only really used by these macros on ARM
-		void disable_global_interrupts();
-		void enable_global_interrupts();
 
-		#define TL_GET( member ) ( { __typeof__( kernelTLS.member ) target; \
-			disable_global_interrupts(); \
-			target = kernelTLS.member; \
-			enable_global_interrupts(); \
-			target; } )
-		#define TL_SET( member, value ) disable_global_interrupts(); \
-			kernelTLS.member = value; \
-			enable_global_interrupts();
-	#else
-		#define TL_GET( member ) kernelTLS.member
-		#define TL_SET( member, value ) kernelTLS.member = value;
-	#endif
 
 	extern void disable_interrupts();
@@ -120,5 +116,9 @@
 		extern void park( void );
 		extern void unpark( struct $thread * this );
-		static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
+		static inline struct $thread * active_thread () {	// returns the this_thread pointer read through publicTLS_get
+			struct $thread * t = publicTLS_get( this_thread );
+			/* paranoid */ verify( t );	// the TLS this_thread pointer must be non-null when this is called
+			return t;
+		}
 
 		extern bool force_yield( enum __Preemption_Reason );
@@ -139,7 +139,7 @@
 		#if !defined(__CFA_NO_STATISTICS__)
 			static inline struct __stats_t * __tls_stats() {
-				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-				/* paranoid */ verify( kernelTLS.this_stats );
-				return kernelTLS.this_stats;
+				/* paranoid */ verify( ! __preemption_enabled() );
+				/* paranoid */ verify( kernelTLS().this_stats );
+				return kernelTLS().this_stats;
 			}
 
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -118,6 +118,7 @@
 //-----------------------------------------------------------------------------
 // Global state
-thread_local struct KernelThreadData kernelTLS __attribute__ ((tls_model ( "initial-exec" ))) @= {
+thread_local struct KernelThreadData __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" ))) @= {
 	NULL,												// cannot use 0p
+	NULL,
 	NULL,
 	NULL,
@@ -155,5 +156,5 @@
 // Kernel boot procedures
 static void __kernel_startup(void) {
-	verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	__cfadbg_print_safe(runtime_core, "Kernel : Starting\n");
 
@@ -211,10 +212,11 @@
 
 	//initialize the global state variables
-	kernelTLS.this_processor = mainProcessor;
-	kernelTLS.this_thread    = mainThread;
+	__cfaabi_tls.this_processor = mainProcessor;
+	__cfaabi_tls.this_proc_id   = (__processor_id_t*)mainProcessor;
+	__cfaabi_tls.this_thread    = mainThread;
 
 	#if !defined( __CFA_NO_STATISTICS__ )
-		kernelTLS.this_stats = (__stats_t *)& storage_mainProcStats;
-		__init_stats( kernelTLS.this_stats );
+		__cfaabi_tls.this_stats = (__stats_t *)& storage_mainProcStats;
+		__init_stats( __cfaabi_tls.this_stats );
 	#endif
 
@@ -227,10 +229,10 @@
 	// Add the main thread to the ready queue
 	// once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
-	__schedule_thread((__processor_id_t *)mainProcessor, mainThread);
+	__schedule_thread(mainThread);
 
 	// SKULLDUGGERY: Force a context switch to the main processor to set the main thread's context to the current UNIX
 	// context. Hence, the main thread does not begin through __cfactx_invoke_thread, like all other threads. The trick here is that
 	// mainThread is on the ready queue when this call is made.
-	__kernel_first_resume( kernelTLS.this_processor );
+	__kernel_first_resume( __cfaabi_tls.this_processor );
 
 
@@ -249,7 +251,8 @@
 	__cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n");
 
-	verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	enable_interrupts( __cfaabi_dbg_ctx );
-	verify( TL_GET( preemption_state.enabled ) );
+	/* paranoid */ verify( __preemption_enabled() );
+
 }
 
@@ -260,7 +263,7 @@
 	mainCluster->io.ctxs = 0p;
 
-	/* paranoid */ verify( TL_GET( preemption_state.enabled ) );
+	/* paranoid */ verify( __preemption_enabled() );
 	disable_interrupts();
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 
 	__cfadbg_print_safe(runtime_core, "\n--------------------------------------------------\nKernel : Shutting down\n");
@@ -270,5 +273,5 @@
 	// which is currently here
 	__atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
-	__kernel_last_resume( kernelTLS.this_processor );
+	__kernel_last_resume( __cfaabi_tls.this_processor );
 	mainThread->self_cor.state = Halted;
 
@@ -319,11 +322,12 @@
 		__stats_t local_stats;
 		__init_stats( &local_stats );
-		kernelTLS.this_stats = &local_stats;
+		__cfaabi_tls.this_stats = &local_stats;
 	#endif
 
 	processor * proc = (processor *) arg;
-	kernelTLS.this_processor = proc;
-	kernelTLS.this_thread    = 0p;
-	kernelTLS.preemption_state.[enabled, disable_count] = [false, 1];
+	__cfaabi_tls.this_processor = proc;
+	__cfaabi_tls.this_proc_id   = (__processor_id_t*)proc;
+	__cfaabi_tls.this_thread    = 0p;
+	__cfaabi_tls.preemption_state.[enabled, disable_count] = [false, 1];
 	// SKULLDUGGERY: We want to create a context for the processor coroutine
 	// which is needed for the 2-step context switch. However, there is no reason
@@ -337,5 +341,5 @@
 
 	//Set global state
-	kernelTLS.this_thread = 0p;
+	__cfaabi_tls.this_thread = 0p;
 
 	//We now have a proper context from which to schedule threads
@@ -367,11 +371,11 @@
 	$coroutine * dst = get_coroutine(this->runner);
 
-	verify( ! kernelTLS.preemption_state.enabled );
-
-	kernelTLS.this_thread->curr_cor = dst;
+	/* paranoid */ verify( ! __preemption_enabled() );
+
+	__cfaabi_tls.this_thread->curr_cor = dst;
 	__stack_prepare( &dst->stack, 65000 );
 	__cfactx_start(main, dst, this->runner, __cfactx_invoke_coroutine);
 
-	verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 
 	dst->last = &src->self_cor;
@@ -391,5 +395,5 @@
 	/* paranoid */ verify(src->state == Active);
 
-	verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
@@ -399,7 +403,7 @@
 	$coroutine * dst = get_coroutine(this->runner);
 
-	verify( ! kernelTLS.preemption_state.enabled );
-	verify( dst->starter == src );
-	verify( dst->context.SP );
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( dst->starter == src );
+	/* paranoid */ verify( dst->context.SP );
 
 	// SKULLDUGGERY in debug the processors check that the
@@ -543,5 +547,5 @@
 
 		P( terminated );
-		verify( kernelTLS.this_processor != &this);
+		/* paranoid */ verify( active_processor() != &this);
 	}
 
@@ -693,5 +697,5 @@
 #if defined(__CFA_WITH_VERIFY__)
 static bool verify_fwd_bck_rng(void) {
-	kernelTLS.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&verify_fwd_bck_rng);
+	__cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&verify_fwd_bck_rng);
 
 	unsigned values[10];
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -33,9 +33,11 @@
 }
 
-void __schedule_thread( struct __processor_id_t *, $thread * )
+void __schedule_thread( $thread * )
 #if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
-	__attribute__((nonnull (2)))
+	__attribute__((nonnull (1)))
 #endif
 ;
+
+extern bool __preemption_enabled();
 
 //release/wake-up the following resources
@@ -63,28 +65,7 @@
 )
 
-// KERNEL ONLY unpark with out disabling interrupts
-void __unpark( struct __processor_id_t *, $thread * thrd );
-
 #define TICKET_BLOCKED (-1) // thread is blocked
 #define TICKET_RUNNING ( 0) // thread is running
 #define TICKET_UNBLOCK ( 1) // thread should ignore next block
-
-static inline bool __post(single_sem & this, struct __processor_id_t * id) {
-	for() {
-		struct $thread * expected = this.ptr;
-		if(expected == 1p) return false;
-		if(expected == 0p) {
-			if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-				return false;
-			}
-		}
-		else {
-			if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-				__unpark( id, expected );
-				return true;
-			}
-		}
-	}
-}
 
 //-----------------------------------------------------------------------------
@@ -201,7 +182,10 @@
 // Reader side : acquire when using the ready queue to schedule but not
 //  creating/destroying queues
-static inline void ready_schedule_lock( struct __processor_id_t * proc) with(*__scheduler_lock) {
-	unsigned iproc = proc->id;
-	/*paranoid*/ verify(data[iproc].handle == proc);
+static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+
+	unsigned iproc = kernelTLS().this_proc_id->id;
+	/*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
 	/*paranoid*/ verify(iproc < ready);
 
@@ -225,7 +209,10 @@
 }
 
-static inline void ready_schedule_unlock( struct __processor_id_t * proc) with(*__scheduler_lock) {
-	unsigned iproc = proc->id;
-	/*paranoid*/ verify(data[iproc].handle == proc);
+static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+
+	unsigned iproc = kernelTLS().this_proc_id->id;
+	/*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
 	/*paranoid*/ verify(iproc < ready);
 	/*paranoid*/ verify(data[iproc].lock);
@@ -239,5 +226,8 @@
 
 #ifdef __CFA_WITH_VERIFY__
-	static inline bool ready_schedule_islocked( struct __processor_id_t * proc) {
+	static inline bool ready_schedule_islocked(void) {
+		/* paranoid */ verify( ! __preemption_enabled() );
+		/*paranoid*/ verify( kernelTLS().this_proc_id );
+		__processor_id_t * proc = kernelTLS().this_proc_id;
 		return __scheduler_lock->data[proc->id].owned;
 	}
Index: libcfa/src/concurrency/locks.cfa
===================================================================
--- libcfa/src/concurrency/locks.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/locks.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -15,4 +15,5 @@
 		this.t = t;
 		this.lock = 0p;
+		this.listed = false;
 	}
 
@@ -21,4 +22,5 @@
 		this.info = info;
 		this.lock = 0p;
+		this.listed = false;
 	}
 
@@ -74,18 +76,19 @@
 
 void lock( blocking_lock & this ) with( this ) {
+	$thread * thrd = active_thread();
 	lock( lock __cfaabi_dbg_ctx2 );
-	if ( owner == kernelTLS.this_thread && !multi_acquisition) {
+	if ( owner == thrd && !multi_acquisition) {
 		fprintf(stderr, "A single acquisition lock holder attempted to reacquire the lock resulting in a deadlock."); // Possibly throw instead
-		exit(EXIT_FAILURE);
-	} else if ( owner != 0p && owner != kernelTLS.this_thread ) {
-		append( blocked_threads, kernelTLS.this_thread );
+    	exit(EXIT_FAILURE);
+	} else if ( owner != 0p && owner != thrd ) {
+		append( blocked_threads, thrd );
 		wait_count++;
 		unlock( lock );
-		park( __cfaabi_dbg_ctx );
-	} else if ( owner == kernelTLS.this_thread && multi_acquisition ) {
+		park( );
+	} else if ( owner == thrd && multi_acquisition ) {
 		recursion_count++;
 		unlock( lock );
 	} else {
-		owner = kernelTLS.this_thread;
+		owner = thrd;
 		recursion_count = 1;
 		unlock( lock );
@@ -94,11 +97,12 @@
 
 bool try_lock( blocking_lock & this ) with( this ) {
+	$thread * thrd = active_thread();
 	bool ret = false;
 	lock( lock __cfaabi_dbg_ctx2 );
 	if ( owner == 0p ) {
-		owner = kernelTLS.this_thread;
+		owner = thrd;
 		if ( multi_acquisition ) recursion_count = 1;
 		ret = true;
-	} else if ( owner == kernelTLS.this_thread && multi_acquisition ) {
+	} else if ( owner == thrd && multi_acquisition ) {
 		recursion_count++;
 		ret = true;
@@ -113,5 +117,5 @@
 		fprintf( stderr, "There was an attempt to release a lock that isn't held" );
 		return;
-	} else if ( strict_owner && owner != kernelTLS.this_thread ) {
+	} else if ( strict_owner && owner != active_thread() ) {	// only a non-owner release is an error
 		fprintf( stderr, "A thread other than the owner attempted to release an owner lock" );
 		return;
@@ -123,5 +127,5 @@
 		recursion_count = ( thrd && multi_acquisition ? 1 : 0 );
 		wait_count--;
-		unpark( thrd __cfaabi_dbg_ctx2 );
+		unpark( thrd );
 	}
 	unlock( lock );
@@ -150,5 +154,8 @@
 		owner = t;
 		if ( multi_acquisition ) recursion_count = 1;
-		unpark( t __cfaabi_dbg_ctx2 );
+		#if !defined( __CFA_NO_STATISTICS__ )
+			__cfaabi_tls.this_stats = t->curr_cluster->stats;	// raw TLS variable: kernelTLS is now a function, not the variable
+		#endif
+		unpark( t );
 		unlock( lock );
 	}
@@ -159,5 +166,5 @@
 	if ( owner == 0p ){ // no owner implies lock isn't held
 		fprintf( stderr, "A lock that is not held was passed to a synchronization lock" );
-	} else if ( strict_owner && owner != kernelTLS.this_thread ) {
+	} else if ( strict_owner && owner != active_thread() ) {	// only a non-owner caller is an error
 		fprintf( stderr, "A thread other than the owner of a lock passed it to a synchronization lock" );
 	} else {
@@ -166,5 +173,5 @@
 		recursion_count = ( thrd && multi_acquisition ? 1 : 0 );
 		wait_count--;
-		unpark( thrd __cfaabi_dbg_ctx2 );
+		unpark( thrd );
 	}
 	unlock( lock );
@@ -175,7 +182,5 @@
 ///////////////////////////////////////////////////////////////////
 
-// In an ideal world this may not be necessary
-// Is it possible for nominal inheritance to inherit traits??
-// If that occurs we would avoid all this extra code
+// This is temporary until an inheritance bug is fixed
 
 void lock( mutex_lock & this ){
@@ -228,21 +233,38 @@
 
 ///////////////////////////////////////////////////////////////////
-//// Synchronization Locks
+//// condition variable
 ///////////////////////////////////////////////////////////////////
 
 forall(dtype L | is_blocking_lock(L)) {
-	void ?{}( synchronization_lock(L) & this, bool reacquire_after_signal ){
+
+	void timeout_handler ( alarm_node_wrap(L) & this ) with( this ) {
+		// Timeout callback: if the waiter is still queued, dequeue it and wake it or hand it to its lock. Called from the kernel, so it cannot block, but it can spin.
+		lock( cond->lock __cfaabi_dbg_ctx2 );
+		if ( (*i)->listed ) {			// is thread on queue
+			info_thread(L) * copy = *i;
+			remove( cond->blocked_threads, i );		 //remove this thread O(1)
+			cond->wait_count--;
+			if( !copy->lock ) {
+				// cond->lock stays held here; the single unlock at the end releases it exactly once on every path
+				#if !defined( __CFA_NO_STATISTICS__ )
+					#warning unprotected access to tls TODO discuss this
+					__cfaabi_tls.this_stats = copy->t->curr_cluster->stats;
+				#endif
+				unpark( copy->t );
+			} else {
+				add_(*copy->lock, copy->t);			// call lock's add_
+			}
+		}
+		unlock( cond->lock );
+	}
+
+	void alarm_node_wrap_cast( alarm_node_t & a ) {
+		timeout_handler( (alarm_node_wrap(L) &)a );
+	}
+
+	void ?{}( condition_variable(L) & this ){
 		this.lock{};
 		this.blocked_threads{};
 		this.count = 0;
-		this.reacquire_after_signal = reacquire_after_signal;
-	}
-
-	void ^?{}( synchronization_lock(L) & this ){
-		// default
-	}
-
-	void ?{}( condition_variable(L) & this ){
-		((synchronization_lock(L) &)this){ true };
 	}
 
@@ -251,23 +273,23 @@
 	}
 
-	void ?{}( thread_queue(L) & this ){
-		((synchronization_lock(L) &)this){ false };
-	}
-
-	void ^?{}( thread_queue(L) & this ){
+	void ?{}( alarm_node_wrap(L) & this, $thread * thrd, Time alarm, Duration period, Alarm_Callback callback ) {
+		this.alarm_node{ thrd, alarm, period, callback };
+	}
+
+	void ^?{}( alarm_node_wrap(L) & this ) {
 		// default
 	}
 
-	bool notify_one( synchronization_lock(L) & this ) with( this ) {
+	bool notify_one( condition_variable(L) & this ) with( this ) {
 		lock( lock __cfaabi_dbg_ctx2 );
 		bool ret = !!blocked_threads;
 		info_thread(L) * popped = pop_head( blocked_threads );
+		popped->listed = false;
 		if(popped != 0p) {
-			if( reacquire_after_signal ){
+			count--;
+			if (popped->lock) {
 				add_(*popped->lock, popped->t);
 			} else {
-				unpark(
-					popped->t __cfaabi_dbg_ctx2
-				);
+				unpark(popped->t);
 			}
 		}
@@ -276,16 +298,16 @@
 	}
 
-	bool notify_all( synchronization_lock(L) & this ) with(this) {
+	bool notify_all( condition_variable(L) & this ) with(this) {	// drain blocked_threads, waking every queued thread
 		lock( lock __cfaabi_dbg_ctx2 );
 		bool ret = blocked_threads ? true : false;
 		while( blocked_threads ) {
 			info_thread(L) * popped = pop_head( blocked_threads );
 			if(popped != 0p){
+				popped->listed = false;	// node has left the queue; dereference only inside the null guard, as in notify_one
-				if( reacquire_after_signal ){
+				count--;
+				if (popped->lock) {	// waiter supplied a lock: pass the thread to the lock's add_ rather than unparking it directly
 					add_(*popped->lock, popped->t);
 				} else {
-					unpark(
-						popped->t __cfaabi_dbg_ctx2
-					);
+					unpark(popped->t);
 				}
 			}
@@ -295,134 +317,127 @@
 	}
 
-	uintptr_t front( synchronization_lock(L) & this ) with(this) {
-		return (*peek(blocked_threads)).info;
-	}
-
-	bool empty( synchronization_lock(L) & this ) with(this) {
+	uintptr_t front( condition_variable(L) & this ) with(this) {	// info word of the head waiter, or 0 when no thread is queued
+		if(!blocked_threads) return 0;	// return type is uintptr_t, so use integer 0 rather than the pointer constant NULL
+		return peek(blocked_threads)->info;
+	}
+
+	bool empty( condition_variable(L) & this ) with(this) {
 		return blocked_threads ? false : true;
 	}
 
-	int counter( synchronization_lock(L) & this ) with(this) {
+	int counter( condition_variable(L) & this ) with(this) {
 		return count;
 	}
 
-	void queue_info_thread( synchronization_lock(L) & this, info_thread(L) & i ) with(this) {
-		lock( lock __cfaabi_dbg_ctx2 );
-		append( blocked_threads, &i );
-		count++;
-		unlock( lock );
-		park( __cfaabi_dbg_ctx );
-	}
-
-
-	void wait( synchronization_lock(L) & this ) with(this) {
-		info_thread( L ) i = { kernelTLS.this_thread };
-		queue_info_thread( this, i );
-	}
-
-	void wait( synchronization_lock(L) & this, uintptr_t info ) with(this) {
-		info_thread( L ) i = { kernelTLS.this_thread, info };
-		queue_info_thread( this, i );
-	}
-	// I still need to implement the time delay wait routines
-	bool wait( synchronization_lock(L) & this, Duration duration ) with(this) {
-		timeval tv = { time(0) };
-		Time t = { tv };
-		return wait( this, t + duration );
-	}
-
-	bool wait( synchronization_lock(L) & this, uintptr_t info, Duration duration ) with(this) {
-		// TODO: ADD INFO
-		return wait( this, duration );
-	}
-
-	bool wait( synchronization_lock(L) & this, Time time ) with(this) {
-		return false; //default
-	}
-
-	bool wait( synchronization_lock(L) & this, uintptr_t info, Time time ) with(this) {
-		// TODO: ADD INFO
-		return wait( this, time );
-	}
-
-	void queue_info_thread_unlock( synchronization_lock(L) & this, L & l, info_thread(L) & i ) with(this) {
+	// helper for the wait() routines without a timeout
+	void queue_info_thread( condition_variable(L) & this, info_thread(L) & i ) with(this) {
 		lock( lock __cfaabi_dbg_ctx2 );
 		append( this.blocked_threads, &i );
 		count++;
-		i.lock = &l;
-		size_t recursion_count = get_recursion_count(l);
-		remove_( l );
-		unlock( lock );
-		park( __cfaabi_dbg_ctx ); // blocks here
-
-		set_recursion_count(l, recursion_count); // resets recursion count here after waking
-	}
-
-	void wait( synchronization_lock(L) & this, L & l ) with(this) {
-		info_thread(L) i = { kernelTLS.this_thread };
-		queue_info_thread_unlock( this, l, i );
-	}
-
-	void wait( synchronization_lock(L) & this, L & l, uintptr_t info ) with(this) {
-		info_thread(L) i = { kernelTLS.this_thread, info };
-		queue_info_thread_unlock( this, l, i );
-	}
-
-	bool wait( synchronization_lock(L) & this, L & l, Duration duration ) with(this) {
-		timeval tv = { time(0) };
-		Time t = { tv };
-		return wait( this, l, t + duration );
-	}
-
-	bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Duration duration ) with(this) {
-		// TODO: ADD INFO
-		return wait( this, l, duration );
-	}
-
-	bool wait( synchronization_lock(L) & this, L & l, Time time ) with(this) {
-		return false; //default
-	}
-
-	bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Time time ) with(this) {
-		// TODO: ADD INFO
-		return wait( this, l, time );
-	}
-}
-
-///////////////////////////////////////////////////////////////////
-//// condition lock alternative approach
-///////////////////////////////////////////////////////////////////
-
-// the solution below is less efficient but does not require the lock to have a specific add/remove routine
-
-///////////////////////////////////////////////////////////////////
-//// is_simple_lock
-///////////////////////////////////////////////////////////////////
-
-forall(dtype L | is_simple_lock(L)) {
-	void ?{}( condition_lock(L) & this ){
-		// default
-	}
-
-	void ^?{}( condition_lock(L) & this ){
-		// default
-	}
-
-	bool notify_one( condition_lock(L) & this ) with(this) {
-		return notify_one( c_var );
-	}
-
-	bool notify_all( condition_lock(L) & this ) with(this) {
-		return notify_all( c_var );
-	}
-
-	void wait( condition_lock(L) & this, L & l ) with(this) {
-		lock( m_lock );
-		size_t recursion = get_recursion_count( l );
-		unlock( l );
-		wait( c_var, m_lock );
-		lock( l );
-		set_recursion_count( l , recursion );
-		unlock( m_lock );
-	}
-}
+		i.listed = true;
+		size_t recursion_count;
+		if (i.lock) {
+			recursion_count = get_recursion_count(*i.lock);
+			remove_( *i.lock );
+		}
+
+		unlock( lock );
+		park( ); // blocks here
+
+		if (i.lock) set_recursion_count(*i.lock, recursion_count); // resets recursion count here after waking
+	}
+
+	// helper for the wait() routines with a timeout: arms an alarm for time t, queues info on this condition, then parks
+	void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Time t ) with(this) {
+		lock( lock __cfaabi_dbg_ctx2 );	// condition's spinlock; released just before park() below
+
+		info_thread(L) * queue_ptr = &info;	// local pointer whose address is given to the alarm node (node_wrap.i)
+
+		alarm_node_wrap(L) node_wrap = { info.t, t, 0`s, alarm_node_wrap_cast };	// period 0`s; callback casts back to alarm_node_wrap(L) and calls timeout_handler
+		node_wrap.cond = &this;
+		node_wrap.i = &queue_ptr;
+
+		register_self( &node_wrap.alarm_node );	// NOTE(review): node_wrap lives in this stack frame -- confirm it is unregistered (e.g. by its destructor) when the thread is signalled before t
+
+		append( blocked_threads, queue_ptr );
+		info.listed = true;	// marks the node as queued; notify_one/notify_all clear this flag when they pop it
+		count++;
+
+		size_t recursion_count;	// only assigned and read when info.lock is set
+		if (info.lock) {
+			recursion_count = get_recursion_count(*info.lock);	// saved so it can be restored after waking
+			remove_( *info.lock );	// lock's own remove_ routine (presumably releases the lock while waiting -- confirm)
+		}
+
+		unlock( lock );
+		park();	// blocks here
+
+		if (info.lock) set_recursion_count(*info.lock, recursion_count);	// restore the recursion count saved before parking
+	}
+
+	void wait( condition_variable(L) & this ) with(this) {
+		info_thread( L ) i = { active_thread() };
+		queue_info_thread( this, i );
+	}
+
+	void wait( condition_variable(L) & this, uintptr_t info ) with(this) {
+		info_thread( L ) i = { active_thread(), info };
+		queue_info_thread( this, i );
+	}
+
+	void wait( condition_variable(L) & this, Duration duration ) with(this) {
+		info_thread( L ) i = { active_thread() };
+		queue_info_thread_timeout(this, i, __kernel_get_time() + duration );
+	}
+
+	void wait( condition_variable(L) & this, uintptr_t info, Duration duration ) with(this) {
+		info_thread( L ) i = { active_thread(), info };
+		queue_info_thread_timeout(this, i, __kernel_get_time() + duration );
+	}
+
+	void wait( condition_variable(L) & this, Time time ) with(this) {
+		info_thread( L ) i = { active_thread() };
+		queue_info_thread_timeout(this, i, time);
+	}
+
+	void wait( condition_variable(L) & this, uintptr_t info, Time time ) with(this) {
+		info_thread( L ) i = { active_thread(), info };
+		queue_info_thread_timeout(this, i, time);
+	}
+
+	void wait( condition_variable(L) & this, L & l ) with(this) {
+		info_thread(L) i = { active_thread() };
+		i.lock = &l;
+		queue_info_thread( this, i );
+	}
+
+	void wait( condition_variable(L) & this, L & l, uintptr_t info ) with(this) {
+		info_thread(L) i = { active_thread(), info };
+		i.lock = &l;
+		queue_info_thread( this, i );
+	}
+
+	void wait( condition_variable(L) & this, L & l, Duration duration ) with(this) {
+		info_thread(L) i = { active_thread() };
+		i.lock = &l;
+		queue_info_thread_timeout(this, i, __kernel_get_time() + duration );
+	}
+
+	void wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration ) with(this) {
+		info_thread(L) i = { active_thread(), info };
+		i.lock = &l;
+		queue_info_thread_timeout(this, i, __kernel_get_time() + duration );
+	}
+
+	void wait( condition_variable(L) & this, L & l, Time time ) with(this) {
+		info_thread(L) i = { active_thread() };
+		i.lock = &l;
+		queue_info_thread_timeout(this, i, time );
+	}
+
+	void wait( condition_variable(L) & this, L & l, uintptr_t info, Time time ) with(this) {
+		info_thread(L) i = { active_thread(), info };
+		i.lock = &l;
+		queue_info_thread_timeout(this, i, time );
+	}
+}
Index: libcfa/src/concurrency/locks.hfa
===================================================================
--- libcfa/src/concurrency/locks.hfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/locks.hfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -1,2 +1,4 @@
+#pragma once
+
 #include <stdbool.h>
 
@@ -10,4 +12,5 @@
 #include "time.hfa"
 #include <sys/time.h>
+#include "alarm.hfa"
 
 ///////////////////////////////////////////////////////////////////
@@ -32,4 +35,5 @@
 		info_thread(L) * next;
 		L * lock;
+		bool listed;					// true if info_thread is on queue, false otherwise;
 	};
 
@@ -119,5 +123,5 @@
 ///////////////////////////////////////////////////////////////////
 forall(dtype L | is_blocking_lock(L)) {
-	struct synchronization_lock {
+	struct condition_variable {
 		// Spin lock used for mutual exclusion
 		__spinlock_t lock;
@@ -128,84 +132,45 @@
 		// Count of current blocked threads
 		int count;
-
-		// If true threads will reacquire the lock they block on upon waking
-		bool reacquire_after_signal;
 	};
-
-	struct condition_variable {
-		inline synchronization_lock(L);
-	};
-
-	struct thread_queue {
-		inline synchronization_lock(L);
-	};
-
-
-	void ?{}( synchronization_lock(L) & this, bool multi_acquisition, bool strict_owner );
-	void ^?{}( synchronization_lock(L) & this );
 
 	void ?{}( condition_variable(L) & this );
 	void ^?{}( condition_variable(L) & this );
 
-	void ?{}( thread_queue(L) & this );
-	void ^?{}( thread_queue(L) & this );
+	struct alarm_node_wrap {
+		alarm_node_t alarm_node;
 
-	bool notify_one( synchronization_lock(L) & this );
-	bool notify_all( synchronization_lock(L) & this );
+		condition_variable(L) * cond;
 
-	uintptr_t front( synchronization_lock(L) & this );
-
-	bool empty( synchronization_lock(L) & this );
-	int counter( synchronization_lock(L) & this );
-
-	// wait functions that are not passed a mutex lock
-	void wait( synchronization_lock(L) & this );
-	void wait( synchronization_lock(L) & this, uintptr_t info );
-	bool wait( synchronization_lock(L) & this, Duration duration );
-	bool wait( synchronization_lock(L) & this, uintptr_t info, Duration duration );
-	bool wait( synchronization_lock(L) & this, Time time );
-	bool wait( synchronization_lock(L) & this, uintptr_t info, Time time );
-
-	// wait functions that are passed a lock
-	bool notify_one( synchronization_lock(L) & this, L & l );
-	bool notify_all( synchronization_lock(L) & this, L & l );
-
-	void wait( synchronization_lock(L) & this, L & l );
-	void wait( synchronization_lock(L) & this, L & l, uintptr_t info );
-	bool wait( synchronization_lock(L) & this, L & l, Duration duration );
-	bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Duration duration );
-	bool wait( synchronization_lock(L) & this, L & l, Time time );
-	bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Time time );
-}
-
-///////////////////////////////////////////////////////////////////
-//// condition lock alternative approach
-///////////////////////////////////////////////////////////////////
-
-
-///////////////////////////////////////////////////////////////////
-//// is_simple_lock
-///////////////////////////////////////////////////////////////////
-
-trait is_simple_lock(dtype L | sized(L)) {
-	void lock( L & );		// For synchronization locks to use when acquiring
-	void unlock( L & );    // For synchronization locks to use when releasing
-	size_t get_recursion_count( L & ); // to get recursion count for cond lock to reset after waking
-	void set_recursion_count( L &, size_t recursion ); // to set recursion count after getting signalled;
-};
-
-forall(dtype L | is_simple_lock(L)) {
-	struct condition_lock {
-		// Spin lock used for mutual exclusion
-		mutex_lock m_lock;
-
-		condition_variable( mutex_lock ) c_var;
+		info_thread(L) ** i;
 	};
 
-	void ?{}( condition_lock(L) & this );
-	void ^?{}( condition_lock(L) & this );
+	void ?{}( alarm_node_wrap(L) & this, $thread * thrd, Time alarm, Duration period, Alarm_Callback callback );
+	void ^?{}( alarm_node_wrap(L) & this );
 
-	bool notify_one( condition_lock(L) & this );
-	bool notify_all( condition_lock(L) & this );
-	void wait( condition_lock(L) & this, L & l );
+	void alarm_node_callback( alarm_node_wrap(L) & this );
+
+	void alarm_node_wrap_cast( alarm_node_t & a );
+
+	bool notify_one( condition_variable(L) & this );
+	bool notify_all( condition_variable(L) & this );
+
+	uintptr_t front( condition_variable(L) & this );
+
+	bool empty( condition_variable(L) & this );
+	int counter( condition_variable(L) & this );
+
+	// TODO: look into changing timeout routines to return bool showing if signalled or woken by kernel
+	void wait( condition_variable(L) & this );
+	void wait( condition_variable(L) & this, uintptr_t info );
+	void wait( condition_variable(L) & this, Duration duration );
+	void wait( condition_variable(L) & this, uintptr_t info, Duration duration );
+	void wait( condition_variable(L) & this, Time time );
+	void wait( condition_variable(L) & this, uintptr_t info, Time time );
+
+	void wait( condition_variable(L) & this, L & l );
+	void wait( condition_variable(L) & this, L & l, uintptr_t info );
+	void wait( condition_variable(L) & this, L & l, Duration duration );
+	void wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration );
+	void wait( condition_variable(L) & this, L & l, Time time );
+	void wait( condition_variable(L) & this, L & l, uintptr_t info, Time time );
 }
Index: libcfa/src/concurrency/monitor.cfa
===================================================================
--- libcfa/src/concurrency/monitor.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/monitor.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -82,8 +82,8 @@
 // Enter single monitor
 static void __enter( $monitor * this, const __monitor_group_t & group ) {
+	$thread * thrd = active_thread();
+
 	// Lock the monitor spinlock
 	lock( this->lock __cfaabi_dbg_ctx2 );
-	// Interrupts disable inside critical section
-	$thread * thrd = kernelTLS.this_thread;
 
 	__cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
@@ -126,5 +126,5 @@
 		__cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
 
-		/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 		return;
 	}
@@ -132,5 +132,5 @@
 	__cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
 
-	/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+	/* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 	/* paranoid */ verify( this->lock.lock );
 
@@ -141,8 +141,8 @@
 
 static void __dtor_enter( $monitor * this, fptr_t func, bool join ) {
+	$thread * thrd = active_thread();
+
 	// Lock the monitor spinlock
 	lock( this->lock __cfaabi_dbg_ctx2 );
-	// Interrupts disable inside critical section
-	$thread * thrd = kernelTLS.this_thread;
 
 	__cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
@@ -155,5 +155,5 @@
 		__set_owner( this, thrd );
 
-		verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 
 		unlock( this->lock );
@@ -174,5 +174,5 @@
 		this->owner = thrd;
 
-		verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 
 		unlock( this->lock );
@@ -200,5 +200,5 @@
 
 		// Release the next thread
-		/* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 		unpark( urgent->owner->waiting_thread );
 
@@ -207,5 +207,5 @@
 
 		// Some one was waiting for us, enter
-		/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 	}
 	else {
@@ -224,5 +224,5 @@
 		park();
 
-		/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 		return;
 	}
@@ -237,7 +237,7 @@
 	lock( this->lock __cfaabi_dbg_ctx2 );
 
-	__cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
-
-	/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+	__cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", active_thread(), this, this->owner);
+
+	/* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 
 	// Leaving a recursion level, decrement the counter
@@ -270,6 +270,6 @@
 void __dtor_leave( $monitor * this, bool join ) {
 	__cfaabi_dbg_debug_do(
-		if( TL_GET( this_thread ) != this->owner ) {
-			abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
+		if( active_thread() != this->owner ) {
+			abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, active_thread(), this->owner);
 		}
 		if( this->recursion != 1  && !join ) {
@@ -287,5 +287,5 @@
 	/* paranoid */ verify( this->lock.lock );
 	/* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verify( thrd->state == Halted );
 	/* paranoid */ verify( this->recursion == 1 );
@@ -303,5 +303,5 @@
 	// Unpark the next owner if needed
 	/* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verify( thrd->state == Halted );
 	unpark( new_owner );
@@ -327,5 +327,5 @@
 // Sorts monitors before entering
 void ?{}( monitor_guard_t & this, $monitor * m [], __lock_size_t count, fptr_t func ) {
-	$thread * thrd = TL_GET( this_thread );
+	$thread * thrd = active_thread();
 
 	// Store current array
@@ -362,5 +362,5 @@
 
 	// Restore thread context
-	TL_GET( this_thread )->monitors = this.prev;
+	active_thread()->monitors = this.prev;
 }
 
@@ -369,5 +369,5 @@
 void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func, bool join ) {
 	// optimization
-	$thread * thrd = TL_GET( this_thread );
+	$thread * thrd = active_thread();
 
 	// Store current array
@@ -392,5 +392,5 @@
 
 	// Restore thread context
-	TL_GET( this_thread )->monitors = this.prev;
+	active_thread()->monitors = this.prev;
 }
 
@@ -432,5 +432,5 @@
 
 	// Create the node specific to this wait operation
-	wait_ctx( TL_GET( this_thread ), user_info );
+	wait_ctx( active_thread(), user_info );
 
 	// Append the current wait operation to the ones already queued on the condition
@@ -483,5 +483,5 @@
 	//Some more checking in debug
 	__cfaabi_dbg_debug_do(
-		$thread * this_thrd = TL_GET( this_thread );
+		$thread * this_thrd = active_thread();
 		if ( this.monitor_count != this_thrd->monitors.size ) {
 			abort( "Signal on condition %p made with different number of monitor(s), expected %zi got %zi", &this, this.monitor_count, this_thrd->monitors.size );
@@ -531,5 +531,5 @@
 
 	// Create the node specific to this wait operation
-	wait_ctx_primed( kernelTLS.this_thread, 0 )
+	wait_ctx_primed( active_thread(), 0 )
 
 	//save contexts
@@ -630,5 +630,5 @@
 
 				// Create the node specific to this wait operation
-				wait_ctx_primed( kernelTLS.this_thread, 0 );
+				wait_ctx_primed( active_thread(), 0 );
 
 				// Save monitor states
@@ -682,5 +682,5 @@
 
 	// Create the node specific to this wait operation
-	wait_ctx_primed( kernelTLS.this_thread, 0 );
+	wait_ctx_primed( active_thread(), 0 );
 
 	monitor_save;
@@ -688,5 +688,5 @@
 
 	for( __lock_size_t i = 0; i < count; i++) {
-		verify( monitors[i]->owner == kernelTLS.this_thread );
+		verify( monitors[i]->owner == active_thread() );
 	}
 
@@ -724,10 +724,10 @@
 static inline void __set_owner( $monitor * monitors [], __lock_size_t count, $thread * owner ) {
 	/* paranoid */ verify ( monitors[0]->lock.lock );
-	/* paranoid */ verifyf( monitors[0]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[0]->owner, monitors[0]->recursion, monitors[0] );
+	/* paranoid */ verifyf( monitors[0]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[0]->owner, monitors[0]->recursion, monitors[0] );
 	monitors[0]->owner        = owner;
 	monitors[0]->recursion    = 1;
 	for( __lock_size_t i = 1; i < count; i++ ) {
 		/* paranoid */ verify ( monitors[i]->lock.lock );
-		/* paranoid */ verifyf( monitors[i]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[i]->owner, monitors[i]->recursion, monitors[i] );
+		/* paranoid */ verifyf( monitors[i]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[i]->owner, monitors[i]->recursion, monitors[i] );
 		monitors[i]->owner        = owner;
 		monitors[i]->recursion    = 0;
@@ -755,5 +755,5 @@
 		//regardless of if we are ready to baton pass,
 		//we need to set the monitor as in use
-		/* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+		/* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 		__set_owner( this,  urgent->owner->waiting_thread );
 
@@ -764,5 +764,5 @@
 	// Get the next thread in the entry_queue
 	$thread * new_owner = pop_head( this->entry_queue );
-	/* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+	/* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
 	/* paranoid */ verify( !new_owner || new_owner->link.next == 0p );
 	__set_owner( this, new_owner );
@@ -892,5 +892,5 @@
 
 static inline void brand_condition( condition & this ) {
-	$thread * thrd = TL_GET( this_thread );
+	$thread * thrd = active_thread();
 	if( !this.monitors ) {
 		// __cfaabi_dbg_print_safe( "Branding\n" );
Index: libcfa/src/concurrency/mutex.cfa
===================================================================
--- libcfa/src/concurrency/mutex.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/mutex.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -40,5 +40,5 @@
 	lock( lock __cfaabi_dbg_ctx2 );
 	if( is_locked ) {
-		append( blocked_threads, kernelTLS.this_thread );
+		append( blocked_threads, active_thread() );
 		unlock( lock );
 		park();
@@ -86,14 +86,14 @@
 	lock( lock __cfaabi_dbg_ctx2 );
 	if( owner == 0p ) {
-		owner = kernelTLS.this_thread;
+		owner = active_thread();
 		recursion_count = 1;
 		unlock( lock );
 	}
-	else if( owner == kernelTLS.this_thread ) {
+	else if( owner == active_thread() ) {
 		recursion_count++;
 		unlock( lock );
 	}
 	else {
-		append( blocked_threads, kernelTLS.this_thread );
+		append( blocked_threads, active_thread() );
 		unlock( lock );
 		park();
@@ -105,9 +105,9 @@
 	lock( lock __cfaabi_dbg_ctx2 );
 	if( owner == 0p ) {
-		owner = kernelTLS.this_thread;
+		owner = active_thread();
 		recursion_count = 1;
 		ret = true;
 	}
-	else if( owner == kernelTLS.this_thread ) {
+	else if( owner == active_thread() ) {
 		recursion_count++;
 		ret = true;
@@ -159,5 +159,5 @@
 void wait(condition_variable & this) {
 	lock( this.lock __cfaabi_dbg_ctx2 );
-	append( this.blocked_threads, kernelTLS.this_thread );
+	append( this.blocked_threads, active_thread() );
 	unlock( this.lock );
 	park();
@@ -167,5 +167,5 @@
 void wait(condition_variable & this, L & l) {
 	lock( this.lock __cfaabi_dbg_ctx2 );
-	append( this.blocked_threads, kernelTLS.this_thread );
+	append( this.blocked_threads, active_thread() );
 	unlock(l);
 	unlock(this.lock);
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/preemption.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -10,6 +10,6 @@
 // Created On       : Mon Jun 5 14:20:42 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Aug 26 16:46:03 2020
-// Update Count     : 53
+// Last Modified On : Fri Nov  6 07:42:13 2020
+// Update Count     : 54
 //
 
@@ -38,5 +38,5 @@
 // FwdDeclarations : timeout handlers
 static void preempt( processor   * this );
-static void timeout( struct __processor_id_t * id, $thread * this );
+static void timeout( $thread * this );
 
 // FwdDeclarations : Signal handlers
@@ -91,5 +91,5 @@
 
 // Tick one frame of the Discrete Event Simulation for alarms
-static void tick_preemption( struct __processor_id_t * id ) {
+static void tick_preemption(void) {
 	alarm_node_t * node = 0p;							// Used in the while loop but cannot be declared in the while condition
 	alarm_list_t * alarms = &event_kernel->alarms;		// Local copy for ease of reading
@@ -105,9 +105,12 @@
 
 		// Check if this is a kernel
-		if( node->kernel_alarm ) {
+		if( node->type == Kernel ) {
 			preempt( node->proc );
 		}
+		else if( node->type == User ) {
+			timeout( node->thrd );
+		}
 		else {
-			timeout( id, node->thrd );
+			node->callback(*node);
 		}
 
@@ -160,11 +163,97 @@
 // Kernel Signal Tools
 //=============================================================================================
-
-__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
+// In a user-level threading system, there are a handful of thread-local variables where this problem occurs on the ARM.
+//
+// For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
+// enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
+//
+// For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.
+//
+// _Thread_local volatile int interrupts;
+// int main() {
+//     interrupts = 0; } // disable interrupts
+//
+// which generates the following code on the ARM
+//
+// (gdb) disassemble main
+// Dump of assembler code for function main:
+//    0x0000000000000610 <+0>:	mrs	x1, tpidr_el0
+//    0x0000000000000614 <+4>:	mov	w0, #0x0                   	// #0
+//    0x0000000000000618 <+8>:	add	x1, x1, #0x0, lsl #12
+//    0x000000000000061c <+12>:	add	x1, x1, #0x10
+//    0x0000000000000620 <+16>:	str	wzr, [x1]
+//    0x0000000000000624 <+20>:	ret
+//
+// The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
+// increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable
+// "interrupts".  Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
+// TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, it can be preempted at the
+// user level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of
+// the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
+// turning off interrupts on the wrong kernel thread.
+//
+// On the x86, the following code is generated for the same code fragment.
+//
+// (gdb) disassemble main
+// Dump of assembler code for function main:
+//    0x0000000000400420 <+0>:	movl   $0x0,%fs:0xfffffffffffffffc
+//    0x000000000040042c <+12>:	xor    %eax,%eax
+//    0x000000000040042e <+14>:	retq
+//
+// and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in
+// register "fs".
+//
+// Hence, the ARM has base-displacement addressing for the general-purpose registers, BUT not for the coprocessor
+// registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
+//
+// Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
+// thread on the same kernel thread.
+//
+// The obvious question is why does ARM use a coprocessor register to store the TLS pointer given that coprocessor
+// registers are second-class registers with respect to the instruction set. One possible answer is that they did not
+// want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
+// available.
+
+//----------
+// special case for preemption since it is used often: returns preemption_state.enabled read from __cfaabi_tls
+bool __preemption_enabled() {
+	// emit an assembler label immediately before the TLS access;
+	// the "memory" clobber keeps the compiler from moving memory accesses across it
+	asm volatile("__cfaasm_check_before:":::"memory");
+
+	// access tls as normal; the read sits between the two labels (NOTE(review): presumably so an interrupted PC inside this window can be recognised -- confirm)
+	bool enabled = __cfaabi_tls.preemption_state.enabled;
+
+	// emit the matching assembler label immediately after the TLS access;
+	// the "memory" clobber again keeps the compiler from moving memory accesses across it
+	asm volatile("__cfaasm_check_after:":::"memory");
+	return enabled;
+}
+
+//----------
+// Get data from the TLS block: returns the uintptr_t-sized word located offset bytes from the start of __cfaabi_tls
+uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems (presumably duplicated copies of the fixed asm labels below -- confirm)
+uintptr_t __cfatls_get( unsigned long int offset ) {
+	// emit an assembler label immediately before the TLS access;
+	// the "memory" clobber keeps the compiler from moving memory accesses across it
+	asm volatile("__cfaasm_get_before:":::"memory");
+
+	// access tls as normal (except for pointer arithmetic): byte offset added to the address of __cfaabi_tls
+	uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
+
+	// emit the matching assembler label immediately after the TLS access;
+	// the "memory" clobber again keeps the compiler from moving memory accesses across it
+	asm volatile("__cfaasm_get_after:":::"memory");
+	return val;
+}
 
 extern "C" {
 	// Disable interrupts by incrementing the counter
 	void disable_interrupts() {
-		with( kernelTLS.preemption_state ) {
+		// create a assembler label before
+		// marked as clobber all to avoid movement
+		asm volatile("__cfaasm_disable_before:":::"memory");
+
+		with( __cfaabi_tls.preemption_state ) {
 			#if GCC_VERSION > 50000
 			static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
@@ -183,4 +272,8 @@
 			verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
 		}
+
+		// create an assembler label after
+		// the "memory" clobber keeps the compiler from moving memory accesses across the label
+		asm volatile("__cfaasm_disable_after:":::"memory");
 	}
 
@@ -188,8 +281,12 @@
 	// If counter reaches 0, execute any pending __cfactx_switch
 	void enable_interrupts( __cfaabi_dbg_ctx_param ) {
-		processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
+		// create an assembler label before
+		// the "memory" clobber keeps the compiler from moving memory accesses across the label
+		asm volatile("__cfaasm_enable_before:":::"memory");
+
+		processor   * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
 		/* paranoid */ verify( proc );
 
-		with( kernelTLS.preemption_state ){
+		with( __cfaabi_tls.preemption_state ){
 			unsigned short prev = disable_count;
 			disable_count -= 1;
@@ -218,4 +315,8 @@
 		// For debugging purposes : keep track of the last person to enable the interrupts
 		__cfaabi_dbg_debug_do( proc->last_enable = caller; )
+
+		// create an assembler label after
+		// the "memory" clobber keeps the compiler from moving memory accesses across the label
+		asm volatile("__cfaasm_enable_after:":::"memory");
 	}
 
@@ -223,19 +324,27 @@
 	// Don't execute any pending __cfactx_switch even if counter reaches 0
 	void enable_interrupts_noPoll() {
-		unsigned short prev = kernelTLS.preemption_state.disable_count;
-		kernelTLS.preemption_state.disable_count -= 1;
+		// create an assembler label before
+		// the "memory" clobber keeps the compiler from moving memory accesses across the label
+		asm volatile("__cfaasm_nopoll_before:":::"memory");
+
+		unsigned short prev = __cfaabi_tls.preemption_state.disable_count;
+		__cfaabi_tls.preemption_state.disable_count -= 1;
 		verifyf( prev != 0u, "Incremented from %u\n", prev );                     // If this triggers someone is enabled already enabled interrupts
 		if( prev == 1 ) {
 			#if GCC_VERSION > 50000
-			static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
+			static_assert(__atomic_always_lock_free(sizeof(__cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");
 			#endif
 			// Set enabled flag to true
 			// should be atomic to avoid preemption in the middle of the operation.
 			// use memory order RELAXED since there is no inter-thread on this variable requirements
-			__atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);
+			__atomic_store_n(&__cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);
 
 			// Signal the compiler that a fence is needed but only for signal handlers
 			__atomic_signal_fence(__ATOMIC_RELEASE);
 		}
+
+		// create an assembler label after
+		// the "memory" clobber keeps the compiler from moving memory accesses across the label
+		asm volatile("__cfaasm_nopoll_after:":::"memory");
 	}
 }
@@ -270,9 +379,9 @@
 
 // reserved for future use
-static void timeout( struct __processor_id_t * id, $thread * this ) {
+static void timeout( $thread * this ) {	// unparks 'this'; when statistics are compiled in, first selects its cluster's stats
 	#if !defined( __CFA_NO_STATISTICS__ )
-		kernelTLS.this_stats = this->curr_cluster->stats;
+		kernelTLS().this_stats = this->curr_cluster->stats;	// use the stats object of the thread's current cluster
 	#endif
-	__unpark( id, this );
+	unpark( this );
 }
 
@@ -283,8 +392,8 @@
 static inline bool preemption_ready() {
 	// Check if preemption is safe
-	bool ready = kernelTLS.preemption_state.enabled && ! kernelTLS.preemption_state.in_progress;
+	bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress;
 
 	// Adjust the pending flag accordingly
-	kernelTLS.this_processor->pending_preemption = !ready;
+	__cfaabi_tls.this_processor->pending_preemption = !ready;	// pending is set exactly when this call reports not ready
 	return ready;
 }
@@ -300,6 +409,6 @@
 
 	// Start with preemption disabled until ready
-	kernelTLS.preemption_state.enabled = false;
-	kernelTLS.preemption_state.disable_count = 1;
+	__cfaabi_tls.preemption_state.enabled = false;
+	__cfaabi_tls.preemption_state.disable_count = 1;
 
 	// Initialize the event kernel
@@ -359,9 +468,51 @@
 // Kernel Signal Handlers
 //=============================================================================================
+struct asm_region {	// pair of code addresses produced by __cfaasm_label and compared against the interrupted IP
+	void * before;	// address of the __cfaasm_<label>_before assembler label
+	void * after;	// address of the __cfaasm_<label>_after assembler label
+};
+
+//-----------------------------------------------------------------------------
+// Some assembly required: __cfaasm_label( label ) yields an asm_region holding the addresses of __cfaasm_<label>_before/_after
+#if defined( __i386 )
+	#define __cfaasm_label( label ) \
+		({ \
+			struct asm_region region; \
+			asm( \
+				"movl $__cfaasm_" #label "_before, %[vb]\n\t" \
+				"movl $__cfaasm_" #label "_after , %[va]\n\t" \
+				 : [vb]"=r"(region.before), [va]"=r"(region.after) \
+			); \
+			region; \
+		});
+#elif defined( __x86_64 )
+	#ifdef __PIC__
+		#define PLT "@PLT"
+	#else
+		#define PLT ""
+	#endif
+	#define __cfaasm_label( label ) \
+		({ \
+			struct asm_region region; \
+			asm( \
+				"movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \
+				"movq $__cfaasm_" #label "_after"  PLT ", %[va]\n\t" \
+				 : [vb]"=r"(region.before), [va]"=r"(region.after) \
+			); \
+			region; \
+		});
+#elif defined( __aarch64__ )
+	#error __cfaasm_label undefined for arm
+#else
+	#error unknown hardware architecture
+#endif
+
+__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
 
 // Context switch signal handler
 // Receives SIGUSR1 signal and causes the current thread to yield
 static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
-	__cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )
+	void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP);
+	__cfaabi_dbg_debug_do( last_interrupt = ip; )
 
 	// SKULLDUGGERY: if a thread creates a processor and the immediately deletes it,
@@ -369,9 +520,9 @@
 	// before the kernel thread has even started running. When that happens, an interrupt
 	// with a null 'this_processor' will be caught, just ignore it.
-	if(! kernelTLS.this_processor ) return;
+	if(! __cfaabi_tls.this_processor ) return;
 
 	choose(sfp->si_value.sival_int) {
 		case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
-		case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
+		case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
 		default:
 			abort( "internal error, signal value is %d", sfp->si_value.sival_int );
@@ -381,8 +532,15 @@
 	if( !preemption_ready() ) { return; }
 
-	__cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
+	struct asm_region region;
+	region = __cfaasm_label( get     ); if( ip >= region.before && ip <= region.after ) return;
+	region = __cfaasm_label( check   ); if( ip >= region.before && ip <= region.after ) return;
+	region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return;
+	region = __cfaasm_label( enable  ); if( ip >= region.before && ip <= region.after ) return;
+	region = __cfaasm_label( nopoll  ); if( ip >= region.before && ip <= region.after ) return;
+
+	__cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
 
 	// Sync flag : prevent recursive calls to the signal handler
-	kernelTLS.preemption_state.in_progress = true;
+	__cfaabi_tls.preemption_state.in_progress = true;
 
 	// Clear sighandler mask before context switching.
@@ -394,7 +552,6 @@
 	}
 
-	// TODO: this should go in finish action
 	// Clear the in progress flag
-	kernelTLS.preemption_state.in_progress = false;
+	__cfaabi_tls.preemption_state.in_progress = false;
 
 	// Preemption can occur here
@@ -413,4 +570,5 @@
 	id.full_proc = false;
 	id.id = doregister(&id);
+	__cfaabi_tls.this_proc_id = &id;
 
 	// Block sigalrms to control when they arrive
@@ -458,5 +616,5 @@
 			// __cfaabi_dbg_print_safe( "Kernel : Preemption thread tick\n" );
 			lock( event_kernel->lock __cfaabi_dbg_ctx2 );
-			tick_preemption( &id );
+			tick_preemption();
 			unlock( event_kernel->lock );
 			break;
@@ -480,5 +638,5 @@
 
 void __cfaabi_check_preemption() {
-	bool ready = kernelTLS.preemption_state.enabled;
+	bool ready = __preemption_enabled();
 	if(!ready) { abort("Preemption should be ready"); }
 
@@ -503,5 +661,5 @@
 #ifdef __CFA_WITH_VERIFY__
 bool __cfaabi_dbg_in_kernel() {
-	return !kernelTLS.preemption_state.enabled;
+	return !__preemption_enabled();
 }
 #endif
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -150,5 +150,5 @@
 //  queues or removing them.
 uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 
 	// Step 1 : lock global lock
@@ -166,10 +166,10 @@
 	}
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 	return s;
 }
 
 void ready_mutate_unlock( uint_fast32_t last_s ) with(*__scheduler_lock) {
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 
 	// Step 1 : release local locks
@@ -188,5 +188,5 @@
 	__atomic_store_n(&lock, (bool)false, __ATOMIC_RELEASE);
 
-	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
@@ -252,5 +252,5 @@
 		preferred =
 			//*
-			kernelTLS.this_processor ? kernelTLS.this_processor->id * 4 : -1;
+			kernelTLS().this_processor ? kernelTLS().this_processor->id * 4 : -1;
 			/*/
 			thrd->link.preferred * 4;
@@ -331,5 +331,5 @@
 		// Don't bother trying locally too much
 		int local_tries = 8;
-		preferred = kernelTLS.this_processor->id * 4;
+		preferred = kernelTLS().this_processor->id * 4;
 	#endif
 
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision 55acc3a4fa45a500fa88a39b850dd11a68275702)
+++ libcfa/src/concurrency/thread.cfa	(revision 18f0b707a42dceef0ea6f927ff708cc6bd645627)
@@ -127,5 +127,5 @@
 	verify( this_thrd->context.SP );
 
-	__schedule_thread( (__processor_id_t *)kernelTLS.this_processor, this_thrd);
+	__schedule_thread( this_thrd );
 	enable_interrupts( __cfaabi_dbg_ctx );
 }