Index: libcfa/src/concurrency/coroutine.hfa
===================================================================
--- libcfa/src/concurrency/coroutine.hfa	(revision b0c7419eaea3be8c569d9296836b9fdf20f008ab)
+++ libcfa/src/concurrency/coroutine.hfa	(revision 8c50aed95a361f33369903cfdf38ceaaefeffddf)
@@ -54,5 +54,5 @@
 void prime(T & cor);
 
-static inline struct coroutine_desc * active_coroutine() { return TL_GET( this_thread )->curr_cor; }
+static inline struct coroutine_desc * active_coroutine() __attribute__((const)) { return TL_GET( this_thread )->curr_cor; }
 
 //-----------------------------------------------------------------------------
@@ -73,5 +73,5 @@
 // Private wrappers for context switch and stack creation
 // Wrapper for co
-static inline void CoroutineCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
+static inline void CoroutineCtxSwitch( coroutine_desc * src, coroutine_desc * dst ) __attribute__((nonnull (1, 2))) {
 	// set state of current coroutine to inactive
 	src->state = src->state == Halted ? Halted : Inactive;
@@ -152,5 +152,5 @@
 }
 
-static inline void resume(coroutine_desc * dst) {
+static inline void resume( coroutine_desc * dst ) __attribute__((nonnull (1))) {
 	// optimization : read TLS once and reuse it
 	// Safety note: this is preemption safe since if
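For reference, the two GCC attributes this header starts using have the following meaning in plain GNU C. The snippet below is a minimal illustrative sketch, not code from the library; it attaches the attributes to separate declarations, which is the placement GCC accepts everywhere.

	#include <stddef.h>

	// __attribute__((const)): the result depends only on the arguments, so the
	// compiler may fold repeated calls into a single one.
	static inline int square( int x ) __attribute__((const));
	static inline int square( int x ) { return x * x; }

	// __attribute__((nonnull (1))): passing a null pointer as argument 1 is
	// undefined, so the compiler can warn at call sites and drop null checks.
	static inline size_t my_strlen( const char * s ) __attribute__((nonnull (1)));
	static inline size_t my_strlen( const char * s ) {
		size_t n = 0;
		while ( s[n] ) n += 1;
		return n;
	}

Marking small accessors and context-switch wrappers this way mainly helps the optimizer common up repeated calls and gives earlier diagnostics for null arguments.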
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision b0c7419eaea3be8c569d9296836b9fdf20f008ab)
+++ libcfa/src/concurrency/invoke.h	(revision 8c50aed95a361f33369903cfdf38ceaaefeffddf)
@@ -200,9 +200,9 @@
 	#ifdef __cforall
 	extern "Cforall" {
-		static inline thread_desc *& get_next( thread_desc & this ) {
+		static inline thread_desc *& get_next( thread_desc & this ) __attribute__((const)) {
 			return this.next;
 		}
 
-		static inline [thread_desc *&, thread_desc *& ] __get( thread_desc & this ) {
+		static inline [thread_desc *&, thread_desc *& ] __get( thread_desc & this ) /*__attribute__((const))*/ {
 			return this.node.[next, prev];
 		}
@@ -220,5 +220,5 @@
 		}
 
-		static inline bool ?==?( const __monitor_group_t & lhs, const __monitor_group_t & rhs ) {
+		static inline bool ?==?( const __monitor_group_t & lhs, const __monitor_group_t & rhs ) __attribute__((const)) {
 			if( (lhs.data != 0) != (rhs.data != 0) ) return false;
 			if( lhs.size != rhs.size ) return false;
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision b0c7419eaea3be8c569d9296836b9fdf20f008ab)
+++ libcfa/src/concurrency/kernel.cfa	(revision 8c50aed95a361f33369903cfdf38ceaaefeffddf)
@@ -110,6 +110,6 @@
 //-----------------------------------------------------------------------------
 //Start and stop routine for the kernel, declared first to make sure they run first
-static void kernel_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
-static void kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
+static void __kernel_startup (void) __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
+static void __kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
 
 //-----------------------------------------------------------------------------
@@ -208,5 +208,6 @@
 }
 
-static void start(processor * this);
+static void * CtxInvokeProcessor(void * arg);
+
 void ?{}(processor & this, const char * name, cluster & cltr) with( this ) {
 	this.name = name;
@@ -221,5 +222,9 @@
 	idleLock{};
 
-	start( &this );
+	__cfaabi_dbg_print_safe("Kernel : Starting core %p\n", &this);
+
+	this.stack = __create_pthread( &this.kernel_thread, CtxInvokeProcessor, (void *)&this );
+
+	__cfaabi_dbg_print_safe("Kernel : core %p started\n", &this);
 }
 
@@ -259,7 +264,7 @@
 // Kernel Scheduling logic
 //=============================================================================================
-static thread_desc * nextThread(cluster * this);
-static void runThread(processor * this, thread_desc * dst);
-static void halt(processor * this);
+static thread_desc * __next_thread(cluster * this);
+static void __run_thread(processor * this, thread_desc * dst);
+static void __halt(processor * this);
 
 //Main of the processor contexts
@@ -284,5 +289,5 @@
 		thread_desc * readyThread = 0p;
 		for( unsigned int spin_count = 0; ! __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST); spin_count++ ) {
-			readyThread = nextThread( this->cltr );
+			readyThread = __next_thread( this->cltr );
 
 			if(readyThread) {
@@ -291,5 +296,5 @@
 				/* paranoid */ verifyf( readyThread->next == 0p, "Expected null got %p", readyThread->next );
 
-				runThread(this, readyThread);
+				__run_thread(this, readyThread);
 
 				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
@@ -298,5 +303,5 @@
 			} else {
 				// spin(this, &spin_count);
-				// halt(this);
+				__halt(this);
 			}
 		}
@@ -318,5 +323,5 @@
 // runThread runs a thread by context switching
 // from the processor coroutine to the target thread
-static void runThread(processor * this, thread_desc * thrd_dst) {
+static void __run_thread(processor * this, thread_desc * thrd_dst) {
 	coroutine_desc * proc_cor = get_coroutine(this->runner);
 
@@ -359,5 +364,5 @@
 		if(unlikely(thrd_dst->preempted != __NO_PREEMPTION)) {
 			// The thread was preempted, reschedule it and reset the flag
-			ScheduleThread( thrd_dst );
+			__schedule_thread( thrd_dst );
 			break RUNNING;
 		}
@@ -460,5 +465,5 @@
 } // Abort
 
-void * create_pthread( pthread_t * pthread, void * (*start)(void *), void * arg ) {
+void * __create_pthread( pthread_t * pthread, void * (*start)(void *), void * arg ) {
 	pthread_attr_t attr;
 
@@ -488,14 +493,6 @@
 }
 
-static void start(processor * this) {
-	__cfaabi_dbg_print_safe("Kernel : Starting core %p\n", this);
-
-	this->stack = create_pthread( &this->kernel_thread, CtxInvokeProcessor, (void *)this );
-
-	__cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
-}
-
 // KERNEL_ONLY
-void kernel_first_resume( processor * this ) {
+static void __kernel_first_resume( processor * this ) {
 	thread_desc * src = mainThread;
 	coroutine_desc * dst = get_coroutine(this->runner);
@@ -529,5 +526,5 @@
 
 // KERNEL_ONLY
-void kernel_last_resume( processor * this ) {
+static void __kernel_last_resume( processor * this ) {
 	coroutine_desc * src = &mainThread->self_cor;
 	coroutine_desc * dst = get_coroutine(this->runner);
@@ -544,5 +541,5 @@
 // Scheduler routines
 // KERNEL ONLY
-void ScheduleThread( thread_desc * thrd ) with( *thrd->curr_cluster ) {
+void __schedule_thread( thread_desc * thrd ) with( *thrd->curr_cluster ) {
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 	/* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
@@ -574,5 +571,5 @@
 
 // KERNEL ONLY
-static thread_desc * nextThread(cluster * this) with( *this ) {
+static thread_desc * __next_thread(cluster * this) with( *this ) {
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 
@@ -600,5 +597,5 @@
 			// Wake lost the race,
 			thrd->state = Inactive;
-			ScheduleThread( thrd );
+			__schedule_thread( thrd );
 			break;
 		case Rerun:
@@ -668,5 +665,5 @@
 //-----------------------------------------------------------------------------
 // Kernel boot procedures
-static void kernel_startup(void) {
+static void __kernel_startup(void) {
 	verify( ! kernelTLS.preemption_state.enabled );
 	__cfaabi_dbg_print_safe("Kernel : Starting\n");
@@ -729,10 +726,10 @@
 	// Add the main thread to the ready queue
 	// once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
-	ScheduleThread(mainThread);
+	__schedule_thread(mainThread);
 
 	// SKULLDUGGERY: Force a context switch to the main processor to set the main thread's context to the current UNIX
 	// context. Hence, the main thread does not begin through CtxInvokeThread, like all other threads. The trick here is that
 	// mainThread is on the ready queue when this call is made.
-	kernel_first_resume( kernelTLS.this_processor );
+	__kernel_first_resume( kernelTLS.this_processor );
 
 
@@ -746,5 +743,5 @@
 }
 
-static void kernel_shutdown(void) {
+static void __kernel_shutdown(void) {
 	__cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n");
 
@@ -757,5 +754,5 @@
 	// which is currently here
 	__atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
-	kernel_last_resume( kernelTLS.this_processor );
+	__kernel_last_resume( kernelTLS.this_processor );
 	mainThread->self_cor.state = Halted;
 
@@ -783,5 +780,5 @@
 // Kernel Quiescing
 //=============================================================================================
-static void halt(processor * this) with( *this ) {
+static void __halt(processor * this) with( *this ) {
 	// verify( ! __atomic_load_n(&do_terminate, __ATOMIC_SEQ_CST) );
 
@@ -972,5 +969,5 @@
 //-----------------------------------------------------------------------------
 // Debug
-bool threading_enabled(void) {
+bool threading_enabled(void) __attribute__((const)) {
 	return true;
 }
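With the former start() helper folded into the processor constructor, the kernel thread is now created directly through __create_pthread, which also hands back the stack it allocated so the owner can release it later. The following is only a rough sketch of a helper with that shape, assuming a page-aligned heap-allocated stack; it is not the library's implementation.

	#include <pthread.h>
	#include <stdlib.h>

	// Illustrative only: create a pthread on a caller-owned stack and return the
	// stack so the caller can free it after joining the thread.
	static void * create_pthread_sketch( pthread_t * pthread, void * (*start)(void *), void * arg ) {
		size_t stacksize = 1 << 16;                    // assumed size, >= PTHREAD_STACK_MIN
		void * stack = NULL;
		posix_memalign( &stack, 4096, stacksize );     // page-aligned, as pthread_attr_setstack expects

		pthread_attr_t attr;
		pthread_attr_init( &attr );
		pthread_attr_setstack( &attr, stack, stacksize );
		pthread_create( pthread, &attr, start, arg );
		pthread_attr_destroy( &attr );
		return stack;                                  // owner frees this after pthread_join
	}
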
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision b0c7419eaea3be8c569d9296836b9fdf20f008ab)
+++ libcfa/src/concurrency/kernel.hfa	(revision 8c50aed95a361f33369903cfdf38ceaaefeffddf)
@@ -108,7 +108,5 @@
 static inline void  ?{}(processor & this, const char * name) { this{name, *mainCluster }; }
 
-static inline [processor *&, processor *& ] __get( processor & this ) {
-	return this.node.[next, prev];
-}
+static inline [processor *&, processor *& ] __get( processor & this ) /*__attribute__((const))*/ { return this.node.[next, prev]; }
 
 //-----------------------------------------------------------------------------
@@ -153,10 +151,8 @@
 static inline void ?{} (cluster & this, const char * name)        { this{name, default_preemption()}; }
 
-static inline [cluster *&, cluster *& ] __get( cluster & this ) {
-	return this.node.[next, prev];
-}
+static inline [cluster *&, cluster *& ] __get( cluster & this ) /*__attribute__((const))*/ { return this.node.[next, prev]; }
 
-static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
-static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
+static inline struct processor * active_processor() __attribute__((const)) { return TL_GET( this_processor ); } // UNSAFE
+static inline struct cluster   * active_cluster  () __attribute__((const)) { return TL_GET( this_processor )->cltr; }
 
 // Local Variables: //
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision b0c7419eaea3be8c569d9296836b9fdf20f008ab)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 8c50aed95a361f33369903cfdf38ceaaefeffddf)
@@ -31,5 +31,5 @@
 }
 
-void ScheduleThread( thread_desc * ) __attribute__((nonnull (1)));
+void __schedule_thread( thread_desc * ) __attribute__((nonnull (1)));
 
 //Block current thread and release/wake-up the following resources
@@ -40,5 +40,5 @@
 void main(processorCtx_t *);
 
-void * create_pthread( pthread_t *, void * (*)(void *), void * );
+void * __create_pthread( pthread_t *, void * (*)(void *), void * );
 
 static inline void wake_fast(processor * this) {
@@ -85,5 +85,5 @@
 #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
 
-static inline uint32_t tls_rand() {
+static inline uint32_t __tls_rand() {
 	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
 	kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
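__tls_rand above is an xorshift-style generator over the per-kernel-thread seed; the hunk only shows its first two steps. For comparison, a classic 32-bit xorshift (Marsaglia 2003) with the textbook shift triple is sketched below; the constants may differ from the ones the library uses.

	#include <stdint.h>

	// Illustrative xorshift32: full period over nonzero 32-bit seeds.
	static inline uint32_t xorshift32( uint32_t * seed ) {
		uint32_t x = *seed;          // seed must be initialized to a nonzero value
		x ^= x << 13;
		x ^= x >> 17;
		x ^= x << 5;
		return *seed = x;
	}
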
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision b0c7419eaea3be8c569d9296836b9fdf20f008ab)
+++ libcfa/src/concurrency/preemption.cfa	(revision 8c50aed95a361f33369903cfdf38ceaaefeffddf)
@@ -306,5 +306,5 @@
 	signal_block( SIGALRM );
 
-	alarm_stack = create_pthread( &alarm_thread, alarm_loop, 0p );
+	alarm_stack = __create_pthread( &alarm_thread, alarm_loop, 0p );
 }
 
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision b0c7419eaea3be8c569d9296836b9fdf20f008ab)
+++ libcfa/src/concurrency/thread.cfa	(revision 8c50aed95a361f33369903cfdf38ceaaefeffddf)
@@ -22,11 +22,4 @@
 #define __CFA_INVOKE_PRIVATE__
 #include "invoke.h"
-
-extern "C" {
-	#include <fenv.h>
-	#include <stddef.h>
-}
-
-//extern volatile thread_local processor * this_processor;
 
 //-----------------------------------------------------------------------------
@@ -56,4 +49,22 @@
 }
 
+//-----------------------------------------------------------------------------
+// Starting and stopping threads
+forall( dtype T | is_thread(T) )
+void __thrd_start( T & this, void (*main_p)(T &) ) {
+	thread_desc * this_thrd = get_thread(this);
+
+	disable_interrupts();
+	CtxStart(main_p, get_coroutine(this), this, CtxInvokeThread);
+
+	this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
+	verify( this_thrd->context.SP );
+
+	__schedule_thread(this_thrd);
+	enable_interrupts( __cfaabi_dbg_ctx );
+}
+
+//-----------------------------------------------------------------------------
+// Support for threads that don't use the thread keyword
 forall( dtype T | sized(T) | is_thread(T) | { void ?{}(T&); } )
 void ?{}( scoped(T)& this ) with( this ) {
@@ -73,20 +84,4 @@
 }
 
-//-----------------------------------------------------------------------------
-// Starting and stopping threads
-forall( dtype T | is_thread(T) )
-void __thrd_start( T & this, void (*main_p)(T &) ) {
-	thread_desc * this_thrd = get_thread(this);
-
-	disable_interrupts();
-	CtxStart(main_p, get_coroutine(this), this, CtxInvokeThread);
-
-	this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
-	verify( this_thrd->context.SP );
-
-	ScheduleThread(this_thrd);
-	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
 // Local Variables: //
 // mode: c //
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision b0c7419eaea3be8c569d9296836b9fdf20f008ab)
+++ libcfa/src/concurrency/thread.hfa	(revision 8c50aed95a361f33369903cfdf38ceaaefeffddf)
@@ -31,24 +31,19 @@
 };
 
-#define DECL_THREAD(X) thread_desc* get_thread(X& this) { return &this.__thrd; } void main(X& this)
+// define that satisfies the trait without using the thread keyword
+#define DECL_THREAD(X) thread_desc* get_thread(X& this) __attribute__((const)) { return &this.__thrd; } void main(X& this)
+
+// Inline getters for threads/coroutines/monitors
+forall( dtype T | is_thread(T) )
+static inline coroutine_desc* get_coroutine(T & this) __attribute__((const)) { return &get_thread(this)->self_cor; }
 
 forall( dtype T | is_thread(T) )
-static inline coroutine_desc* get_coroutine(T & this) {
-	return &get_thread(this)->self_cor;
-}
+static inline monitor_desc  * get_monitor  (T & this) __attribute__((const)) { return &get_thread(this)->self_mon; }
 
-forall( dtype T | is_thread(T) )
-static inline monitor_desc* get_monitor(T & this) {
-	return &get_thread(this)->self_mon;
-}
+static inline coroutine_desc* get_coroutine(thread_desc * this) __attribute__((const)) { return &this->self_cor; }
+static inline monitor_desc  * get_monitor  (thread_desc * this) __attribute__((const)) { return &this->self_mon; }
 
-static inline coroutine_desc* get_coroutine(thread_desc * this) {
-	return &this->self_cor;
-}
-
-static inline monitor_desc* get_monitor(thread_desc * this) {
-	return &this->self_mon;
-}
-
+//-----------------------------------------------------------------------------
+// forward declarations needed for threads
 extern struct cluster * mainCluster;
 

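
As the comment above DECL_THREAD says, the macro lets a plain struct satisfy the thread trait without the thread keyword. A hypothetical usage sketch follows; everything except the required __thrd member name is invented for illustration, and the start/join plumbing is omitted.

	// Hypothetical example: the member must be named __thrd because the macro's
	// generated get_thread returns &this.__thrd.
	struct worker {
		thread_desc __thrd;
		int id;
	};
	DECL_THREAD(worker);          // defines get_thread(worker&) and declares void main(worker&)

	void main(worker & this) {
		// thread body: runs once the worker is started and scheduled
	}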