Index: libcfa/src/concurrency/clib/cfathread.cfa
===================================================================
--- libcfa/src/concurrency/clib/cfathread.cfa	(revision e0c072c3bbbe8115f3120141f3503d077cada4be)
+++ libcfa/src/concurrency/clib/cfathread.cfa	(revision a5e7233ed9642389f0f296f388965729df83c758)
@@ -21,4 +21,12 @@
 
 #include "cfathread.h"
+
+extern void ?{}(processor &, const char[], cluster &, $thread *);
+extern "C" {
+      extern void __cfactx_invoke_thread(void (*main)(void *), void * this);
+}
+
+//================================================================================
+// Threads run by the C Interface
 
 struct cfathread_object {
@@ -65,7 +73,62 @@
 }
 
-processor * procs = 0p;
-int proc_cnt = 1;
-
+//================================================================================
+// Special Init Thread responsible for the initialization of processors
+struct __cfainit {
+	$thread self;	// embedded thread descriptor, returned by get_thread()
+	void (*init)( void * );	// initialization routine invoked from main()
+	void * arg;	// opaque argument forwarded to init
+};
+void main(__cfainit & this);
+void ^?{}(__cfainit & mutex this);
+// Returns the address of the thread descriptor embedded in the given init thread.
+static inline $thread * get_thread( __cfainit & this ) { return &this.self; }
+
+typedef ThreadCancelled(__cfainit) __cfainit_exception;
+typedef ThreadCancelled_vtable(__cfainit) __cfainit_vtable;
+// Resumption handler for a cancelled init thread: aborts with a diagnostic message.
+void defaultResumptionHandler(ThreadCancelled(__cfainit) & except) {
+	abort | "The init thread was cancelled";
+}
+
+__cfainit_vtable ___cfainit_vtable_instance;
+// Returns the vtable instance associated with the init thread's cancellation exception type.
+__cfainit_vtable const & get_exception_vtable(__cfainit_exception *) {
+	return ___cfainit_vtable_instance;
+}
+// Builds an init thread that will call init(arg); its context is prepared by hand instead of via __thrd_start.
+static void ?{}( __cfainit & this, void (*init)( void * ), void * arg ) {
+	this.init = init;
+	this.arg = arg;
+	((thread&)this){"Processor Init"};
+
+	// Don't use __thrd_start! just prep the context manually
+	$thread * this_thrd = get_thread(this);
+	void (*main_p)(__cfainit &) = main;
+
+	disable_interrupts();
+	__cfactx_start(main_p, get_coroutine(this), this, __cfactx_invoke_thread);
+	// Copy the coroutine's stack and frame pointers into the thread's own context
+	this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
+	/* paranoid */ verify( this_thrd->context.SP );
+	// Mark the thread Ready; NOTE(review): presumably it is run later by the owning processor - confirm
+	this_thrd->state = Ready;
+	enable_interrupts( __cfaabi_dbg_ctx );
+}
+// Destroys the embedded thread descriptor. NOTE(review): the mutex parameter presumably makes this wait for the thread to finish - confirm.
+void ^?{}(__cfainit & mutex this) {
+	^(this.self){};
+}
+// Body of the init thread: checks that it is the active thread, then runs the stored routine.
+void main( __cfainit & this ) {
+	__attribute__((unused)) void * const thrd_obj = (void*)&this;
+	__attribute__((unused)) void * const thrd_hdl = (void*)active_thread();
+	/* paranoid */ verify( thrd_obj == thrd_hdl );	// this object must be the currently active thread
+
+	this.init( this.arg );
+}
+
+//================================================================================
+// Main Api
 extern "C" {
 	int cfathread_cluster_create(cfathread_cluster_t * cl) __attribute__((nonnull(1))) {
@@ -79,7 +142,18 @@
 
 	int cfathread_cluster_add_worker(cfathread_cluster_t cl, pthread_t* tid, void (*init_routine) (void *), void * arg) {
-		// processor * proc = new("C-processor", *cl, init_routine, arg);
+		__cfainit * it = 0p;
+		$thread * it_thrd = 0p;	// stays null when no init routine is supplied
+		if(init_routine) {
+			it = alloc();
+			(*it){init_routine, arg};
+			it_thrd = get_thread(*it);	// only dereference `it` once it is known to be non-null
+		}
 		processor * proc = alloc();
-		(*proc){ "C-processor", *cl, init_routine, arg };
+		(*proc){ "C-processor", *cl, it_thrd };
+		// Wait for the init thread to return before continuing
+		if(it) {
+			^(*it){};
+			free(it);
+		}
 		if(tid) *tid = proc->kernel_thread;
 		return 0;
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision e0c072c3bbbe8115f3120141f3503d077cada4be)
+++ libcfa/src/concurrency/kernel.cfa	(revision a5e7233ed9642389f0f296f388965729df83c758)
@@ -149,9 +149,4 @@
 	#endif
 
-	// if we need to run some special setup, now is the time to do it.
-	if(this->init.fnc) {
-		this->init.fnc(this->init.arg);
-	}
-
 	{
 		// Setup preemption data
@@ -162,4 +157,9 @@
 		#endif
 
+		// if we need to run some special setup, now is the time to do it.
+		if(this->init.thrd) {
+			this->init.thrd->curr_cluster = this->cltr;
+			__run_thread(this, this->init.thrd);
+		}
 
 		__cfadbg_print_safe(runtime_core, "Kernel : core %p started\n", this);
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision e0c072c3bbbe8115f3120141f3503d077cada4be)
+++ libcfa/src/concurrency/kernel.hfa	(revision a5e7233ed9642389f0f296f388965729df83c758)
@@ -112,6 +112,5 @@
 	// it is not a particularly safe scheme as it can make processors less homogeneous
 	struct {
-		void (*fnc) (void *);
-		void * arg;
+		$thread * thrd;
 	} init;
 
@@ -127,10 +126,10 @@
 };
 
-void  ?{}(processor & this, const char name[], struct cluster & cltr, void (*init) (void *), void * arg);
+void  ?{}(processor & this, const char name[], struct cluster & cltr);
 void ^?{}(processor & this);
 
-static inline void  ?{}(processor & this)                        { this{ "Anonymous Processor", *mainCluster, 0p, 0p}; }
-static inline void  ?{}(processor & this, struct cluster & cltr) { this{ "Anonymous Processor", cltr, 0p, 0p}; }
-static inline void  ?{}(processor & this, const char name[])     { this{name, *mainCluster, 0p, 0p }; }
+static inline void  ?{}(processor & this)                        { this{ "Anonymous Processor", *mainCluster}; }
+static inline void  ?{}(processor & this, struct cluster & cltr) { this{ "Anonymous Processor", cltr}; }
+static inline void  ?{}(processor & this, const char name[])     { this{name, *mainCluster}; }
 
 DLISTED_MGD_IMPL_OUT(processor)
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision e0c072c3bbbe8115f3120141f3503d077cada4be)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision a5e7233ed9642389f0f296f388965729df83c758)
@@ -73,5 +73,5 @@
 static void __kernel_first_resume( processor * this );
 static void __kernel_last_resume ( processor * this );
-static void init(processor & this, const char name[], cluster & _cltr, void (*fnc) (void *), void * arg);
+static void init(processor & this, const char name[], cluster & _cltr, $thread * initT);
 static void deinit(processor & this);
 static void doregister( struct cluster & cltr );
@@ -198,5 +198,5 @@
 		( this.terminated ){};
 		( this.runner ){};
-		init( this, "Main Processor", *mainCluster, 0p, 0p );
+		init( this, "Main Processor", *mainCluster, 0p );
 		kernel_thread = pthread_self();
 
@@ -452,5 +452,5 @@
 }
 
-static void init(processor & this, const char name[], cluster & _cltr, void (*fnc) (void *), void * arg) with( this ) {
+static void init(processor & this, const char name[], cluster & _cltr, $thread * initT) with( this ) {
 	this.name = name;
 	this.cltr = &_cltr;
@@ -464,6 +464,5 @@
 	this.io.dirty   = false;
 
-	this.init.fnc = fnc;
-	this.init.arg = arg;
+	this.init.thrd = initT;
 
 	this.idle = eventfd(0, 0);
@@ -516,10 +515,10 @@
 }
 
-void ?{}(processor & this, const char name[], cluster & _cltr, void (*fnc) (void *), void * arg) {
+void ?{}(processor & this, const char name[], cluster & _cltr, $thread * initT) {
 	( this.terminated ){};
 	( this.runner ){};
 
 	disable_interrupts();
-		init( this, name, _cltr, fnc, arg );
+		init( this, name, _cltr, initT );
 	enable_interrupts( __cfaabi_dbg_ctx );
 
@@ -527,5 +526,8 @@
 
 	this.stack = __create_pthread( &this.kernel_thread, __invoke_processor, (void *)&this );
-
+}
+// Convenience constructor: builds a processor without an init thread by passing 0p as initT.
+void ?{}(processor & this, const char name[], cluster & _cltr) {
+	(this){name, _cltr, 0p};
 }
 
Index: tests/concurrent/.expect/clib_tls.txt
===================================================================
--- tests/concurrent/.expect/clib_tls.txt	(revision a5e7233ed9642389f0f296f388965729df83c758)
+++ tests/concurrent/.expect/clib_tls.txt	(revision a5e7233ed9642389f0f296f388965729df83c758)
@@ -0,0 +1,12 @@
+Local Init
+Local Init
+Local Init
+Local Init
+Starting Checkers
+Done
+Done
+Done
+Done
+Done
+Done
+Done
Index: tests/concurrent/clib.c
===================================================================
--- tests/concurrent/clib.c	(revision e0c072c3bbbe8115f3120141f3503d077cada4be)
+++ tests/concurrent/clib.c	(revision a5e7233ed9642389f0f296f388965729df83c758)
@@ -62,10 +62,15 @@
 	cfathread_cluster_add_worker( cl, NULL, NULL, NULL );
 	cfathread_cluster_add_worker( cl, NULL, NULL, NULL );
+
+	cfathread_attr_t attr;
+	cfathread_attr_init(&attr);
+	cfathread_attr_setcluster(&attr, cl);
+
 	cfathread_t u;
-	cfathread_create( &u, NULL, Unparker, NULL );
+	cfathread_create( &u, &attr, Unparker, NULL );
 	{
 		cfathread_t t[20];
 		for(int i = 0; i < 20; i++) {
-			cfathread_create( &t[i], NULL, Worker, NULL );
+			cfathread_create( &t[i], &attr, Worker, NULL );
 		}
 		for(int i = 0; i < 20; i++) {
@@ -75,4 +80,5 @@
 	stop = true;
 	cfathread_join(u, NULL);
+	cfathread_attr_destroy(&attr);
 	fflush(stdout);
 	_exit(0);
Index: tests/concurrent/clib_tls.c
===================================================================
--- tests/concurrent/clib_tls.c	(revision a5e7233ed9642389f0f296f388965729df83c758)
+++ tests/concurrent/clib_tls.c	(revision a5e7233ed9642389f0f296f388965729df83c758)
@@ -0,0 +1,52 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <clib/cfathread.h>
+#include <bits/defs.hfa>
+
+extern "C" {
+void _exit(int status);
+}
+
+thread_local int checkval = 0xBAADF00D;
+// Init routine (called directly from main and passed to each added worker): prints a marker and sets the thread-local checkval.
+void init(void * ) {
+	printf("Local Init\n");
+	checkval = 0xFEEDFACE;
+}
+// Thread body: yields 50 times, printing "Bad Food!" whenever checkval differs from the value init stores; always returns NULL.
+void * checker( void * ) {
+	for(int i = 0; i < 50; i++) {
+		if(checkval != 0xFeedFace) {
+			printf("Bad Food!\n");
+		}
+		cfathread_yield();
+	}
+	printf("Done\n");
+	return NULL;
+}
+// Test driver: runs init on the calling thread, adds three workers that each run init, then creates and joins 7 checker threads.
+int main() {
+	init(NULL);
+	cfathread_cluster_t cl = cfathread_cluster_self();
+
+	cfathread_cluster_add_worker( cl, NULL, init, NULL );
+	cfathread_cluster_add_worker( cl, NULL, init, NULL );
+	cfathread_cluster_add_worker( cl, NULL, init, NULL );
+
+	cfathread_attr_t attr;
+	cfathread_attr_init(&attr);
+	cfathread_attr_setcluster(&attr, cl);	// checkers are created with this attribute, bound to cl
+	{
+		printf("Starting Checkers\n");
+		cfathread_t t[7];
+		for(int i = 0; i < 7; i++) {
+			cfathread_create( &t[i], &attr, checker, NULL );
+		}
+		for(int i = 0; i < 7; i++) {
+			cfathread_join( t[i], NULL );
+		}
+	}
+	cfathread_attr_destroy(&attr);
+	fflush(stdout);
+	_exit(0);
+}
