Index: libcfa/src/concurrency/CtxSwitch-x86_64.S
===================================================================
--- libcfa/src/concurrency/CtxSwitch-x86_64.S	(revision e3fea427569271c0c15fd2ca6d3296f213856ead)
+++ libcfa/src/concurrency/CtxSwitch-x86_64.S	(revision 52142c2076aee6b5b1601fdc0e5c0f6e9ce6581e)
@@ -87,5 +87,6 @@
 CtxInvokeStub:
 	movq %rbx, %rdi
-	jmp *%r12
+	movq %r12, %rsi
+	jmp *%r13
 	.size  CtxInvokeStub, .-CtxInvokeStub
 
Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision e3fea427569271c0c15fd2ca6d3296f213856ead)
+++ libcfa/src/concurrency/coroutine.cfa	(revision 52142c2076aee6b5b1601fdc0e5c0f6e9ce6581e)
@@ -187,9 +187,5 @@
 // is not inline (We can't inline Cforall in C)
 extern "C" {
-	void __suspend_internal(void) {
-		suspend();
-	}
-
-	void __leave_coroutine( coroutine_desc * src ) {
+	void __leave_coroutine( struct coroutine_desc * src ) {
 		coroutine_desc * starter = src->cancellation != 0 ? src->last : src->starter;
 
@@ -207,4 +203,16 @@
 		CoroutineCtxSwitch( src, starter );
 	}
+
+	struct coroutine_desc * __finish_coroutine(void) { // called by CtxInvokeCoroutine before it runs the coroutine main
+		struct coroutine_desc * cor = kernelTLS.this_thread->curr_cor; // coroutine currently recorded in this_thread->curr_cor
+
+		if(cor->state == Primed) { // still in the Primed state: suspend before going any further
+			suspend();
+		}
+
+		cor->state = Active; // mark the coroutine Active before returning to the invoker
+
+		return cor; // the invoker keeps this descriptor instead of looking it up itself
+	}
 }
 
Index: libcfa/src/concurrency/coroutine.hfa
===================================================================
--- libcfa/src/concurrency/coroutine.hfa	(revision e3fea427569271c0c15fd2ca6d3296f213856ead)
+++ libcfa/src/concurrency/coroutine.hfa	(revision 52142c2076aee6b5b1601fdc0e5c0f6e9ce6581e)
@@ -61,9 +61,8 @@
 // Start coroutine routines
 extern "C" {
-      forall(dtype T | is_coroutine(T))
-      void CtxInvokeCoroutine(T * this);
+	void CtxInvokeCoroutine(void (*main)(void *), void * this);
 
-      forall(dtype T | is_coroutine(T))
-      void CtxStart(T * this, void ( *invoke)(T *));
+	forall(dtype T)
+	void CtxStart(void (*main)(T &), struct coroutine_desc * cor, T & this, void (*invoke)(void (*main)(void *), void *));
 
 	extern void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
@@ -129,6 +128,8 @@
 
 	if( unlikely(dst->context.SP == 0p) ) {
+		TL_GET( this_thread )->curr_cor = dst;
 		__stack_prepare(&dst->stack, 65000);
-		CtxStart(&cor, CtxInvokeCoroutine);
+		CtxStart(main, dst, cor, CtxInvokeCoroutine);
+		TL_GET( this_thread )->curr_cor = src;
 	}
 
Index: libcfa/src/concurrency/invoke.c
===================================================================
--- libcfa/src/concurrency/invoke.c	(revision e3fea427569271c0c15fd2ca6d3296f213856ead)
+++ libcfa/src/concurrency/invoke.c	(revision 52142c2076aee6b5b1601fdc0e5c0f6e9ce6581e)
@@ -29,8 +29,7 @@
 // Called from the kernel when starting a coroutine or task so must switch back to user mode.
 
-extern void __suspend_internal(void);
-extern void __leave_coroutine( struct coroutine_desc * );
-extern void __finish_creation( struct thread_desc * );
-extern void __leave_thread_monitor( struct thread_desc * this );
+extern void __leave_coroutine ( struct coroutine_desc * );
+extern struct coroutine_desc * __finish_coroutine(void);
+extern void __leave_thread_monitor(void);	// no parameter: the implementation reads the thread from TL_GET( this_thread )
 extern void disable_interrupts() OPTIONAL_THREAD;
 extern void enable_interrupts( __cfaabi_dbg_ctx_param );
@@ -38,15 +37,10 @@
 void CtxInvokeCoroutine(
 	void (*main)(void *),
-	struct coroutine_desc *(*get_coroutine)(void *),
 	void *this
 ) {
-	struct coroutine_desc* cor = get_coroutine( this );
+	// Finish setting up the coroutine by setting its state
+	struct coroutine_desc * cor = __finish_coroutine();
 
-	if(cor->state == Primed) {
-		__suspend_internal();
-	}
-
-	cor->state = Active;
-
+	// Call the main of the coroutine
 	main( this );
 
@@ -83,16 +77,7 @@
 
 void CtxInvokeThread(
-	void (*dtor)(void *),
 	void (*main)(void *),
-	struct thread_desc *(*get_thread)(void *),
 	void *this
 ) {
-	// Fetch the thread handle from the user defined thread structure
-	struct thread_desc* thrd = get_thread( this );
-
-	// First suspend, once the thread arrives here,
-	// the function pointer to main can be invalidated without risk
-	__finish_creation( thrd );
-
 	// Officially start the thread by enabling preemption
 	enable_interrupts( __cfaabi_dbg_ctx );
@@ -108,16 +93,14 @@
 	// The order of these 4 operations is very important
 	//Final suspend, should never return
-	__leave_thread_monitor( thrd );
+	__leave_thread_monitor();
 	__cabi_abort( "Resumed dead thread" );
 }
 
-
 void CtxStart(
 	void (*main)(void *),
-	struct coroutine_desc *(*get_coroutine)(void *),
+	struct coroutine_desc * cor,
 	void *this,
 	void (*invoke)(void *)
 ) {
-	struct coroutine_desc * cor = get_coroutine( this );
 	struct __stack_t * stack = cor->stack.storage;
 
@@ -138,5 +121,6 @@
 
 	fs->dummyReturn = NULL;
-	fs->argument[0] = this;     // argument to invoke
+	fs->argument[0] = main;     // first argument to invoke: the main routine
+	fs->argument[1] = this;     // second argument to invoke: the object handed to main
 	fs->rturn = invoke;
 
@@ -156,9 +140,10 @@
 	fs->dummyReturn = NULL;
 	fs->rturn = CtxInvokeStub;
-	fs->fixedRegisters[0] = this;
-	fs->fixedRegisters[1] = invoke;
+	fs->fixedRegisters[0] = main;	// NOTE(review): presumably reloaded into %rbx (CtxInvokeStub copies it to %rdi) -- confirm
+	fs->fixedRegisters[1] = this;	// NOTE(review): presumably reloaded into %r12 (CtxInvokeStub copies it to %rsi) -- confirm
+	fs->fixedRegisters[2] = invoke;	// NOTE(review): presumably reloaded into %r13 (CtxInvokeStub jumps through it) -- confirm
 
 #elif defined( __ARM_ARCH )
-
+#error ARM needs to be upgraded to use two parameters like X86/X64 (A.K.A. : I broke this and do not know how to fix it)
 	struct FakeStack {
 		float fpRegs[16];			// floating point registers
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision e3fea427569271c0c15fd2ca6d3296f213856ead)
+++ libcfa/src/concurrency/kernel.cfa	(revision 52142c2076aee6b5b1601fdc0e5c0f6e9ce6581e)
@@ -469,5 +469,5 @@
 	);
 
-	Abort( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" ); 
+	Abort( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
 
 	Abort( pthread_create( pthread, &attr, start, arg ), "pthread_create" );
@@ -490,6 +490,7 @@
 	verify( ! kernelTLS.preemption_state.enabled );
 
+	kernelTLS.this_thread->curr_cor = dst;
 	__stack_prepare( &dst->stack, 65000 );
-	CtxStart(&this->runner, CtxInvokeCoroutine);
+	CtxStart(main, dst, this->runner, CtxInvokeCoroutine);
 
 	verify( ! kernelTLS.preemption_state.enabled );
@@ -505,4 +506,6 @@
 	CtxSwitch( &src->context, &dst->context );
 	// when CtxSwitch returns we are back in the src coroutine
+
+	mainThread->curr_cor = &mainThread->self_cor;
 
 	// set state of new coroutine to active
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision e3fea427569271c0c15fd2ca6d3296f213856ead)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 52142c2076aee6b5b1601fdc0e5c0f6e9ce6581e)
@@ -88,6 +88,5 @@
 // Threads
 extern "C" {
-      forall(dtype T | is_thread(T))
-      void CtxInvokeThread(T * this);
+      void CtxInvokeThread(void (*main)(void *), void * this);
 }
 
Index: libcfa/src/concurrency/monitor.cfa
===================================================================
--- libcfa/src/concurrency/monitor.cfa	(revision e3fea427569271c0c15fd2ca6d3296f213856ead)
+++ libcfa/src/concurrency/monitor.cfa	(revision 52142c2076aee6b5b1601fdc0e5c0f6e9ce6581e)
@@ -243,5 +243,6 @@
 	// last routine called by a thread.
 	// Should never return
-	void __leave_thread_monitor( thread_desc * thrd ) {
+	void __leave_thread_monitor() {
+		thread_desc * thrd = TL_GET( this_thread );
 		monitor_desc * this = &thrd->self_mon;
 
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision e3fea427569271c0c15fd2ca6d3296f213856ead)
+++ libcfa/src/concurrency/thread.cfa	(revision 52142c2076aee6b5b1601fdc0e5c0f6e9ce6581e)
@@ -58,5 +58,5 @@
 void ?{}( scoped(T)& this ) with( this ) {
 	handle{};
-	__thrd_start(handle);
+	__thrd_start(handle, main);
 }
 
@@ -64,5 +64,5 @@
 void ?{}( scoped(T)& this, P params ) with( this ) {
 	handle{ params };
-	__thrd_start(handle);
+	__thrd_start(handle, main);
 }
 
@@ -75,27 +75,17 @@
 // Starting and stopping threads
 forall( dtype T | is_thread(T) )
-void __thrd_start( T& this ) {
+void __thrd_start( T & this, void (*main_p)(T &) ) {
 	thread_desc * this_thrd = get_thread(this);
 	thread_desc * curr_thrd = TL_GET( this_thread );
 
 	disable_interrupts();
-	CtxStart(&this, CtxInvokeThread);
+	CtxStart(main_p, get_coroutine(this), this, CtxInvokeThread);
+
 	this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
 	verify( this_thrd->context.SP );
-	CtxSwitch( &curr_thrd->context, &this_thrd->context );
+	// CtxSwitch( &curr_thrd->context, &this_thrd->context );
 
 	ScheduleThread(this_thrd);
 	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
-extern "C" {
-	// KERNEL ONLY
-	void __finish_creation(thread_desc * this) {
-		// set new coroutine that the processor is executing
-		// and context switch to it
-		verify( kernelTLS.this_thread != this );
-		verify( kernelTLS.this_thread->context.SP );
-		CtxSwitch( &this->context, &kernelTLS.this_thread->context );
-	}
 }
 
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision e3fea427569271c0c15fd2ca6d3296f213856ead)
+++ libcfa/src/concurrency/thread.hfa	(revision 52142c2076aee6b5b1601fdc0e5c0f6e9ce6581e)
@@ -54,5 +54,5 @@
 
 forall( dtype T | is_thread(T) )
-void __thrd_start( T & this );
+void __thrd_start( T & this, void (*)(T &) );
 
 //-----------------------------------------------------------------------------
