Index: src/InitTweak/FixGlobalInit.cc
===================================================================
--- src/InitTweak/FixGlobalInit.cc	(revision 21750628bd6069d85a828ac6db4f49592347417d)
+++ src/InitTweak/FixGlobalInit.cc	(revision eb2e723e2366bc9652cf9888645a9249f5fd129a)
@@ -84,6 +84,6 @@
 			// for library code are run before constructors and destructors for user code,
 			// specify a priority when building the library. Priorities 0-100 are reserved by gcc.
-			ctorParameters.push_back( new ConstantExpr( Constant::from_int( 101 ) ) );
-			dtorParameters.push_back( new ConstantExpr( Constant::from_int( 101 ) ) );
+			ctorParameters.push_back( new ConstantExpr( Constant::from_int( 102 ) ) );
+			dtorParameters.push_back( new ConstantExpr( Constant::from_int( 102 ) ) );
 		}
 		initFunction = new FunctionDecl( "_init_" + fixedName, DeclarationNode::Static, LinkageSpec::C, new FunctionType( Type::Qualifiers(), false ), new CompoundStmt( noLabels ), false, false );
Index: src/examples/thread.c
===================================================================
--- src/examples/thread.c	(revision 21750628bd6069d85a828ac6db4f49592347417d)
+++ src/examples/thread.c	(revision eb2e723e2366bc9652cf9888645a9249f5fd129a)
@@ -1,67 +1,67 @@
-#include <kernel>
+// #include <kernel>
 #include <stdlib>
 #include <threads>
 
-// Start coroutine routines
-extern "C" {
-      forall(dtype T | is_coroutine(T))
-      void CtxInvokeCoroutine(T * this);
+// // Start coroutine routines
+// extern "C" {
+//       forall(dtype T | is_coroutine(T))
+//       void CtxInvokeCoroutine(T * this);
 
-      forall(dtype T | is_coroutine(T))
-      void CtxStart(T * this, void ( *invoke)(T *));
+//       forall(dtype T | is_coroutine(T))
+//       void CtxStart(T * this, void ( *invoke)(T *));
 
-	forall(dtype T | is_coroutine(T))
-      void CtxInvokeThread(T * this);
-}
+// 	forall(dtype T | is_coroutine(T))
+//       void CtxInvokeThread(T * this);
+// }
 
-struct MyThread {
-	thread_h t;
-	unsigned id;
-	unsigned count;
-};
+// struct MyThread {
+// 	thread_h t;
+// 	unsigned id;
+// 	unsigned count;
+// };
 
-void ?{}( MyThread * this ) {
-	this->id = 0;
-	this->count = 10;
-}
+// void ?{}( MyThread * this ) {
+// 	this->id = 0;
+// 	this->count = 10;
+// }
 
-void ?{}( MyThread * this, unsigned id, unsigned count ) {
-	this->id = id;
-	this->count = count;
-}
+// void ?{}( MyThread * this, unsigned id, unsigned count ) {
+// 	this->id = id;
+// 	this->count = count;
+// }
 
-void ^?{}( MyThread * this ) {}
+// void ^?{}( MyThread * this ) {}
 
-void main(MyThread* this) {
-	printf("Main called with %p\n", this);
-	printf("Thread %d : Suspending %d times\n", this->id, this->count);
+// void main(MyThread* this) {
+// 	printf("Main called with %p\n", this);
+// 	printf("Thread %d : Suspending %d times\n", this->id, this->count);
 
-	for(int i = 0; i < this->count; i++) {
-		printf("Thread %d : Suspend No. %d\n", this->id, i + 1);
-		printf("Back to %p\n", &this->t.c);
-		suspend();
-	}
-}
+// 	for(int i = 0; i < this->count; i++) {
+// 		printf("Thread %d : Suspend No. %d\n", this->id, i + 1);
+// 		printf("Back to %p\n", &this->t.c);
+// 		suspend();
+// 	}
+// }
 
-thread_h* get_thread(MyThread* this) {
-	return &this->t;
-}
+// thread_h* get_thread(MyThread* this) {
+// 	return &this->t;
+// }
 
-coroutine* get_coroutine(MyThread* this) {
-	return &this->t.c;
-}
+// coroutine* get_coroutine(MyThread* this) {
+// 	return &this->t.c;
+// }
 
 int main() {
+	printf("Main is %p\n", this_coroutine());
 
-	thread(MyThread) thread1;
-	thread(MyThread) thread2;
+	// thread(MyThread) thread1;
+	// thread(MyThread) thread2;
 
-	thread2.handle.id = 1;
+	// thread2.handle.id = 1;
 
-	printf("\n\nMain is %p\n", this_coroutine());
 
-	kernel_run();
+	// // kernel_run();
 
-	printf("Kernel terminated correctly\n");
+	// printf("Kernel terminated correctly\n");
 
 	return 0;
Index: src/libcfa/concurrency/CtxSwitch-x86_64.S
===================================================================
--- src/libcfa/concurrency/CtxSwitch-x86_64.S	(revision 21750628bd6069d85a828ac6db4f49592347417d)
+++ src/libcfa/concurrency/CtxSwitch-x86_64.S	(revision eb2e723e2366bc9652cf9888645a9249f5fd129a)
@@ -84,4 +84,13 @@
 	jmp *%r12
 
+.text
+	.align 2
+.globl	CtxGet	// void CtxGet( void * this ): record the current stack and frame pointers into *this
+CtxGet:
+	movq %rsp,SP_OFFSET(%rdi)	// store %rsp at SP_OFFSET from the argument; %rdi presumably holds `this` (first SysV integer argument) - TODO confirm
+	movq %rbp,FP_OFFSET(%rdi)	// store %rbp at FP_OFFSET from the argument
+	// No frame is set up here, so the %rsp stored above is the value as seen on entry to CtxGet.
+	ret
+
 // Local Variables: //
 // mode: c //
Index: src/libcfa/concurrency/coroutines.c
===================================================================
--- src/libcfa/concurrency/coroutines.c	(revision 21750628bd6069d85a828ac6db4f49592347417d)
+++ src/libcfa/concurrency/coroutines.c	(revision eb2e723e2366bc9652cf9888645a9249f5fd129a)
@@ -36,18 +36,7 @@
 static size_t pageSize = 0;				// architecture pagesize HACK, should go in proper runtime singleton
 
-//Extra private desctructor for the main
-//FIXME the main should not actually allocate a stack
-//Since the main is never resumed the extra stack does not cause 
-//any problem but it is wasted memory
-void ?{}(coStack_t* this, size_t size);
-void ?{}(coroutine* this, size_t size);
-
-//Main coroutine
-//FIXME do not construct a stack for the main
-coroutine main_coroutine = { 1000 };
-
 //Current coroutine
 //Will need to be in TLS when multi-threading is added
-coroutine* current_coroutine = &main_coroutine;
+coroutine* current_coroutine;
 
 //-----------------------------------------------------------------------------
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision 21750628bd6069d85a828ac6db4f49592347417d)
+++ src/libcfa/concurrency/invoke.h	(revision eb2e723e2366bc9652cf9888645a9249f5fd129a)
@@ -52,5 +52,6 @@
       // assembler routines that performs the context switch
       extern void CtxInvokeStub( void );
-      void CtxSwitch( void *from, void *to ) asm ("CtxSwitch");
+      void CtxSwitch( void * from, void * to ) asm ("CtxSwitch");
+      void CtxGet( void * this ) asm ("CtxGet");
 
 #endif //_INVOKE_PRIVATE_H_
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 21750628bd6069d85a828ac6db4f49592347417d)
+++ src/libcfa/concurrency/kernel	(revision eb2e723e2366bc9652cf9888645a9249f5fd129a)
@@ -21,5 +21,5 @@
 
 struct processor {
-	struct proc_coroutine * cor;
+	struct processorCtx_t * ctx;
 	unsigned int thread_index;
 	unsigned int thread_count;
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 21750628bd6069d85a828ac6db4f49592347417d)
+++ src/libcfa/concurrency/kernel.c	(revision eb2e723e2366bc9652cf9888645a9249f5fd129a)
@@ -20,4 +20,7 @@
 //C Includes
 #include <stddef.h>
+extern "C" {
+#include <sys/resource.h>
+}
 
 //CFA Includes
@@ -29,9 +32,12 @@
 #include "invoke.h"
 
-processor systemProcessorStorage = {};
-processor * systemProcessor = &systemProcessorStorage;
+processor * systemProcessor;
+thread_h * mainThread;
+
+void kernel_startup(void)  __attribute__((constructor(101)));
+void kernel_shutdown(void) __attribute__((destructor(101)));
 
 void ?{}(processor * this) {
-	this->cor = NULL;
+	this->ctx = NULL;
 	this->thread_index = 0;
 	this->thread_count = 10;
@@ -51,27 +57,18 @@
 //-----------------------------------------------------------------------------
 // Processor coroutine
-struct proc_coroutine {
+struct processorCtx_t {
 	processor * proc;
 	coroutine c;
 };
 
-void ?{}(coroutine * this, processor * proc) {
-	this{};
-}
-
-DECL_COROUTINE(proc_coroutine)
-
-void ?{}(proc_coroutine * this, processor * proc) {
-	(&this->c){proc};
+DECL_COROUTINE(processorCtx_t)
+
+void ?{}(processorCtx_t * this, processor * proc) {
+	(&this->c){};
 	this->proc = proc;
-	proc->cor = this;
-}
-
-void ^?{}(proc_coroutine * this) {
-	^(&this->c){};
 }
 
 void CtxInvokeProcessor(processor * proc) {
-	proc_coroutine proc_cor_storage = {proc};
+	processorCtx_t proc_cor_storage = {proc};
 	resume( &proc_cor_storage );
 }
@@ -79,12 +76,12 @@
 //-----------------------------------------------------------------------------
 // Processor running routines
-void main(proc_coroutine * cor);
+void main(processorCtx_t * ctx);
 thread_h * nextThread(processor * this);
 void runThread(processor * this, thread_h * dst);
 void spin(processor * this, unsigned int * spin_count);
 
-void main(proc_coroutine * cor) {
-	processor * this;
-	this = cor->proc;
+void main(processorCtx_t * ctx) {
+	processor * this = ctx->proc;
+	LIB_DEBUG_PRINTF("Kernel : core %p starting\n", this);
 
 	thread_h * readyThread = NULL;
@@ -116,5 +113,5 @@
 
 void runThread(processor * this, thread_h * dst) {
-	coroutine * proc_ctx = get_coroutine(this->cor);
+	coroutine * proc_ctx = get_coroutine(this->ctx);
 	coroutine * thrd_ctx = get_coroutine(dst);
 	thrd_ctx->last = proc_ctx;
@@ -138,5 +135,5 @@
 // Kernel runner (Temporary)
 
-void scheduler_add( struct thread_h * thrd ) {
+void scheduler_add( thread_h * thrd ) {
 	LIB_DEBUG_PRINTF("Kernel : scheduling %p on core %p (%d spots)\n", thrd, systemProcessor, systemProcessor->thread_count);
 	for(int i = 0; i < systemProcessor->thread_count; i++) {
@@ -149,5 +146,5 @@
 }
 
-void scheduler_remove( struct thread_h * thrd ) {
+void scheduler_remove( thread_h * thrd ) {
 	LIB_DEBUG_PRINTF("Kernel : unscheduling %p from core %p\n", thrd, systemProcessor);
 	for(int i = 0; i < systemProcessor->thread_count; i++) {
@@ -162,10 +159,121 @@
 		}
 	}
-	LIB_DEBUG_PRINTF("Kernel : terminating core %p\n\n\n", systemProcessor);	
+	LIB_DEBUG_PRINTF("Kernel : terminating core %p\n", systemProcessor);	
 	systemProcessor->terminated = true;
 }
 
-void kernel_run( void ) {
-	CtxInvokeProcessor(systemProcessor);
+//-----------------------------------------------------------------------------
+// Kernel storage
+#define KERNEL_STORAGE(T,X) static char X##_storage[sizeof(T)]
+// Each use reserves sizeof(T) raw bytes named X_storage; nothing is constructed here. NOTE(review): a char array carries no alignment requirement of T - confirm the storage ends up suitably aligned.
+KERNEL_STORAGE(processorCtx_t, systemProcessorCtx);	// backing bytes for systemProcessor->ctx (see kernel_startup)
+KERNEL_STORAGE(processor, systemProcessor);	// backing bytes for systemProcessor (see kernel_startup)
+KERNEL_STORAGE(thread_h, mainThread);	// backing bytes for mainThread (see kernel_startup)
+KERNEL_STORAGE(machine_context_t, mainThread_context);	// handed out as the `context` pointer of the main thread's stack description
+
+//-----------------------------------------------------------------------------
+// Main thread construction
+struct mainThread_info_t {	// temporary description of the stack the process is already running on; copied field by field into a coStack_t
+	machine_context_t ctx;		// SP/FP snapshot written by CtxGet
+	unsigned int size;		// size of stack
+	void *base;				// base of stack
+	void *storage;			// pointer to stack
+	void *limit;			// stack grows towards stack limit
+	void *context;			// address of cfa_context_t
+	void *top;				// address of top of storage
+};
+
+void ?{}( mainThread_info_t * this ) {	// fill in a description of the stack this constructor is currently running on
+	CtxGet( &this->ctx );	// snapshot the stack and frame pointers as of this call
+	this->base = this->ctx.FP;	// the captured frame pointer is used as the stack base
+	this->storage = this->ctx.SP;	// the captured stack pointer is used as the storage pointer
+	// NOTE(review): ret is never examined; if getrlimit fails, r is read without having been filled in - confirm this is acceptable
+	rlimit r;
+	int ret = getrlimit( RLIMIT_STACK, &r);
+	this->size = r.rlim_cur;	// soft stack limit; NOTE(review): rlim_cur may not fit in unsigned int (e.g. RLIM_INFINITY) - confirm
+	// limit sits `size` bytes below base
+	this->limit = (void *)(((intptr_t)this->base) - this->size);
+	this->context = &mainThread_context_storage;	// statically reserved bytes sized for a machine_context_t
+	this->top = this->base;
+}
+
+void ?{}( coStack_t * this, mainThread_info_t * info) {	// build a coStack_t that describes the stack recorded in info; no memory is allocated here
+	this->userStack = true;	// presumably marks the storage as not owned by this coStack_t - TODO confirm
+	this->top = info->top;
+	this->context = info->context;
+	this->base = info->base;
+	this->limit = info->limit;
+	this->storage = info->storage;
+	this->size = info->size;
+}
+
+void ?{}( coroutine * this, mainThread_info_t * info) {	// construct the coroutine that represents the process's original thread of control
+	(&this->stack){ info };	// stack member is built from the recorded stack description rather than a size
+	this->name = "Main Thread";
+	this->errno_ = 0;
+	this->state = Inactive;
+	this->notHalted = true;
+}
+
+void ?{}( thread_h * this, mainThread_info_t * info) {	// construct the main thread handle from a recorded stack description
+	(&this->c){ info };	// only the embedded coroutine is constructed explicitly here
+}
+
+//-----------------------------------------------------------------------------
+// Kernel boot procedures
+void kernel_startup(void) {
+	// Declared with __attribute__((constructor(101))): constructs mainThread and systemProcessor in the static *_storage buffers, then resumes the processor coroutine
+	// SKULLDUGGERY: the mainThread steals the process main thread 
+	// which will then be scheduled by the systemProcessor normally
+	LIB_DEBUG_PRINTF("Kernel : Starting\n");	
+
+	mainThread_info_t ctx;	// presumably default-constructed by ?{}( mainThread_info_t * ), which records the current stack - TODO confirm
+	LIB_DEBUG_PRINTF("Kernel :    base : %p\n", ctx.base );
+	LIB_DEBUG_PRINTF("Kernel :     top : %p\n", ctx.top );
+	LIB_DEBUG_PRINTF("Kernel :   limit : %p\n", ctx.limit );
+	LIB_DEBUG_PRINTF("Kernel :    size : %x\n", ctx.size );
+	LIB_DEBUG_PRINTF("Kernel : storage : %p\n", ctx.storage );
+	LIB_DEBUG_PRINTF("Kernel : context : %p\n", ctx.context );
+
+	// Start by initializing the main thread
+	mainThread = (thread_h *)&mainThread_storage;
+	LIB_DEBUG_PRINTF("Kernel : Main thread : %p\n", mainThread );
+	mainThread{ &ctx };
+
+	// Initialize the system processor
+	systemProcessor = (processor *)&systemProcessor_storage;
+	systemProcessor{};
+
+	// Initialize the system processor ctx
+	// (the coroutine that contains the processing control flow)
+	systemProcessor->ctx = (processorCtx_t *)&systemProcessorCtx_storage;
+	systemProcessor->ctx{ systemProcessor };
+
+	scheduler_add(mainThread);
+	// current_coroutine has no static initializer, so it is pointed at the main thread's coroutine before the first resume
+	current_coroutine = &mainThread->c;
+
+	LIB_DEBUG_PRINTF("Kernel : Starting system processor\n");	
+	resume(systemProcessor->ctx);
+
+	LIB_DEBUG_PRINTF("Kernel : Started\n--------------------------------------------------\n\n");
+}
+void kernel_shutdown(void) {	// declared with __attribute__((destructor(101))): unschedules the main thread, suspends once, then destroys the kernel objects
+	LIB_DEBUG_PRINTF("\n--------------------------------------------------\nKernel : Shutting down\n");
+	// The banner above now ends with a newline so the next message starts on its own line
+	LIB_DEBUG_PRINTF("Unscheduling main thread\n");
+	scheduler_remove(mainThread);
+	// NOTE(review): presumably yields to the processor coroutine so it can observe `terminated` and finish - confirm
+	LIB_DEBUG_PRINTF("Suspending main\n");
+	suspend();
+
+	LIB_DEBUG_PRINTF("Kernel : Control return to initial process thread\n");
+	// Destroy in the reverse of the order kernel_startup constructed them: processor ctx, processor, main thread
+	^(systemProcessor->ctx){};
+	^(systemProcessor){};
+
+	^(mainThread){};
+
+	LIB_DEBUG_PRINTF("Kernel : Shutdown complete\n");	
 }
 
