Index: libcfa/src/concurrency/CtxSwitch-i386.S
===================================================================
--- libcfa/src/concurrency/CtxSwitch-i386.S	(revision 2fabdc0253be63e4524fc9b7dd87d64721f61dd9)
+++ libcfa/src/concurrency/CtxSwitch-i386.S	(revision b2f6113b6922a9164605cb05cd095dc177208621)
@@ -53,7 +53,7 @@
 	// Save floating & SSE control words on the stack.
 
-        sub    $8,%esp
-        stmxcsr 0(%esp)         // 4 bytes
-        fnstcw  4(%esp)         // 2 bytes
+	sub    $8,%esp
+	stmxcsr 0(%esp)         // 4 bytes
+	fnstcw  4(%esp)         // 2 bytes
 
 	// Save volatile registers on the stack.
@@ -67,6 +67,4 @@
 	movl %esp,SP_OFFSET(%eax)
 	movl %ebp,FP_OFFSET(%eax)
-//	movl 4(%ebp),%ebx	// save previous eip for debugger
-//	movl %ebx,PC_OFFSET(%eax)
 
 	// Copy the "to" context argument from the stack to register eax
@@ -89,7 +87,7 @@
 	// Load floating & SSE control words from the stack.
 
-        fldcw   4(%esp)
-        ldmxcsr 0(%esp)
-        add    $8,%esp
+	fldcw   4(%esp)
+	ldmxcsr 0(%esp)
+	add    $8,%esp
 
 	// Return to thread.
Index: libcfa/src/concurrency/CtxSwitch-x86_64.S
===================================================================
--- libcfa/src/concurrency/CtxSwitch-x86_64.S	(revision 2fabdc0253be63e4524fc9b7dd87d64721f61dd9)
+++ libcfa/src/concurrency/CtxSwitch-x86_64.S	(revision b2f6113b6922a9164605cb05cd095dc177208621)
@@ -39,5 +39,4 @@
 #define SP_OFFSET	( 0 * PTR_BYTE )
 #define FP_OFFSET	( 1 * PTR_BYTE )
-#define PC_OFFSET	( 2 * PTR_BYTE )
 
 .text
Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision 2fabdc0253be63e4524fc9b7dd87d64721f61dd9)
+++ libcfa/src/concurrency/coroutine.cfa	(revision b2f6113b6922a9164605cb05cd095dc177208621)
@@ -35,9 +35,9 @@
 
 extern "C" {
-      void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
-      static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) __attribute__ ((__noreturn__));
-      static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) {
-            abort();
-      }
+	void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
+	static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) __attribute__ ((__noreturn__));
+	static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) {
+		abort();
+	}
 }
 
@@ -47,57 +47,65 @@
 // minimum feasible stack size in bytes
 #define MinStackSize 1000
-static size_t pageSize = 0;				// architecture pagesize HACK, should go in proper runtime singleton
+extern size_t __page_size;				// architecture pagesize HACK, should go in proper runtime singleton
+
+void __stack_prepare( __stack_info_t * this, size_t create_size );
 
 //-----------------------------------------------------------------------------
 // Coroutine ctors and dtors
-void ?{}( coStack_t & this, void * storage, size_t storageSize ) with( this ) {
-      size		 = storageSize == 0 ? 65000 : storageSize; // size of stack
-      this.storage = storage;                                // pointer to stack
-      limit		 = NULL;                                   // stack grows towards stack limit
-      base		 = NULL;                                   // base of stack
-      context	 = NULL;                                   // address of cfa_context_t
-      top		 = NULL;                                   // address of top of storage
-      userStack	 = storage != NULL;
-}
-
-void ^?{}(coStack_t & this) {
-      if ( ! this.userStack && this.storage ) {
-            __cfaabi_dbg_debug_do(
-                  if ( mprotect( this.storage, pageSize, PROT_READ | PROT_WRITE ) == -1 ) {
-                        abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
-                  }
-            );
-            free( this.storage );
-      }
+void ?{}( __stack_info_t & this, void * storage, size_t storageSize ) {
+	this.storage   = (__stack_t *)storage;
+
+	// Did we get a piece of storage ?
+	if (this.storage || storageSize != 0) {
+		// We either got a piece of storage or the user asked for a specific size
+		// Immediately create the stack
+		// (It is slightly unintuitive that coroutines with a non-default stack size are created eagerly,
+		// but it avoids having every coroutine carry an unnecessary size field)
+		verify( storageSize != 0 );
+		__stack_prepare( &this, storageSize );
+	}
+}
+
+void ^?{}(__stack_info_t & this) {
+	if ( ! this.userStack && this.storage ) {
+		void * storage = (char*)(this.storage) - this.storage->size;
+		__cfaabi_dbg_debug_do(
+			storage = (char*)(storage) - __page_size;
+			if ( mprotect( storage, __page_size, PROT_READ | PROT_WRITE ) == -1 ) {
+				abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
+			}
+		);
+		__cfaabi_dbg_print_safe("Kernel : Deleting stack %p\n", storage);
+		free( storage );
+	}
 }
 
 void ?{}( coroutine_desc & this, const char * name, void * storage, size_t storageSize ) with( this ) {
-      (this.stack){storage, storageSize};
-      this.name = name;
-      errno_ = 0;
-      state = Start;
-      starter = NULL;
-      last = NULL;
-      cancellation = NULL;
+	(this.stack){storage, storageSize};
+	this.name = name;
+	state = Start;
+	starter = NULL;
+	last = NULL;
+	cancellation = NULL;
 }
 
 void ^?{}(coroutine_desc& this) {
-      if(this.state != Halted && this.state != Start) {
-            coroutine_desc * src = TL_GET( this_thread )->curr_cor;
-            coroutine_desc * dst = &this;
-
-            struct _Unwind_Exception storage;
-            storage.exception_class = -1;
-            storage.exception_cleanup = _CtxCoroutine_UnwindCleanup;
-            this.cancellation = &storage;
-            this.last = src;
-
-	      // not resuming self ?
-	      if ( src == dst ) {
-		      abort( "Attempt by coroutine %.256s (%p) to terminate itself.\n", src->name, src );
-            }
-
-	      CoroutineCtxSwitch( src, dst );
-      }
+	if(this.state != Halted && this.state != Start) {
+		coroutine_desc * src = TL_GET( this_thread )->curr_cor;
+		coroutine_desc * dst = &this;
+
+		struct _Unwind_Exception storage;
+		storage.exception_class = -1;
+		storage.exception_cleanup = _CtxCoroutine_UnwindCleanup;
+		this.cancellation = &storage;
+		this.last = src;
+
+		// not resuming self ?
+		if ( src == dst ) {
+			abort( "Attempt by coroutine %.256s (%p) to terminate itself.\n", src->name, src );
+		}
+
+		CoroutineCtxSwitch( src, dst );
+	}
 }
 
@@ -106,87 +114,91 @@
 forall(dtype T | is_coroutine(T))
 void prime(T& cor) {
-      coroutine_desc* this = get_coroutine(cor);
-      assert(this->state == Start);
-
-      this->state = Primed;
-      resume(cor);
+	coroutine_desc* this = get_coroutine(cor);
+	assert(this->state == Start);
+
+	this->state = Primed;
+	resume(cor);
 }
 
 // Wrapper for co
 void CoroutineCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
-      // Safety note : Preemption must be disabled since there is a race condition
-      // kernelTLS.this_thread->curr_cor and $rsp/$rbp must agree at all times
-      verify( TL_GET( preemption_state.enabled ) || TL_GET( this_processor )->do_terminate );
-      disable_interrupts();
-
-      // set state of current coroutine to inactive
-      src->state = src->state == Halted ? Halted : Inactive;
-
-      // set new coroutine that task is executing
-      TL_GET( this_thread )->curr_cor = dst;
-
-      // context switch to specified coroutine
-      assert( src->stack.context );
-      CtxSwitch( src->stack.context, dst->stack.context );
-      // when CtxSwitch returns we are back in the src coroutine
-
-      // set state of new coroutine to active
-      src->state = Active;
-
-      enable_interrupts( __cfaabi_dbg_ctx );
-      verify( TL_GET( preemption_state.enabled ) || TL_GET( this_processor )->do_terminate );
-
-
-      if( unlikely(src->cancellation != NULL) ) {
-            _CtxCoroutine_Unwind(src->cancellation, src);
-      }
-} //ctxSwitchDirect
-
-void create_stack( coStack_t* this, unsigned int storageSize ) with( *this ) {
-      //TEMP HACK do this on proper kernel startup
-      if(pageSize == 0ul) pageSize = sysconf( _SC_PAGESIZE );
-
-      size_t cxtSize = libCeiling( sizeof(machine_context_t), 8 ); // minimum alignment
-
-      if ( !storage ) {
-            __cfaabi_dbg_print_safe("Kernel : Creating stack of size %zu for stack obj %p\n", cxtSize + size + 8, this);
-
-            userStack = false;
-            size = libCeiling( storageSize, 16 );
-            // use malloc/memalign because "new" raises an exception for out-of-memory
-
-            // assume malloc has 8 byte alignment so add 8 to allow rounding up to 16 byte alignment
-            __cfaabi_dbg_debug_do( storage = memalign( pageSize, cxtSize + size + pageSize ) );
-            __cfaabi_dbg_no_debug_do( storage = malloc( cxtSize + size + 8 ) );
-
-            __cfaabi_dbg_debug_do(
-                  if ( mprotect( storage, pageSize, PROT_NONE ) == -1 ) {
-                        abort( "(uMachContext &)%p.createContext() : internal error, mprotect failure, error(%d) %s.", this, (int)errno, strerror( (int)errno ) );
-                  } // if
-            );
-
-            if ( (intptr_t)storage == 0 ) {
-                  abort( "Attempt to allocate %zd bytes of storage for coroutine or task execution-state but insufficient memory available.", size );
-            } // if
-
-            __cfaabi_dbg_debug_do( limit = (char *)storage + pageSize );
-            __cfaabi_dbg_no_debug_do( limit = (char *)libCeiling( (unsigned long)storage, 16 ) ); // minimum alignment
-
-      } else {
-            __cfaabi_dbg_print_safe("Kernel : stack obj %p using user stack %p(%u bytes)\n", this, storage, storageSize);
-
-            assertf( ((size_t)storage & (libAlign() - 1)) == 0ul, "Stack storage %p for task/coroutine must be aligned on %d byte boundary.", storage, (int)libAlign() );
-            userStack = true;
-            size = storageSize - cxtSize;
-
-            if ( size % 16 != 0u ) size -= 8;
-
-            limit = (char *)libCeiling( (unsigned long)storage, 16 ); // minimum alignment
-      } // if
-      assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %d bytes for a stack.", size, MinStackSize );
-
-      base = (char *)limit + size;
-      context = base;
-      top = (char *)context + cxtSize;
+	// Safety note : Preemption must be disabled since there is a race condition
+	// kernelTLS.this_thread->curr_cor and $rsp/$rbp must agree at all times
+	verify( TL_GET( preemption_state.enabled ) || TL_GET( this_processor )->do_terminate );
+	disable_interrupts();
+
+	// set state of current coroutine to inactive
+	src->state = src->state == Halted ? Halted : Inactive;
+
+	// set new coroutine that task is executing
+	TL_GET( this_thread )->curr_cor = dst;
+
+	// context switch to specified coroutine
+	CtxSwitch( &src->context, &dst->context );
+	// when CtxSwitch returns we are back in the src coroutine
+
+	// set state of new coroutine to active
+	src->state = Active;
+
+	enable_interrupts( __cfaabi_dbg_ctx );
+	verify( TL_GET( preemption_state.enabled ) || TL_GET( this_processor )->do_terminate );
+
+	if( unlikely(src->cancellation != NULL) ) {
+		_CtxCoroutine_Unwind(src->cancellation, src);
+	}
+}
+
+[void *, size_t] __stack_alloc( size_t storageSize ) {
+	static const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
+	assert(__page_size != 0l);
+	size_t size = libCeiling( storageSize, 16 ) + stack_data_size;
+
+	// If we are running debug, we also need to allocate a guardpage to catch stack overflows.
+	void * storage;
+	__cfaabi_dbg_debug_do(
+		storage = memalign( __page_size, size + __page_size );
+	);
+	__cfaabi_dbg_no_debug_do(
+		storage = (void*)malloc(size);
+	);
+
+	__cfaabi_dbg_print_safe("Kernel : Created stack %p of size %zu\n", storage, size);
+	__cfaabi_dbg_debug_do(
+		if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
+			abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
+		}
+		storage = (void *)(((intptr_t)storage) + __page_size);
+	);
+
+	verify( ((intptr_t)storage & (libAlign() - 1)) == 0ul );
+	return [storage, size];
+}
+
+void __stack_prepare( __stack_info_t * this, size_t create_size ) {
+	static const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
+	bool userStack;
+	void * storage;
+	size_t size;
+	if ( !this->storage ) {
+		userStack = false;
+		[storage, size] = __stack_alloc( create_size );
+	} else {
+		userStack = true;
+		__cfaabi_dbg_print_safe("Kernel : stack obj %p using user stack %p(%zu bytes)\n", this, this->storage, this->storage->size);
+
+		// The stack must be aligned, advance the pointer to the next aligned address
+		storage = (void*)libCeiling( (intptr_t)this->storage, libAlign());
+
+		// The size needs to be shrunk to fit all the extra data structures and be aligned
+		ptrdiff_t diff = (intptr_t)storage - (intptr_t)this->storage;
+		size = libFloor(create_size - stack_data_size - diff, libAlign());
+	} // if
+	assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %d bytes for a stack.", size, MinStackSize );
+
+	this->storage = (__stack_t *)((intptr_t)storage + size);
+	this->storage->size  = size;
+	this->storage->limit = storage;
+	this->storage->base  = (void*)((intptr_t)storage + size);
+	this->userStack = userStack;
 }
 
@@ -194,24 +206,24 @@
 // is not inline (We can't inline Cforall in C)
 extern "C" {
-      void __suspend_internal(void) {
-            suspend();
-      }
-
-      void __leave_coroutine( coroutine_desc * src ) {
-            coroutine_desc * starter = src->cancellation != 0 ? src->last : src->starter;
-
-            src->state = Halted;
-
-            assertf( starter != 0,
-                  "Attempt to suspend/leave coroutine \"%.256s\" (%p) that has never been resumed.\n"
-                  "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
-                  src->name, src );
-            assertf( starter->state != Halted,
-                  "Attempt by coroutine \"%.256s\" (%p) to suspend/leave back to terminated coroutine \"%.256s\" (%p).\n"
-                  "Possible cause is terminated coroutine's main routine has already returned.",
-                  src->name, src, starter->name, starter );
-
-            CoroutineCtxSwitch( src, starter );
-      }
+	void __suspend_internal(void) {
+		suspend();
+	}
+
+	void __leave_coroutine( coroutine_desc * src ) {
+		coroutine_desc * starter = src->cancellation != 0 ? src->last : src->starter;
+
+		src->state = Halted;
+
+		assertf( starter != 0,
+			"Attempt to suspend/leave coroutine \"%.256s\" (%p) that has never been resumed.\n"
+			"Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
+			src->name, src );
+		assertf( starter->state != Halted,
+			"Attempt by coroutine \"%.256s\" (%p) to suspend/leave back to terminated coroutine \"%.256s\" (%p).\n"
+			"Possible cause is terminated coroutine's main routine has already returned.",
+			src->name, src, starter->name, starter );
+
+		CoroutineCtxSwitch( src, starter );
+	}
 }
 
Index: libcfa/src/concurrency/coroutine.hfa
===================================================================
--- libcfa/src/concurrency/coroutine.hfa	(revision 2fabdc0253be63e4524fc9b7dd87d64721f61dd9)
+++ libcfa/src/concurrency/coroutine.hfa	(revision b2f6113b6922a9164605cb05cd095dc177208621)
@@ -68,5 +68,5 @@
 // Private wrappers for context switch and stack creation
 extern void CoroutineCtxSwitch(coroutine_desc * src, coroutine_desc * dst);
-extern void create_stack( coStack_t * this, unsigned int storageSize );
+extern void __stack_prepare   ( __stack_info_t * this, size_t size /* ignored if storage already allocated */);
 
 // Suspend implementation inlined for performance
@@ -102,6 +102,6 @@
 	coroutine_desc * dst = get_coroutine(cor);
 
-	if( unlikely(!dst->stack.base) ) {
-		create_stack(&dst->stack, dst->stack.size);
+	if( unlikely(!dst->stack.storage || !dst->stack.storage->base) ) {
+		__stack_prepare(&dst->stack, 65000);
 		CtxStart(&cor, CtxInvokeCoroutine);
 	}
Index: libcfa/src/concurrency/invoke.c
===================================================================
--- libcfa/src/concurrency/invoke.c	(revision 2fabdc0253be63e4524fc9b7dd87d64721f61dd9)
+++ libcfa/src/concurrency/invoke.c	(revision b2f6113b6922a9164605cb05cd095dc177208621)
@@ -122,5 +122,6 @@
 	void (*invoke)(void *)
 ) {
-	struct coStack_t* stack = &get_coroutine( this )->stack;
+	struct coroutine_desc * cor = get_coroutine( this );
+	struct __stack_t * stack = cor->stack.storage;
 
 #if defined( __i386 )
@@ -136,12 +137,14 @@
 	};
 
-	((struct machine_context_t *)stack->context)->SP = (char *)stack->base - sizeof( struct FakeStack );
-	((struct machine_context_t *)stack->context)->FP = NULL;		// terminate stack with NULL fp
+	cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
+	cor->context.FP = NULL;		// terminate stack with NULL fp
 
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->dummyReturn = NULL;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->argument[0] = this;     // argument to invoke
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->rturn = invoke;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->mxcr = 0x1F80; //Vol. 2A 3-520
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7
+	struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
+
+	fs->dummyReturn = NULL;
+	fs->argument[0] = this;     // argument to invoke
+	fs->rturn = invoke;
+	fs->mxcr = 0x1F80; //Vol. 2A 3-520
+	fs->fcw = 0x037F;  //Vol. 1 8-7
 
 #elif defined( __x86_64 )
@@ -155,13 +158,15 @@
 	};
 
-	((struct machine_context_t *)stack->context)->SP = (char *)stack->base - sizeof( struct FakeStack );
-	((struct machine_context_t *)stack->context)->FP = NULL;		// terminate stack with NULL fp
+	cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
+	cor->context.FP = NULL;		// terminate stack with NULL fp
 
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->dummyReturn = NULL;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->rturn = CtxInvokeStub;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fixedRegisters[0] = this;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fixedRegisters[1] = invoke;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->mxcr = 0x1F80; //Vol. 2A 3-520
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7
+	struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
+
+	fs->dummyReturn = NULL;
+	fs->rturn = CtxInvokeStub;
+	fs->fixedRegisters[0] = this;
+	fs->fixedRegisters[1] = invoke;
+	fs->mxcr = 0x1F80; //Vol. 2A 3-520
+	fs->fcw = 0x037F;  //Vol. 1 8-7
 
 #elif defined( __ARM_ARCH )
@@ -173,8 +178,8 @@
 	};
 
-	((struct machine_context_t *)stack->context)->SP = (char *)stack->base - sizeof( struct FakeStack );
-	((struct machine_context_t *)stack->context)->FP = NULL;
+	cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
+	cor->context.FP = NULL;
 
-	struct FakeStack *fs = (struct FakeStack *)((struct machine_context_t *)stack->context)->SP;
+	struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
 
 	fs->intRegs[8] = CtxInvokeStub;
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision 2fabdc0253be63e4524fc9b7dd87d64721f61dd9)
+++ libcfa/src/concurrency/invoke.h	(revision b2f6113b6922a9164605cb05cd095dc177208621)
@@ -62,12 +62,42 @@
 	#endif
 
-	struct coStack_t {
-		size_t size;									// size of stack
-		void * storage;									// pointer to stack
-		void * limit;									// stack grows towards stack limit
-		void * base;									// base of stack
-		void * context;									// address of cfa_context_t
-		void * top;										// address of top of storage
-		bool userStack;									// whether or not the user allocated the stack
+	struct __stack_context_t {
+		void * SP;
+
+		void * FP;
+		// copy of global UNIX variable errno
+		int errno_;
+	};
+
+	// low addresses :           +----------------------+ <- start of allocation
+	//                           |  optional guard page |
+	//                           +----------------------+ <- __stack_t.limit
+	//                           |                      |
+	//                           |       /\ /\ /\       |
+	//                           |       || || ||       |
+	//                           |                      |
+	//                           |    program  stack    |
+	//                           |                      |
+	// __stack_info_t.storage -> +----------------------+ <- __stack_t.base
+	//                           |      __stack_t       |
+	// high addresses:           +----------------------+ <- end of allocation
+
+	struct __stack_t {
+		// size of stack
+		size_t size;
+
+		// stack grows towards stack limit
+		void * limit;
+
+		// base of stack
+		void * base;
+	};
+
+	struct __stack_info_t {
+		// pointer to stack
+		struct __stack_t * storage;
+
+		// whether or not the user allocated the stack
+		bool userStack;
 	};
 
@@ -75,15 +105,16 @@
 
 	struct coroutine_desc {
+		// context that is switched during a CtxSwitch
+		struct __stack_context_t context;
+
 		// stack information of the coroutine
-		struct coStack_t stack;
+		struct __stack_info_t stack;
 
 		// textual name for coroutine/task, initialized by uC++ generated code
 		const char * name;
 
-		// copy of global UNIX variable errno
-		int errno_;
-
 		// current execution status for coroutine
 		enum coroutine_state state;
+
 		// first coroutine to resume this one
 		struct coroutine_desc * starter;
@@ -230,5 +261,5 @@
 	// assembler routines that performs the context switch
 	extern void CtxInvokeStub( void );
-	void CtxSwitch( void * from, void * to ) asm ("CtxSwitch");
+	void CtxSwitch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("CtxSwitch");
 	// void CtxStore ( void * this ) asm ("CtxStore");
 	// void CtxRet   ( void * dst  ) asm ("CtxRet");
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 2fabdc0253be63e4524fc9b7dd87d64721f61dd9)
+++ libcfa/src/concurrency/kernel.cfa	(revision b2f6113b6922a9164605cb05cd095dc177208621)
@@ -42,8 +42,8 @@
 //-----------------------------------------------------------------------------
 // Kernel storage
-KERNEL_STORAGE(cluster,           mainCluster);
-KERNEL_STORAGE(processor,         mainProcessor);
-KERNEL_STORAGE(thread_desc,       mainThread);
-KERNEL_STORAGE(machine_context_t, mainThreadCtx);
+KERNEL_STORAGE(cluster,		mainCluster);
+KERNEL_STORAGE(processor,	mainProcessor);
+KERNEL_STORAGE(thread_desc,	mainThread);
+KERNEL_STORAGE(__stack_t, 	mainThreadCtx);
 
 cluster     * mainCluster;
@@ -54,4 +54,6 @@
 struct { __dllist_t(cluster) list; __spinlock_t lock; } __cfa_dbg_global_clusters;
 }
+
+size_t __page_size = 0;
 
 //-----------------------------------------------------------------------------
@@ -66,17 +68,15 @@
 // Struct to steal stack
 struct current_stack_info_t {
-	machine_context_t ctx;
+	__stack_t * storage;		// pointer to stack object
 	unsigned int size;		// size of stack
 	void *base;				// base of stack
-	void *storage;			// pointer to stack
 	void *limit;			// stack grows towards stack limit
 	void *context;			// address of cfa_context_t
-	void *top;				// address of top of storage
 };
 
 void ?{}( current_stack_info_t & this ) {
-	CtxGet( this.ctx );
-	this.base = this.ctx.FP;
-	this.storage = this.ctx.SP;
+	__stack_context_t ctx;
+	CtxGet( ctx );
+	this.base = ctx.FP;
 
 	rlimit r;
@@ -86,25 +86,23 @@
 	this.limit = (void *)(((intptr_t)this.base) - this.size);
 	this.context = &storage_mainThreadCtx;
-	this.top = this.base;
 }
 
 //-----------------------------------------------------------------------------
 // Main thread construction
-void ?{}( coStack_t & this, current_stack_info_t * info) with( this ) {
-	size      = info->size;
-	storage   = info->storage;
-	limit     = info->limit;
-	base      = info->base;
-	context   = info->context;
-	top       = info->top;
-	userStack = true;
-}
 
 void ?{}( coroutine_desc & this, current_stack_info_t * info) with( this ) {
-	stack{ info };
+	context.errno_ = 0;
+	stack.storage = info->storage;
+	stack.userStack = true;
+	with(*stack.storage) {
+		size      = info->size;
+		limit     = info->limit;
+		base      = info->base;
+	}
 	name = "Main Thread";
-	errno_ = 0;
 	state = Start;
 	starter = NULL;
+	last = NULL;
+	cancellation = NULL;
 }
 
@@ -312,9 +310,9 @@
 	// to waste the perfectly valid stack create by pthread.
 	current_stack_info_t info;
-	machine_context_t ctx;
-	info.context = &ctx;
+	__stack_t ctx;
+	info.storage = &ctx;
 	(proc->runner){ proc, &info };
 
-	__cfaabi_dbg_print_safe("Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.base);
+	__cfaabi_dbg_print_safe("Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.storage);
 
 	//Set global state
@@ -353,5 +351,5 @@
 	verify( ! kernelTLS.preemption_state.enabled );
 
-	create_stack(&dst->stack, dst->stack.size);
+	__stack_prepare( &dst->stack, 65000 );
 	CtxStart(&this->runner, CtxInvokeCoroutine);
 
@@ -372,6 +370,5 @@
 
 	// context switch to specified coroutine
-	assert( src->stack.context );
-	CtxSwitch( src->stack.context, dst->stack.context );
+	CtxSwitch( &src->context, &dst->context );
 	// when CtxSwitch returns we are back in the src coroutine
 
@@ -545,4 +542,6 @@
 	__cfaabi_dbg_print_safe("Kernel : Starting\n");
 
+	__page_size = sysconf( _SC_PAGESIZE );
+
 	__cfa_dbg_global_clusters.list{ __get };
 	__cfa_dbg_global_clusters.lock{};
@@ -559,4 +558,5 @@
 	mainThread = (thread_desc *)&storage_mainThread;
 	current_stack_info_t info;
+	info.storage = (__stack_t*)&storage_mainThreadCtx;
 	(*mainThread){ &info };
 
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision 2fabdc0253be63e4524fc9b7dd87d64721f61dd9)
+++ libcfa/src/concurrency/thread.cfa	(revision b2f6113b6922a9164605cb05cd095dc177208621)
@@ -80,8 +80,7 @@
 
 	disable_interrupts();
-	create_stack(&thrd_c->stack, thrd_c->stack.size);
+	assert( thrd_c->stack.storage );
 	CtxStart(&this, CtxInvokeThread);
-	assert( thrd_c->last->stack.context );
-	CtxSwitch( thrd_c->last->stack.context, thrd_c->stack.context );
+	CtxSwitch( &thrd_c->last->context, &thrd_c->context );
 
 	ScheduleThread(thrd_h);
@@ -118,6 +117,5 @@
 	// set new coroutine that the processor is executing
 	// and context switch to it
-	assert( src->stack.context );
-	CtxSwitch( src->stack.context, dst->stack.context );
+	CtxSwitch( &src->context, &dst->context );
 
 	// set state of new coroutine to active
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision 2fabdc0253be63e4524fc9b7dd87d64721f61dd9)
+++ libcfa/src/concurrency/thread.hfa	(revision b2f6113b6922a9164605cb05cd095dc177208621)
@@ -61,12 +61,12 @@
 void ^?{}(thread_desc & this);
 
-static inline void ?{}(thread_desc & this)                                                                  { this{ "Anonymous Thread", *mainCluster, NULL, 0 }; }
+static inline void ?{}(thread_desc & this)                                                                  { this{ "Anonymous Thread", *mainCluster, NULL, 65000 }; }
 static inline void ?{}(thread_desc & this, size_t stackSize )                                               { this{ "Anonymous Thread", *mainCluster, NULL, stackSize }; }
 static inline void ?{}(thread_desc & this, void * storage, size_t storageSize )                             { this{ "Anonymous Thread", *mainCluster, storage, storageSize }; }
-static inline void ?{}(thread_desc & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, NULL, 0 }; }
-static inline void ?{}(thread_desc & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, 0, stackSize }; }
+static inline void ?{}(thread_desc & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, NULL, 65000 }; }
+static inline void ?{}(thread_desc & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, NULL, stackSize }; }
 static inline void ?{}(thread_desc & this, struct cluster & cl, void * storage, size_t storageSize )        { this{ "Anonymous Thread", cl, storage, storageSize }; }
-static inline void ?{}(thread_desc & this, const char * const name)                                         { this{ name, *mainCluster, NULL, 0 }; }
-static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl )                   { this{ name, cl, NULL, 0 }; }
+static inline void ?{}(thread_desc & this, const char * const name)                                         { this{ name, *mainCluster, NULL, 65000 }; }
+static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl )                   { this{ name, cl, NULL, 65000 }; }
 static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, NULL, stackSize }; }
 
