Index: benchmark/ctxswitch/cfa_cor.cfa
===================================================================
--- benchmark/ctxswitch/cfa_cor.cfa	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ benchmark/ctxswitch/cfa_cor.cfa	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -11,5 +11,5 @@
 }
 
-void main( GreatSuspender & this ) {
+void main( __attribute__((unused)) GreatSuspender & this ) {
 	while( true ) {
 		suspend();
Index: benchmark/ctxswitch/cfa_thrd2.cfa
===================================================================
--- benchmark/ctxswitch/cfa_thrd2.cfa	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ benchmark/ctxswitch/cfa_thrd2.cfa	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -8,5 +8,5 @@
 thread Fibre {};
 
-void main(Fibre & this) {
+void main(__attribute__((unused)) Fibre & this) {
 	while(!done) {
 		yield();
Index: libcfa/src/bits/containers.hfa
===================================================================
--- libcfa/src/bits/containers.hfa	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/bits/containers.hfa	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -186,5 +186,5 @@
 
 	forall(dtype T | is_node(T))
-	static inline bool ?!=?( __queue(T) & this, zero_t zero ) {
+	static inline bool ?!=?( __queue(T) & this, __attribute__((unused)) zero_t zero ) {
 		return this.head != 0;
 	}
@@ -196,5 +196,5 @@
 //-----------------------------------------------------------------------------
 #ifdef __cforall
-	forall(dtype TYPE | sized(TYPE))
+	forall(dtype TYPE)
 	#define T TYPE
 	#define __getter_t * [T * & next, T * & prev] ( T & )
@@ -268,5 +268,5 @@
 
 	forall(dtype T | sized(T))
-	static inline bool ?!=?( __dllist(T) & this, zero_t zero ) {
+	static inline bool ?!=?( __dllist(T) & this, __attribute__((unused)) zero_t zero ) {
 		return this.head != 0;
 	}
Index: libcfa/src/concurrency/CtxSwitch-i386.S
===================================================================
--- libcfa/src/concurrency/CtxSwitch-i386.S	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/concurrency/CtxSwitch-i386.S	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -41,7 +41,8 @@
 #define PC_OFFSET	( 2 * PTR_BYTE )
 
-.text
+	.text
 	.align 2
-.globl	CtxSwitch
+	.globl CtxSwitch
+	.type  CtxSwitch, @function
 CtxSwitch:
 
@@ -50,10 +51,4 @@
 
 	movl 4(%esp),%eax
-
-	// Save floating & SSE control words on the stack.
-
-        sub    $8,%esp
-        stmxcsr 0(%esp)         // 4 bytes
-        fnstcw  4(%esp)         // 2 bytes
 
 	// Save volatile registers on the stack.
@@ -67,6 +62,4 @@
 	movl %esp,SP_OFFSET(%eax)
 	movl %ebp,FP_OFFSET(%eax)
-//	movl 4(%ebp),%ebx	// save previous eip for debugger
-//	movl %ebx,PC_OFFSET(%eax)
 
 	// Copy the "to" context argument from the stack to register eax
@@ -87,13 +80,8 @@
 	popl %ebx
 
-	// Load floating & SSE control words from the stack.
-
-        fldcw   4(%esp)
-        ldmxcsr 0(%esp)
-        add    $8,%esp
-
 	// Return to thread.
 
 	ret
+	.size  CtxSwitch, .-CtxSwitch
 
 // Local Variables: //
Index: libcfa/src/concurrency/CtxSwitch-x86_64.S
===================================================================
--- libcfa/src/concurrency/CtxSwitch-x86_64.S	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/concurrency/CtxSwitch-x86_64.S	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -39,16 +39,10 @@
 #define SP_OFFSET	( 0 * PTR_BYTE )
 #define FP_OFFSET	( 1 * PTR_BYTE )
-#define PC_OFFSET	( 2 * PTR_BYTE )
 
-.text
+	.text
 	.align 2
-.globl	CtxSwitch
+	.globl CtxSwitch
+	.type  CtxSwitch, @function
 CtxSwitch:
-
-	// Save floating & SSE control words on the stack.
-
-	subq   $8,%rsp
-	stmxcsr 0(%rsp)         // 4 bytes
-	fnstcw  4(%rsp)         // 2 bytes
 
 	// Save volatile registers on the stack.
@@ -78,68 +72,8 @@
 	popq %r15
 
-	// Load floating & SSE control words from the stack.
-
-	fldcw   4(%rsp)
-	ldmxcsr 0(%rsp)
-	addq   $8,%rsp
-
 	// Return to thread.
 
 	ret
-
-//.text
-//	.align 2
-//.globl	CtxStore
-//CtxStore:
-//	// Save floating & SSE control words on the stack.
-//
-//	subq   $8,%rsp
-//	stmxcsr 0(%rsp)         // 4 bytes
-//	fnstcw  4(%rsp)         // 2 bytes
-//
-//	// Save volatile registers on the stack.
-//
-//	pushq %r15
-//	pushq %r14
-//	pushq %r13
-//	pushq %r12
-//	pushq %rbx
-//
-//	// Save old context in the "from" area.
-//
-//	movq %rsp,SP_OFFSET(%rdi)
-//	movq %rbp,FP_OFFSET(%rdi)
-//
-//	// Return to thread
-//
-//	ret
-//
-//.text
-//	.align 2
-//.globl 	CtxRet
-//CtxRet:
-//	// Load new context from the "to" area.
-//
-//	movq SP_OFFSET(%rdi),%rsp
-//	movq FP_OFFSET(%rdi),%rbp
-//
-//	// Load volatile registers from the stack.
-//
-//	popq %rbx
-//	popq %r12
-//	popq %r13
-//	popq %r14
-//	popq %r15
-//
-//	// Load floating & SSE control words from the stack.
-//
-//	fldcw   4(%rsp)
-//	ldmxcsr 0(%rsp)
-//	addq   $8,%rsp
-//
-//	// Return to thread.
-//
-//	ret
-
+	.size  CtxSwitch, .-CtxSwitch
 
 .text
Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/concurrency/coroutine.cfa	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -35,9 +35,9 @@
 
 extern "C" {
-      void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
-      static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) __attribute__ ((__noreturn__));
-      static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) {
-            abort();
-      }
+	void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
+	static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) __attribute__ ((__noreturn__));
+	static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) {
+		abort();
+	}
 }
 
@@ -47,57 +47,70 @@
 // minimum feasible stack size in bytes
 #define MinStackSize 1000
-static size_t pageSize = 0;				// architecture pagesize HACK, should go in proper runtime singleton
+extern size_t __page_size;				// architecture pagesize HACK, should go in proper runtime singleton
+
+void __stack_prepare( __stack_info_t * this, size_t create_size );
 
 //-----------------------------------------------------------------------------
 // Coroutine ctors and dtors
-void ?{}( coStack_t & this, void * storage, size_t storageSize ) with( this ) {
-      size		 = storageSize == 0 ? 65000 : storageSize; // size of stack
-      this.storage = storage;                                // pointer to stack
-      limit		 = NULL;                                   // stack grows towards stack limit
-      base		 = NULL;                                   // base of stack
-      context	 = NULL;                                   // address of cfa_context_t
-      top		 = NULL;                                   // address of top of storage
-      userStack	 = storage != NULL;
-}
-
-void ^?{}(coStack_t & this) {
-      if ( ! this.userStack && this.storage ) {
-            __cfaabi_dbg_debug_do(
-                  if ( mprotect( this.storage, pageSize, PROT_READ | PROT_WRITE ) == -1 ) {
-                        abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
-                  }
-            );
-            free( this.storage );
-      }
+void ?{}( __stack_info_t & this, void * storage, size_t storageSize ) {
+	this.storage   = (__stack_t *)storage;	// may be NULL: no user-provided storage
+
+	// Did we get a piece of storage ?
+	if (this.storage || storageSize != 0) {
+		// We either got a piece of storage or the user asked for a specific size,
+		// so create the stack immediately.
+		// (It is slightly unintuitive that non-default-sized coroutines are created eagerly,
+		// but it avoids making every coroutine carry an otherwise unnecessary size.)
+		verify( storageSize != 0 );	// storage without a size cannot be prepared
+		__stack_prepare( &this, storageSize );
+	}
+}
+
+// Destructor: releases the stack allocation unless the storage was supplied by the user.
+void ^?{}(__stack_info_t & this) {
+	bool userStack = ((intptr_t)this.storage & 0x1) != 0;	// bit 0 tags user-supplied storage
+	if ( ! userStack && this.storage ) {
+		__attribute__((may_alias)) intptr_t * istorage = (intptr_t *)&this.storage;
+		*istorage &= ~(intptr_t)0x1;	// clear the tag bit so the pointer can be dereferenced
+		void * storage = this.storage->limit;	// lowest usable address of the stack
+		__cfaabi_dbg_debug_do(
+			storage = (char*)(storage) - __page_size;
+			if ( mprotect( storage, __page_size, PROT_READ | PROT_WRITE ) == -1 ) {
+				abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
+			}
+		);
+		__cfaabi_dbg_print_safe("Kernel : Deleting stack %p\n", storage);
+		free( storage );
+	}
 }
 
 void ?{}( coroutine_desc & this, const char * name, void * storage, size_t storageSize ) with( this ) {
-      (this.stack){storage, storageSize};
-      this.name = name;
-      errno_ = 0;
-      state = Start;
-      starter = NULL;
-      last = NULL;
-      cancellation = NULL;
+	(this.context){NULL, NULL};
+	(this.stack){storage, storageSize};
+	this.name = name;
+	state = Start;
+	starter = NULL;
+	last = NULL;
+	cancellation = NULL;
 }
 
 void ^?{}(coroutine_desc& this) {
-      if(this.state != Halted && this.state != Start) {
-            coroutine_desc * src = TL_GET( this_thread )->curr_cor;
-            coroutine_desc * dst = &this;
-
-            struct _Unwind_Exception storage;
-            storage.exception_class = -1;
-            storage.exception_cleanup = _CtxCoroutine_UnwindCleanup;
-            this.cancellation = &storage;
-            this.last = src;
-
-	      // not resuming self ?
-	      if ( src == dst ) {
-		      abort( "Attempt by coroutine %.256s (%p) to terminate itself.\n", src->name, src );
-            }
-
-	      CoroutineCtxSwitch( src, dst );
-      }
+	if(this.state != Halted && this.state != Start) {
+		coroutine_desc * src = TL_GET( this_thread )->curr_cor;
+		coroutine_desc * dst = &this;
+
+		struct _Unwind_Exception storage;
+		storage.exception_class = -1;
+		storage.exception_cleanup = _CtxCoroutine_UnwindCleanup;
+		this.cancellation = &storage;
+		this.last = src;
+
+		// not resuming self ?
+		if ( src == dst ) {
+			abort( "Attempt by coroutine %.256s (%p) to terminate itself.\n", src->name, src );
+		}
+
+		CoroutineCtxSwitch( src, dst );
+	}
 }
 
@@ -106,87 +119,63 @@
 forall(dtype T | is_coroutine(T))
 void prime(T& cor) {
-      coroutine_desc* this = get_coroutine(cor);
-      assert(this->state == Start);
-
-      this->state = Primed;
-      resume(cor);
-}
-
-// Wrapper for co
-void CoroutineCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
-      // Safety note : Preemption must be disabled since there is a race condition
-      // kernelTLS.this_thread->curr_cor and $rsp/$rbp must agree at all times
-      verify( TL_GET( preemption_state.enabled ) || TL_GET( this_processor )->do_terminate );
-      disable_interrupts();
-
-      // set state of current coroutine to inactive
-      src->state = src->state == Halted ? Halted : Inactive;
-
-      // set new coroutine that task is executing
-      TL_GET( this_thread )->curr_cor = dst;
-
-      // context switch to specified coroutine
-      assert( src->stack.context );
-      CtxSwitch( src->stack.context, dst->stack.context );
-      // when CtxSwitch returns we are back in the src coroutine
-
-      // set state of new coroutine to active
-      src->state = Active;
-
-      enable_interrupts( __cfaabi_dbg_ctx );
-      verify( TL_GET( preemption_state.enabled ) || TL_GET( this_processor )->do_terminate );
-
-
-      if( unlikely(src->cancellation != NULL) ) {
-            _CtxCoroutine_Unwind(src->cancellation, src);
-      }
-} //ctxSwitchDirect
-
-void create_stack( coStack_t* this, unsigned int storageSize ) with( *this ) {
-      //TEMP HACK do this on proper kernel startup
-      if(pageSize == 0ul) pageSize = sysconf( _SC_PAGESIZE );
-
-      size_t cxtSize = libCeiling( sizeof(machine_context_t), 8 ); // minimum alignment
-
-      if ( !storage ) {
-            __cfaabi_dbg_print_safe("Kernel : Creating stack of size %zu for stack obj %p\n", cxtSize + size + 8, this);
-
-            userStack = false;
-            size = libCeiling( storageSize, 16 );
-            // use malloc/memalign because "new" raises an exception for out-of-memory
-
-            // assume malloc has 8 byte alignment so add 8 to allow rounding up to 16 byte alignment
-            __cfaabi_dbg_debug_do( storage = memalign( pageSize, cxtSize + size + pageSize ) );
-            __cfaabi_dbg_no_debug_do( storage = malloc( cxtSize + size + 8 ) );
-
-            __cfaabi_dbg_debug_do(
-                  if ( mprotect( storage, pageSize, PROT_NONE ) == -1 ) {
-                        abort( "(uMachContext &)%p.createContext() : internal error, mprotect failure, error(%d) %s.", this, (int)errno, strerror( (int)errno ) );
-                  } // if
-            );
-
-            if ( (intptr_t)storage == 0 ) {
-                  abort( "Attempt to allocate %zd bytes of storage for coroutine or task execution-state but insufficient memory available.", size );
-            } // if
-
-            __cfaabi_dbg_debug_do( limit = (char *)storage + pageSize );
-            __cfaabi_dbg_no_debug_do( limit = (char *)libCeiling( (unsigned long)storage, 16 ) ); // minimum alignment
-
-      } else {
-            __cfaabi_dbg_print_safe("Kernel : stack obj %p using user stack %p(%u bytes)\n", this, storage, storageSize);
-
-            assertf( ((size_t)storage & (libAlign() - 1)) == 0ul, "Stack storage %p for task/coroutine must be aligned on %d byte boundary.", storage, (int)libAlign() );
-            userStack = true;
-            size = storageSize - cxtSize;
-
-            if ( size % 16 != 0u ) size -= 8;
-
-            limit = (char *)libCeiling( (unsigned long)storage, 16 ); // minimum alignment
-      } // if
-      assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %d bytes for a stack.", size, MinStackSize );
-
-      base = (char *)limit + size;
-      context = base;
-      top = (char *)context + cxtSize;
+	coroutine_desc* this = get_coroutine(cor);
+	assert(this->state == Start);
+
+	this->state = Primed;
+	resume(cor);
+}
+
+// Allocate storage for a stack of at least storageSize bytes plus the trailing __stack_t record.
+// Returns [lowest usable address, usable size in bytes]; aborts if the allocation fails.
+[void *, size_t] __stack_alloc( size_t storageSize ) {
+	static const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
+	assert(__page_size != 0l);
+	size_t size = libCeiling( storageSize, 16 ) + stack_data_size;
+	// If we are running debug, we also need to allocate a guardpage to catch stack overflows.
+	void * storage;
+	__cfaabi_dbg_debug_do( storage = memalign( __page_size, size + __page_size ) );
+	__cfaabi_dbg_no_debug_do( storage = (void*)malloc(size) );
+	// Fail loudly on exhaustion instead of handing a NULL stack back to the caller.
+	if ( storage == NULL ) {
+		abort( "Attempt to allocate %zu bytes of storage for coroutine or task execution-state but insufficient memory available.", size );
+	}
+	__cfaabi_dbg_print_safe("Kernel : Created stack %p of size %zu\n", storage, size);
+	__cfaabi_dbg_debug_do(
+		if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
+			abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
+		}
+		storage = (void *)(((intptr_t)storage) + __page_size);
+	);
+
+	verify( ((intptr_t)storage & (libAlign() - 1)) == 0ul );
+	return [storage, size];
+}
+
+// Lay out a stack: allocate one, or adopt the user buffer already in this->storage, and record its bounds.
+void __stack_prepare( __stack_info_t * this, size_t create_size ) {
+	static const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
+	bool userStack;
+	void * storage;
+	size_t size;
+	if ( !this->storage ) {
+		userStack = false;
+		[storage, size] = __stack_alloc( create_size );
+		size -= stack_data_size;	// returned size includes the trailing __stack_t; keep that record inside the allocation
+	} else {
+		userStack = true;
+		__cfaabi_dbg_print_safe("Kernel : stack obj %p using user stack %p(%zu bytes)\n", this, this->storage, create_size);
+		// The stack must be aligned: advance the pointer to the next aligned address.
+		storage = (void*)libCeiling( (intptr_t)this->storage, libAlign());
+		// The size must shrink to leave room for the trailing __stack_t record and remain aligned.
+		ptrdiff_t diff = (intptr_t)storage - (intptr_t)this->storage;
+		size = libFloor(create_size - stack_data_size - diff, libAlign());
+	} // if
+	assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %d bytes for a stack.", size, MinStackSize );
+	// The __stack_t record lives at the stack base; bit 0 of the stored pointer flags a user-supplied buffer.
+	this->storage = (__stack_t *)((intptr_t)storage + size);
+	this->storage->limit = storage;
+	this->storage->base  = (void*)((intptr_t)storage + size);
+	__attribute__((may_alias)) intptr_t * istorage = (intptr_t*)&this->storage;
+	*istorage |= userStack ? 0x1 : 0x0;
 }
 
@@ -194,24 +183,24 @@
 // is not inline (We can't inline Cforall in C)
 extern "C" {
-      void __suspend_internal(void) {
-            suspend();
-      }
-
-      void __leave_coroutine( coroutine_desc * src ) {
-            coroutine_desc * starter = src->cancellation != 0 ? src->last : src->starter;
-
-            src->state = Halted;
-
-            assertf( starter != 0,
-                  "Attempt to suspend/leave coroutine \"%.256s\" (%p) that has never been resumed.\n"
-                  "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
-                  src->name, src );
-            assertf( starter->state != Halted,
-                  "Attempt by coroutine \"%.256s\" (%p) to suspend/leave back to terminated coroutine \"%.256s\" (%p).\n"
-                  "Possible cause is terminated coroutine's main routine has already returned.",
-                  src->name, src, starter->name, starter );
-
-            CoroutineCtxSwitch( src, starter );
-      }
+	void __suspend_internal(void) {
+		suspend();
+	}
+
+	void __leave_coroutine( coroutine_desc * src ) {
+		coroutine_desc * starter = src->cancellation != 0 ? src->last : src->starter;
+
+		src->state = Halted;
+
+		assertf( starter != 0,
+			"Attempt to suspend/leave coroutine \"%.256s\" (%p) that has never been resumed.\n"
+			"Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
+			src->name, src );
+		assertf( starter->state != Halted,
+			"Attempt by coroutine \"%.256s\" (%p) to suspend/leave back to terminated coroutine \"%.256s\" (%p).\n"
+			"Possible cause is terminated coroutine's main routine has already returned.",
+			src->name, src, starter->name, starter );
+
+		CoroutineCtxSwitch( src, starter );
+	}
 }
 
Index: libcfa/src/concurrency/coroutine.hfa
===================================================================
--- libcfa/src/concurrency/coroutine.hfa	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/concurrency/coroutine.hfa	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -64,9 +64,35 @@
       forall(dtype T | is_coroutine(T))
       void CtxStart(T * this, void ( *invoke)(T *));
+
+	extern void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
+
+	extern void CtxSwitch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("CtxSwitch");
+	// void CtxStore ( void * this ) asm ("CtxStore");
+	// void CtxRet   ( void * dst  ) asm ("CtxRet");
 }
 
 // Private wrappers for context switch and stack creation
-extern void CoroutineCtxSwitch(coroutine_desc * src, coroutine_desc * dst);
-extern void create_stack( coStack_t * this, unsigned int storageSize );
+// Wrapper for the coroutine context switch: suspends src and transfers control to dst.
+static inline void CoroutineCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
+	// mark the current coroutine inactive, unless it has already halted
+	src->state = src->state == Halted ? Halted : Inactive;
+
+	// set new coroutine that task is executing
+	TL_GET( this_thread )->curr_cor = dst;
+
+	// context switch to specified coroutine (dst must already have a saved stack pointer)
+	verify( dst->context.SP );
+	CtxSwitch( &src->context, &dst->context );
+	// when CtxSwitch returns we are back in the src coroutine
+
+	// src is running again, so mark it active
+	src->state = Active;
+	// a pending cancellation means src must unwind now; the unwind call is declared noreturn
+	if( unlikely(src->cancellation != NULL) ) {
+		_CtxCoroutine_Unwind(src->cancellation, src);
+	}
+}
+
+extern void __stack_prepare   ( __stack_info_t * this, size_t size /* ignored if storage already allocated */);
 
 // Suspend implementation inlined for performance
@@ -102,6 +128,6 @@
 	coroutine_desc * dst = get_coroutine(cor);
 
-	if( unlikely(!dst->stack.base) ) {
-		create_stack(&dst->stack, dst->stack.size);
+	if( unlikely(dst->context.SP == NULL) ) {
+		__stack_prepare(&dst->stack, 65000);
 		CtxStart(&cor, CtxInvokeCoroutine);
 	}
Index: libcfa/src/concurrency/invoke.c
===================================================================
--- libcfa/src/concurrency/invoke.c	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/concurrency/invoke.c	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -29,5 +29,5 @@
 extern void __suspend_internal(void);
 extern void __leave_coroutine( struct coroutine_desc * );
-extern void __finish_creation( struct coroutine_desc * );
+extern void __finish_creation( struct thread_desc * );
 extern void __leave_thread_monitor( struct thread_desc * this );
 extern void disable_interrupts();
@@ -46,6 +46,4 @@
 
 	cor->state = Active;
-
-	enable_interrupts( __cfaabi_dbg_ctx );
 
 	main( this );
@@ -93,8 +91,5 @@
 	// First suspend, once the thread arrives here,
 	// the function pointer to main can be invalidated without risk
-	__finish_creation(&thrd->self_cor);
-
-	// Restore the last to NULL, we clobbered because of the thunk problem
-	thrd->self_cor.last = NULL;
+	__finish_creation( thrd );
 
 	// Officially start the thread by enabling preemption
@@ -122,5 +117,6 @@
 	void (*invoke)(void *)
 ) {
-	struct coStack_t* stack = &get_coroutine( this )->stack;
+	struct coroutine_desc * cor = get_coroutine( this );
+	struct __stack_t * stack = cor->stack.storage;
 
 #if defined( __i386 )
@@ -128,6 +124,4 @@
 	struct FakeStack {
 	    void *fixedRegisters[3];		  	// fixed registers ebx, edi, esi (popped on 1st uSwitch, values unimportant)
-	    uint32_t mxcr;                        // SSE Status and Control bits (control bits are preserved across function calls)
-	    uint16_t fcw;                         // X97 FPU control word (preserved across function calls)
 	    void *rturn;                          // where to go on return from uSwitch
 	    void *dummyReturn;				// fake return compiler would have pushed on call to uInvoke
@@ -136,12 +130,12 @@
 	};
 
-	((struct machine_context_t *)stack->context)->SP = (char *)stack->base - sizeof( struct FakeStack );
-	((struct machine_context_t *)stack->context)->FP = NULL;		// terminate stack with NULL fp
+	cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
+	cor->context.FP = NULL;		// terminate stack with NULL fp
 
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->dummyReturn = NULL;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->argument[0] = this;     // argument to invoke
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->rturn = invoke;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->mxcr = 0x1F80; //Vol. 2A 3-520
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7
+	struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
+
+	fs->dummyReturn = NULL;
+	fs->argument[0] = this;     // argument to invoke
+	fs->rturn = invoke;
 
 #elif defined( __x86_64 )
@@ -149,19 +143,17 @@
 	struct FakeStack {
 		void *fixedRegisters[5];            // fixed registers rbx, r12, r13, r14, r15
-		uint32_t mxcr;                      // SSE Status and Control bits (control bits are preserved across function calls)
-		uint16_t fcw;                       // X97 FPU control word (preserved across function calls)
 		void *rturn;                        // where to go on return from uSwitch
 		void *dummyReturn;                  // NULL return address to provide proper alignment
 	};
 
-	((struct machine_context_t *)stack->context)->SP = (char *)stack->base - sizeof( struct FakeStack );
-	((struct machine_context_t *)stack->context)->FP = NULL;		// terminate stack with NULL fp
+	cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
+	cor->context.FP = NULL;		// terminate stack with NULL fp
 
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->dummyReturn = NULL;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->rturn = CtxInvokeStub;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fixedRegisters[0] = this;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fixedRegisters[1] = invoke;
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->mxcr = 0x1F80; //Vol. 2A 3-520
-	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7
+	struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
+
+	fs->dummyReturn = NULL;
+	fs->rturn = CtxInvokeStub;
+	fs->fixedRegisters[0] = this;
+	fs->fixedRegisters[1] = invoke;
 
 #elif defined( __ARM_ARCH )
@@ -173,8 +165,8 @@
 	};
 
-	((struct machine_context_t *)stack->context)->SP = (char *)stack->base - sizeof( struct FakeStack );
-	((struct machine_context_t *)stack->context)->FP = NULL;
+	cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
+	cor->context.FP = NULL;
 
-	struct FakeStack *fs = (struct FakeStack *)((struct machine_context_t *)stack->context)->SP;
+	struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
 
 	fs->intRegs[8] = CtxInvokeStub;
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/concurrency/invoke.h	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -62,12 +62,33 @@
 	#endif
 
-	struct coStack_t {
-		size_t size;									// size of stack
-		void * storage;									// pointer to stack
-		void * limit;									// stack grows towards stack limit
-		void * base;									// base of stack
-		void * context;									// address of cfa_context_t
-		void * top;										// address of top of storage
-		bool userStack;									// whether or not the user allocated the stack
+	struct __stack_context_t {
+		void * SP;	// saved stack pointer (written/read by CtxSwitch at SP_OFFSET)
+		void * FP;	// saved frame pointer (written/read by CtxSwitch at FP_OFFSET)
+	};
+
+	// low addresses :           +----------------------+ <- start of allocation
+	//                           |  optional guard page |
+	//                           +----------------------+ <- __stack_t.limit
+	//                           |                      |
+	//                           |       /\ /\ /\       |
+	//                           |       || || ||       |
+	//                           |                      |
+	//                           |    program  stack    |
+	//                           |                      |
+	// __stack_info_t.storage -> +----------------------+ <- __stack_t.base
+	//                           |      __stack_t       |
+	// high addresses:           +----------------------+ <- end of allocation
+
+	struct __stack_t {
+		// lowest address of the program stack; the stack grows towards this limit
+		void * limit;
+
+		// base of stack: highest address of the program stack, where this record is placed
+		void * base;
+	};
+
+	struct __stack_info_t {
+		// pointer to the __stack_t record at the stack base; bit 0 is set when the storage is user-supplied
+		struct __stack_t * storage;
 	};
 
@@ -75,15 +96,16 @@
 
 	struct coroutine_desc {
+		// context that is switched during a CtxSwitch
+		struct __stack_context_t context;
+
 		// stack information of the coroutine
-		struct coStack_t stack;
-
-		// textual name for coroutine/task, initialized by uC++ generated code
+		struct __stack_info_t stack;
+
+		// textual name for coroutine/task
 		const char * name;
-
-		// copy of global UNIX variable errno
-		int errno_;
 
 		// current execution status for coroutine
 		enum coroutine_state state;
+
 		// first coroutine to resume this one
 		struct coroutine_desc * starter;
@@ -139,4 +161,12 @@
 	struct thread_desc {
 		// Core threading fields
+		// context that is switched during a CtxSwitch
+		struct __stack_context_t context;
+
+		// current execution status for coroutine
+		enum coroutine_state state;
+
+		//SKULLDUGGERY errno is not saved in the thread data structure because returnToKernel appears to be the only function that needs to save and restore it
+
 		// coroutine body used to store context
 		struct coroutine_desc  self_cor;
@@ -169,7 +199,7 @@
 	#ifdef __cforall
 	extern "Cforall" {
-		static inline struct coroutine_desc * volatile active_coroutine() { return TL_GET( this_thread )->curr_cor; }
-		static inline struct thread_desc    * volatile active_thread   () { return TL_GET( this_thread    ); }
-		static inline struct processor      * volatile active_processor() { return TL_GET( this_processor ); } // UNSAFE
+		static inline struct coroutine_desc * active_coroutine() { return TL_GET( this_thread )->curr_cor; }
+		static inline struct thread_desc    * active_thread   () { return TL_GET( this_thread    ); }
+		static inline struct processor      * active_processor() { return TL_GET( this_processor ); } // UNSAFE
 
 		static inline thread_desc * & get_next( thread_desc & this ) {
@@ -230,26 +260,7 @@
 	// assembler routines that performs the context switch
 	extern void CtxInvokeStub( void );
-	void CtxSwitch( void * from, void * to ) asm ("CtxSwitch");
+	extern void CtxSwitch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("CtxSwitch");
 	// void CtxStore ( void * this ) asm ("CtxStore");
 	// void CtxRet   ( void * dst  ) asm ("CtxRet");
-
-	#if   defined( __i386 )
-	#define CtxGet( ctx ) __asm__ ( \
-			"movl %%esp,%0\n"   \
-			"movl %%ebp,%1\n"   \
-		: "=rm" (ctx.SP), "=rm" (ctx.FP) )
-	#elif defined( __x86_64 )
-	#define CtxGet( ctx ) __asm__ ( \
-			"movq %%rsp,%0\n"   \
-			"movq %%rbp,%1\n"   \
-		: "=rm" (ctx.SP), "=rm" (ctx.FP) )
-	#elif defined( __ARM_ARCH )
-	#define CtxGet( ctx ) __asm__ ( \
-			"mov %0,%%sp\n"   \
-			"mov %1,%%r11\n"   \
-		: "=rm" (ctx.SP), "=rm" (ctx.FP) )
-	#else
-		#error unknown hardware architecture
-	#endif
 
 #endif //_INVOKE_PRIVATE_H_
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/concurrency/kernel.cfa	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -36,4 +36,73 @@
 #include "invoke.h"
 
+//-----------------------------------------------------------------------------
+// Some assembly required
+#if   defined( __i386 )
+	#define CtxGet( ctx )        \
+		__asm__ volatile (     \
+			"movl %%esp,%0\n"\
+			"movl %%ebp,%1\n"\
+			: "=rm" (ctx.SP),\
+				"=rm" (ctx.FP) \
+		)
+
+	// mxcr : SSE Status and Control bits (control bits are preserved across function calls)
+	// fcw  : X87 FPU control word (preserved across function calls)
+	#define __x87_store         \
+		uint32_t __mxcr;      \
+		uint16_t __fcw;       \
+		__asm__ volatile (    \
+			"stmxcsr %0\n"  \
+			"fnstcw  %1\n"  \
+			: "=m" (__mxcr),\
+				"=m" (__fcw)  \
+		)
+
+	#define __x87_load         \
+		__asm__ volatile (   \
+			"fldcw  %1\n"  \
+			"ldmxcsr %0\n" \
+			::"m" (__mxcr),\
+				"m" (__fcw)  \
+		)
+
+#elif defined( __x86_64 )
+	#define CtxGet( ctx )        \
+		__asm__ volatile (     \
+			"movq %%rsp,%0\n"\
+			"movq %%rbp,%1\n"\
+			: "=rm" (ctx.SP),\
+				"=rm" (ctx.FP) \
+		)
+
+	#define __x87_store         \
+		uint32_t __mxcr;      \
+		uint16_t __fcw;       \
+		__asm__ volatile (    \
+			"stmxcsr %0\n"  \
+			"fnstcw  %1\n"  \
+			: "=m" (__mxcr),\
+				"=m" (__fcw)  \
+		)
+
+	#define __x87_load          \
+		__asm__ volatile (    \
+			"fldcw  %1\n"   \
+			"ldmxcsr %0\n"  \
+			:: "m" (__mxcr),\
+				"m" (__fcw)  \
+		)
+
+
+#elif defined( __ARM_ARCH )
+#define CtxGet( ctx ) __asm__ volatile ( /* volatile, like the x86 variants: the SP/FP read must not be elided, hoisted or merged */ \
+		"mov %0,%%sp\n"   \
+		"mov %1,%%r11\n"   \
+	: "=rm" (ctx.SP), "=rm" (ctx.FP) )
+#else
+	#error unknown hardware architecture
+#endif
+
+//-----------------------------------------------------------------------------
 //Start and stop routine for the kernel, declared first to make sure they run first
 static void kernel_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
@@ -42,8 +111,8 @@
 //-----------------------------------------------------------------------------
 // Kernel storage
-KERNEL_STORAGE(cluster,           mainCluster);
-KERNEL_STORAGE(processor,         mainProcessor);
-KERNEL_STORAGE(thread_desc,       mainThread);
-KERNEL_STORAGE(machine_context_t, mainThreadCtx);
+KERNEL_STORAGE(cluster,		mainCluster);
+KERNEL_STORAGE(processor,	mainProcessor);
+KERNEL_STORAGE(thread_desc,	mainThread);
+KERNEL_STORAGE(__stack_t, 	mainThreadCtx);
 
 cluster     * mainCluster;
@@ -54,4 +123,6 @@
 struct { __dllist_t(cluster) list; __spinlock_t lock; } __cfa_dbg_global_clusters;
 }
+
+size_t __page_size = 0;
 
 //-----------------------------------------------------------------------------
@@ -66,48 +137,43 @@
 // Struct to steal stack
 struct current_stack_info_t {
-	machine_context_t ctx;
-	unsigned int size;		// size of stack
+	__stack_t * storage;		// pointer to stack object
 	void *base;				// base of stack
-	void *storage;			// pointer to stack
 	void *limit;			// stack grows towards stack limit
 	void *context;			// address of cfa_context_t
-	void *top;				// address of top of storage
 };
 
 void ?{}( current_stack_info_t & this ) {
-	CtxGet( this.ctx );
-	this.base = this.ctx.FP;
-	this.storage = this.ctx.SP;
+	__stack_context_t ctx;	// scratch context; only its FP is kept below
+	CtxGet( ctx );	// read the current stack pointer and frame pointer
+	this.base = ctx.FP;	// the current frame pointer is used as the stack base; this.storage is NOT set here, callers assign it
 
 	rlimit r;
 	getrlimit( RLIMIT_STACK, &r);
-	this.size = r.rlim_cur;
-
-	this.limit = (void *)(((intptr_t)this.base) - this.size);
+	size_t size = r.rlim_cur;	// soft stack limit; NOTE(review): getrlimit's result is unchecked and RLIM_INFINITY is not handled
+
+	this.limit = (void *)(((intptr_t)this.base) - size);	// limit = base - size
 	this.context = &storage_mainThreadCtx;
-	this.top = this.base;
 }
 
 //-----------------------------------------------------------------------------
 // Main thread construction
-void ?{}( coStack_t & this, current_stack_info_t * info) with( this ) {
-	size      = info->size;
-	storage   = info->storage;
-	limit     = info->limit;
-	base      = info->base;
-	context   = info->context;
-	top       = info->top;
-	userStack = true;
-}
 
 void ?{}( coroutine_desc & this, current_stack_info_t * info) with( this ) {
-	stack{ info };
+	stack.storage = info->storage;	// point at the __stack_t supplied through info
+	with(*stack.storage) {	// fill in that __stack_t's bounds from info
+		limit     = info->limit;
+		base      = info->base;
+	}
+	__attribute__((may_alias)) intptr_t * istorage = (intptr_t*) &stack.storage;	// view the storage pointer as an integer
+	*istorage |= 0x1;	// set the pointer's low bit; NOTE(review): presumably the tag replacing the removed userStack flag — confirm
 	name = "Main Thread";
-	errno_ = 0;
 	state = Start;
 	starter = NULL;
+	last = NULL;
+	cancellation = NULL;
 }
 
 void ?{}( thread_desc & this, current_stack_info_t * info) with( this ) {
+	state = Start;
 	self_cor{ info };
 	curr_cor = &self_cor;
@@ -240,11 +306,12 @@
 }
 
+static int * __volatile_errno() __attribute__((noinline));	// returns the address of errno; never inlined into its callers
+static int * __volatile_errno() { asm(""); return &errno; }	// asm("") is GCC's documented way to keep calls to a side-effect-free noinline function from being optimized away
+
 // KERNEL ONLY
 // runThread runs a thread by context switching
 // from the processor coroutine to the target thread
-static void runThread(processor * this, thread_desc * dst) {
-	assert(dst->curr_cor);
+static void runThread(processor * this, thread_desc * thrd_dst) {
 	coroutine_desc * proc_cor = get_coroutine(this->runner);
-	coroutine_desc * thrd_cor = dst->curr_cor;
 
 	// Reset the terminating actions here
@@ -252,9 +319,18 @@
 
 	// Update global state
-	kernelTLS.this_thread = dst;
-
-	// Context Switch to the thread
-	ThreadCtxSwitch(proc_cor, thrd_cor);
-	// when ThreadCtxSwitch returns we are back in the processor coroutine
+	kernelTLS.this_thread = thrd_dst;
+
+	// set state of processor coroutine to inactive and the thread to active
+	proc_cor->state = proc_cor->state == Halted ? Halted : Inactive;
+	thrd_dst->state = Active;
+
+	// context switch to the thread that the processor is going to execute
+	verify( thrd_dst->context.SP );
+	CtxSwitch( &proc_cor->context, &thrd_dst->context );
+	// when CtxSwitch returns we are back in the processor coroutine
+
+	// set state of processor coroutine to active and the thread to inactive
+	thrd_dst->state = thrd_dst->state == Halted ? Halted : Inactive;
+	proc_cor->state = Active;
 }
 
@@ -262,6 +338,27 @@
 static void returnToKernel() {
 	coroutine_desc * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
-	coroutine_desc * thrd_cor = kernelTLS.this_thread->curr_cor;
-	ThreadCtxSwitch(thrd_cor, proc_cor);
+	thread_desc * thrd_src = kernelTLS.this_thread;
+
+	// leaving the thread: mark it inactive (unless halted) and the processor coroutine active
+	thrd_src->state = thrd_src->state == Halted ? Halted : Inactive;
+	proc_cor->state = Active;
+	int local_errno = *__volatile_errno();	// errno is saved here and written back after the switch
+	#if defined( __i386 ) || defined( __x86_64 )
+		__x87_store;	// declares __mxcr/__fcw and stores MXCSR and the x87 control word into them
+	#endif
+
+	// context switch from this thread to the processor coroutine;
+	// the processor coroutine must already have a stack pointer
+	verify( proc_cor->context.SP );
+	CtxSwitch( &thrd_src->context, &proc_cor->context );
+
+	// after the switch: mark the processor coroutine inactive (unless halted) and this thread active
+	proc_cor->state = proc_cor->state == Halted ? Halted : Inactive;
+	thrd_src->state = Active;
+
+	#if defined( __i386 ) || defined( __x86_64 )
+		__x87_load;	// reloads the control words stored by __x87_store above
+	#endif
+	*__volatile_errno() = local_errno;	// write back the errno value saved before the switch
 }
 
@@ -312,9 +409,9 @@
 	// to waste the perfectly valid stack create by pthread.
 	current_stack_info_t info;
-	machine_context_t ctx;
-	info.context = &ctx;
+	__stack_t ctx;
+	info.storage = &ctx;
 	(proc->runner){ proc, &info };
 
-	__cfaabi_dbg_print_safe("Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.base);
+	__cfaabi_dbg_print_safe("Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.storage);
 
 	//Set global state
@@ -347,31 +444,24 @@
 
 // KERNEL_ONLY
-void kernel_first_resume(processor * this) {
-	coroutine_desc * src = mainThread->curr_cor;
+void kernel_first_resume( processor * this ) {
+	thread_desc * src = mainThread;
 	coroutine_desc * dst = get_coroutine(this->runner);
 
 	verify( ! kernelTLS.preemption_state.enabled );
 
-	create_stack(&dst->stack, dst->stack.size);
+	__stack_prepare( &dst->stack, 65000 );
 	CtxStart(&this->runner, CtxInvokeCoroutine);
 
 	verify( ! kernelTLS.preemption_state.enabled );
 
-	dst->last = src;
-	dst->starter = dst->starter ? dst->starter : src;
+	dst->last = &src->self_cor;
+	dst->starter = dst->starter ? dst->starter : &src->self_cor;
 
 	// set state of current coroutine to inactive
 	src->state = src->state == Halted ? Halted : Inactive;
 
-	// SKULLDUGGERY normally interrupts are enable before leaving a coroutine ctxswitch.
-	// Therefore, when first creating a coroutine, interrupts are enable before calling the main.
-	// This is consistent with thread creation. However, when creating the main processor coroutine,
-	// we wan't interrupts to be disabled. Therefore, we double-disable interrupts here so they will
-	// stay disabled.
-	disable_interrupts();
-
 	// context switch to specified coroutine
-	assert( src->stack.context );
-	CtxSwitch( src->stack.context, dst->stack.context );
+	verify( dst->context.SP );
+	CtxSwitch( &src->context, &dst->context );
 	// when CtxSwitch returns we are back in the src coroutine
 
@@ -380,4 +470,17 @@
 
 	verify( ! kernelTLS.preemption_state.enabled );
+}
+
+// KERNEL_ONLY
+void kernel_last_resume( processor * this ) {
+	coroutine_desc * src = &mainThread->self_cor;	// the main thread's own coroutine
+	coroutine_desc * dst = get_coroutine(this->runner);	// the processor's runner coroutine
+
+	verify( ! kernelTLS.preemption_state.enabled );
+	verify( dst->starter == src );	// the runner must have been started by the main thread's coroutine
+	verify( dst->context.SP );
+
+	// context switch to the processor; NOTE(review): src here is the main thread's coroutine context, not its thread context — confirm intended
+	CtxSwitch( &src->context, &dst->context );
 }
 
@@ -388,5 +491,5 @@
 void ScheduleThread( thread_desc * thrd ) {
 	verify( thrd );
-	verify( thrd->self_cor.state != Halted );
+	verify( thrd->state != Halted );
 
 	verify( ! kernelTLS.preemption_state.enabled );
@@ -545,4 +648,6 @@
 	__cfaabi_dbg_print_safe("Kernel : Starting\n");
 
+	__page_size = sysconf( _SC_PAGESIZE );
+
 	__cfa_dbg_global_clusters.list{ __get };
 	__cfa_dbg_global_clusters.lock{};
@@ -559,4 +664,5 @@
 	mainThread = (thread_desc *)&storage_mainThread;
 	current_stack_info_t info;
+	info.storage = (__stack_t*)&storage_mainThreadCtx;
 	(*mainThread){ &info };
 
@@ -627,5 +733,5 @@
 	// which is currently here
 	__atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
-	returnToKernel();
+	kernel_last_resume( kernelTLS.this_processor );
 	mainThread->self_cor.state = Halted;
 
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/concurrency/thread.cfa	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -31,6 +31,7 @@
 // Thread ctors and dtors
 void ?{}(thread_desc & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
+	context{ NULL, NULL };
 	self_cor{ name, storage, storageSize };
-	verify(&self_cor);
+	state = Start;
 	curr_cor = &self_cor;
 	self_mon.owner = &this;
@@ -73,17 +74,14 @@
 forall( dtype T | is_thread(T) )
 void __thrd_start( T& this ) {
-	coroutine_desc* thrd_c = get_coroutine(this);
-	thread_desc   * thrd_h = get_thread   (this);
-	thrd_c->last = TL_GET( this_thread )->curr_cor;
-
-	// __cfaabi_dbg_print_safe("Thread start : %p (t %p, c %p)\n", this, thrd_c, thrd_h);
+	thread_desc * this_thrd = get_thread(this);	// descriptor of the thread being started
+	thread_desc * curr_thrd = TL_GET( this_thread );	// descriptor of the thread executing this call
 
 	disable_interrupts();
-	create_stack(&thrd_c->stack, thrd_c->stack.size);
 	CtxStart(&this, CtxInvokeThread);
-	assert( thrd_c->last->stack.context );
-	CtxSwitch( thrd_c->last->stack.context, thrd_c->stack.context );
+	this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];	// copy SP and FP from the thread's coroutine context into the thread's own context
+	verify( this_thrd->context.SP );
+	CtxSwitch( &curr_thrd->context, &this_thrd->context );	// switch from the calling thread to the new thread's context
 
-	ScheduleThread(thrd_h);
+	ScheduleThread(this_thrd);	// hand the new thread to the scheduler
 	enable_interrupts( __cfaabi_dbg_ctx );
 }
@@ -91,6 +89,10 @@
 extern "C" {
 	// KERNEL ONLY
-	void __finish_creation(coroutine_desc * thrd_c) {
-		ThreadCtxSwitch( thrd_c, thrd_c->last );
+	void __finish_creation(thread_desc * this) {
+		// context switch from `this` to the thread recorded in
+		// kernelTLS.this_thread, which must be a different thread
+		verify( kernelTLS.this_thread != this );
+		verify( kernelTLS.this_thread->context.SP );
+		CtxSwitch( &this->context, &kernelTLS.this_thread->context );
 	}
 }
@@ -110,20 +112,4 @@
 }
 
-// KERNEL ONLY
-void ThreadCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
-	// set state of current coroutine to inactive
-	src->state = src->state == Halted ? Halted : Inactive;
-	dst->state = Active;
-
-	// set new coroutine that the processor is executing
-	// and context switch to it
-	assert( src->stack.context );
-	CtxSwitch( src->stack.context, dst->stack.context );
-
-	// set state of new coroutine to active
-	dst->state = dst->state == Halted ? Halted : Inactive;
-	src->state = Active;
-}
-
 // Local Variables: //
 // mode: c //
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/concurrency/thread.hfa	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -61,12 +61,12 @@
 void ^?{}(thread_desc & this);
 
-static inline void ?{}(thread_desc & this)                                                                  { this{ "Anonymous Thread", *mainCluster, NULL, 0 }; }
+static inline void ?{}(thread_desc & this)                                                                  { this{ "Anonymous Thread", *mainCluster, NULL, 65000 }; }
 static inline void ?{}(thread_desc & this, size_t stackSize )                                               { this{ "Anonymous Thread", *mainCluster, NULL, stackSize }; }
 static inline void ?{}(thread_desc & this, void * storage, size_t storageSize )                             { this{ "Anonymous Thread", *mainCluster, storage, storageSize }; }
-static inline void ?{}(thread_desc & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, NULL, 0 }; }
-static inline void ?{}(thread_desc & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, 0, stackSize }; }
+static inline void ?{}(thread_desc & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, NULL, 65000 }; }
+static inline void ?{}(thread_desc & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, NULL, stackSize }; }
 static inline void ?{}(thread_desc & this, struct cluster & cl, void * storage, size_t storageSize )        { this{ "Anonymous Thread", cl, storage, storageSize }; }
-static inline void ?{}(thread_desc & this, const char * const name)                                         { this{ name, *mainCluster, NULL, 0 }; }
-static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl )                   { this{ name, cl, NULL, 0 }; }
+static inline void ?{}(thread_desc & this, const char * const name)                                         { this{ name, *mainCluster, NULL, 65000 }; }
+static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl )                   { this{ name, cl, NULL, 65000 }; }
 static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, NULL, stackSize }; }
 
Index: libcfa/src/time.hfa
===================================================================
--- libcfa/src/time.hfa	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/time.hfa	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -30,5 +30,5 @@
 
 static inline {
-	Duration ?=?( Duration & dur, zero_t ) { return dur{ 0 }; }
+	Duration ?=?( Duration & dur, __attribute__((unused)) zero_t ) { return dur{ 0 }; }
 
 	Duration +?( Duration rhs ) with( rhs ) {	return (Duration)@{ +tv }; }
@@ -59,10 +59,10 @@
 	bool ?>=?( Duration lhs, Duration rhs ) { return lhs.tv >= rhs.tv; }
 
-	bool ?==?( Duration lhs, zero_t ) { return lhs.tv == 0; }
-	bool ?!=?( Duration lhs, zero_t ) { return lhs.tv != 0; }
-	bool ?<? ( Duration lhs, zero_t ) { return lhs.tv <  0; }
-	bool ?<=?( Duration lhs, zero_t ) { return lhs.tv <= 0; }
-	bool ?>? ( Duration lhs, zero_t ) { return lhs.tv >  0; }
-	bool ?>=?( Duration lhs, zero_t ) { return lhs.tv >= 0; }
+	bool ?==?( Duration lhs, __attribute__((unused)) zero_t ) { return lhs.tv == 0; }
+	bool ?!=?( Duration lhs, __attribute__((unused)) zero_t ) { return lhs.tv != 0; }
+	bool ?<? ( Duration lhs, __attribute__((unused)) zero_t ) { return lhs.tv <  0; }
+	bool ?<=?( Duration lhs, __attribute__((unused)) zero_t ) { return lhs.tv <= 0; }
+	bool ?>? ( Duration lhs, __attribute__((unused)) zero_t ) { return lhs.tv >  0; }
+	bool ?>=?( Duration lhs, __attribute__((unused)) zero_t ) { return lhs.tv >= 0; }
 
 	Duration abs( Duration rhs ) { return rhs.tv >= 0 ? rhs : -rhs; }
@@ -101,7 +101,7 @@
 	void ?{}( timeval & t, time_t sec, suseconds_t usec ) { t.tv_sec = sec; t.tv_usec = usec; }
 	void ?{}( timeval & t, time_t sec ) { t{ sec, 0 }; }
-	void ?{}( timeval & t, zero_t ) { t{ 0, 0 }; }
-
-	timeval ?=?( timeval & t, zero_t ) { return t{ 0 }; }
+	void ?{}( timeval & t, __attribute__((unused)) zero_t ) { t{ 0, 0 }; }
+
+	timeval ?=?( timeval & t, __attribute__((unused)) zero_t ) { return t{ 0 }; }
 	timeval ?+?( timeval lhs, timeval rhs ) { return (timeval)@{ lhs.tv_sec + rhs.tv_sec, lhs.tv_usec + rhs.tv_usec }; }
 	timeval ?-?( timeval lhs, timeval rhs ) { return (timeval)@{ lhs.tv_sec - rhs.tv_sec, lhs.tv_usec - rhs.tv_usec }; }
@@ -116,7 +116,7 @@
 	void ?{}( timespec & t, time_t sec, __syscall_slong_t nsec ) { t.tv_sec = sec; t.tv_nsec = nsec; }
 	void ?{}( timespec & t, time_t sec ) { t{ sec, 0}; }
-	void ?{}( timespec & t, zero_t ) { t{ 0, 0 }; }
-
-	timespec ?=?( timespec & t, zero_t ) { return t{ 0 }; }
+	void ?{}( timespec & t, __attribute__((unused)) zero_t ) { t{ 0, 0 }; }
+
+	timespec ?=?( timespec & t, __attribute__((unused)) zero_t ) { return t{ 0 }; }
 	timespec ?+?( timespec lhs, timespec rhs ) { return (timespec)@{ lhs.tv_sec + rhs.tv_sec, lhs.tv_nsec + rhs.tv_nsec }; }
 	timespec ?-?( timespec lhs, timespec rhs ) { return (timespec)@{ lhs.tv_sec - rhs.tv_sec, lhs.tv_nsec - rhs.tv_nsec }; }
@@ -145,5 +145,5 @@
 void ?{}( Time & time, int year, int month = 0, int day = 0, int hour = 0, int min = 0, int sec = 0, int nsec = 0 );
 static inline {
-	Time ?=?( Time & time, zero_t ) { return time{ 0 }; }
+	Time ?=?( Time & time, __attribute__((unused)) zero_t ) { return time{ 0 }; }
 
 	void ?{}( Time & time, timeval t ) with( time ) { tv = (int64_t)t.tv_sec * TIMEGRAN + t.tv_usec * 1000; }
Index: libcfa/src/time_t.hfa
===================================================================
--- libcfa/src/time_t.hfa	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ libcfa/src/time_t.hfa	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -24,5 +24,5 @@
 
 static inline void ?{}( Duration & dur ) with( dur ) { tv = 0; }
-static inline void ?{}( Duration & dur, zero_t ) with( dur ) { tv = 0; }
+static inline void ?{}( Duration & dur, __attribute__((unused)) zero_t ) with( dur ) { tv = 0; }
 
 
@@ -34,5 +34,5 @@
 
 static inline void ?{}( Time & time ) with( time ) { tv = 0; }
-static inline void ?{}( Time & time, zero_t ) with( time ) { tv = 0; }
+static inline void ?{}( Time & time, __attribute__((unused)) zero_t ) with( time ) { tv = 0; }
 
 // Local Variables: //
Index: tests/Makefile.am
===================================================================
--- tests/Makefile.am	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ tests/Makefile.am	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -22,4 +22,7 @@
 debug=yes
 installed=no
+
+INSTALL_FLAGS=-in-tree
+DEBUG_FLAGS=-debug -O0
 
 quick_test=avl_test operators numericConstants expression enum array typeof cast raii/dtor-early-exit raii/init_once attributes
Index: tests/Makefile.in
===================================================================
--- tests/Makefile.in	(revision 1bc59750faba2d00c7c1e3ae02f709af36251377)
+++ tests/Makefile.in	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
@@ -375,4 +375,6 @@
 debug = yes
 installed = no
+INSTALL_FLAGS = -in-tree
+DEBUG_FLAGS = -debug -O0
 quick_test = avl_test operators numericConstants expression enum array typeof cast raii/dtor-early-exit raii/init_once attributes
 concurrent = 
