Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision c920317584861b5ec954f091f900bd1c5aa59a72)
+++ libcfa/src/concurrency/coroutine.cfa	(revision bfcf6b93d595af7bc5fcb05405720c63f503db8f)
@@ -28,4 +28,5 @@
 #include "kernel_private.hfa"
 #include "exception.hfa"
+#include "math.hfa"
 
 #define __CFA_INVOKE_PRIVATE__
@@ -87,4 +88,5 @@
 
 void __stack_prepare( __stack_info_t * this, size_t create_size );
+void __stack_clean  ( __stack_info_t * this );
 
 //-----------------------------------------------------------------------------
@@ -107,16 +109,17 @@
 	bool userStack = ((intptr_t)this.storage & 0x1) != 0;
 	if ( ! userStack && this.storage ) {
-		__attribute__((may_alias)) intptr_t * istorage = (intptr_t *)&this.storage;
-		*istorage &= (intptr_t)-1;
-
-		void * storage = this.storage->limit;
-		__cfaabi_dbg_debug_do(
-			storage = (char*)(storage) - __page_size;
-			if ( mprotect( storage, __page_size, PROT_READ | PROT_WRITE ) == -1 ) {
-				abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
-			}
-		);
-		__cfaabi_dbg_print_safe("Kernel : Deleting stack %p\n", storage);
-		free( storage );
+		__stack_clean( &this );
+		// __attribute__((may_alias)) intptr_t * istorage = (intptr_t *)&this.storage;
+		// *istorage &= (intptr_t)-1;
+
+		// void * storage = this.storage->limit;
+		// __cfaabi_dbg_debug_do(
+		// 	storage = (char*)(storage) - __page_size;
+		// 	if ( mprotect( storage, __page_size, PROT_READ | PROT_WRITE ) == -1 ) {
+		// 		abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
+		// 	}
+		// );
+		// __cfaabi_dbg_print_safe("Kernel : Deleting stack %p\n", storage);
+		// free( storage );
 	}
 }
@@ -167,24 +170,43 @@
 	assert(__page_size != 0l);
 	size_t size = libCeiling( storageSize, 16 ) + stack_data_size;
+	size = ceiling(size, __page_size);
 
 	// If we are running debug, we also need to allocate a guardpage to catch stack overflows.
 	void * storage;
-	__cfaabi_dbg_debug_do(
-		storage = memalign( __page_size, size + __page_size );
-	);
-	__cfaabi_dbg_no_debug_do(
-		storage = (void*)malloc(size);
-	);
-
-	__cfaabi_dbg_print_safe("Kernel : Created stack %p of size %zu\n", storage, size);
-	__cfaabi_dbg_debug_do(
-		if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
-			abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
-		}
-		storage = (void *)(((intptr_t)storage) + __page_size);
-	);
+	// __cfaabi_dbg_debug_do(
+	// 	storage = memalign( __page_size, size + __page_size );
+	// );
+	// __cfaabi_dbg_no_debug_do(
+	// 	storage = (void*)malloc(size);
+	// );
+
+	// __cfaabi_dbg_print_safe("Kernel : Created stack %p of size %zu\n", storage, size);
+	// __cfaabi_dbg_debug_do(
+	// 	if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
+	// 		abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
+	// 	}
+	// 	storage = (void *)(((intptr_t)storage) + __page_size);
+	// );
+	storage = mmap(0p, size + __page_size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+	if(storage == ((void*)-1)) {
+		abort( "coroutine stack creation : internal error, mmap failure, error(%d) %s.", errno, strerror( errno ) );
+	}
+	if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
+		abort( "coroutine stack creation : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
+	} // if
+	storage = (void *)(((intptr_t)storage) + __page_size);
 
 	verify( ((intptr_t)storage & (libAlign() - 1)) == 0ul );
 	return [storage, size];
+}
+
+void __stack_clean  ( __stack_info_t * this ) {
+	// Release a coroutine stack with munmap: the region covers one page below `limit` through the __stack_t header at `base`.
+	__stack_t * header = this->storage;
+	size_t span = ((intptr_t)header->base) - ((intptr_t)header->limit) + sizeof(__stack_t);
+	void * mapping = (void *)(((intptr_t)header->limit) - __page_size);
+	if(munmap(mapping, span + __page_size) == -1) {
+		abort( "coroutine stack destruction : internal error, munmap failure, error(%d) %s.", errno, strerror( errno ) );
+	}
+}
 
@@ -210,7 +232,7 @@
 	assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %zd bytes for a stack.", size, MinStackSize );
 
-	this->storage = (__stack_t *)((intptr_t)storage + size);
+	this->storage = (__stack_t *)((intptr_t)storage + size - sizeof(__stack_t));
 	this->storage->limit = storage;
-	this->storage->base  = (void*)((intptr_t)storage + size);
+	this->storage->base  = (void*)((intptr_t)storage + size - sizeof(__stack_t));
 	this->storage->exception_context.top_resume = 0p;
 	this->storage->exception_context.current_exception = 0p;
Index: libcfa/src/concurrency/coroutine.hfa
===================================================================
--- libcfa/src/concurrency/coroutine.hfa	(revision c920317584861b5ec954f091f900bd1c5aa59a72)
+++ libcfa/src/concurrency/coroutine.hfa	(revision bfcf6b93d595af7bc5fcb05405720c63f503db8f)
@@ -102,5 +102,7 @@
 }
 
-extern void __stack_prepare   ( __stack_info_t * this, size_t size /* ignored if storage already allocated */);
+extern void __stack_prepare( __stack_info_t * this, size_t size /* ignored if storage already allocated */);
+extern void __stack_clean  ( __stack_info_t * this );
+
 
 // Suspend implementation inlined for performance
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision c920317584861b5ec954f091f900bd1c5aa59a72)
+++ libcfa/src/concurrency/io/setup.cfa	(revision bfcf6b93d595af7bc5fcb05405720c63f503db8f)
@@ -132,6 +132,5 @@
 		// Wait for the io poller thread to finish
 
-		pthread_join( iopoll.thrd, 0p );
-		free( iopoll.stack );
+		__destroy_pthread( iopoll.thrd, iopoll.stack, 0p );
 
 		int ret = close(iopoll.epollfd);
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision c920317584861b5ec954f091f900bd1c5aa59a72)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision bfcf6b93d595af7bc5fcb05405720c63f503db8f)
@@ -29,4 +29,5 @@
 #include "kernel_private.hfa"
 #include "startup.hfa"          // STARTUP_PRIORITY_XXX
+#include "math.hfa"
 
 //-----------------------------------------------------------------------------
@@ -539,4 +540,5 @@
 }
 
+extern size_t __page_size;
 void ^?{}(processor & this) with( this ){
 	if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) {
@@ -550,8 +552,5 @@
 	}
 
-	int err = pthread_join( kernel_thread, 0p );
-	if( err != 0 ) abort("KERNEL ERROR: joining processor %p caused error %s\n", &this, strerror(err));
-
-	free( this.stack );
+	__destroy_pthread( kernel_thread, this.stack, 0p );
 
 	disable_interrupts();
@@ -678,14 +677,23 @@
 
 	void * stack;
-	__cfaabi_dbg_debug_do(
-		stack = memalign( __page_size, stacksize + __page_size );
-		// pthread has no mechanism to create the guard page in user supplied stack.
-		if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
-			abort( "mprotect : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
-		} // if
-	);
-	__cfaabi_dbg_no_debug_do(
-		stack = malloc( stacksize );
-	);
+	#warning due to the thunk problem, stack creation uses mmap, revert to malloc once this goes away
+	// __cfaabi_dbg_debug_do(
+	// 	stack = memalign( __page_size, stacksize + __page_size );
+	// 	// pthread has no mechanism to create the guard page in user supplied stack.
+	// 	if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
+	// 		abort( "mprotect : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
+	// 	} // if
+	// );
+	// __cfaabi_dbg_no_debug_do(
+	// 	stack = malloc( stacksize );
+	// );
+	stacksize = ceiling( stacksize, __page_size ) + __page_size;
+	stack = mmap(0p, stacksize, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+	if(stack == ((void*)-1)) {
+		abort( "pthread stack creation : internal error, mmap failure, error(%d) %s.", errno, strerror( errno ) );
+	}
+	if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
+		abort( "pthread stack creation : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
+	} // if
 
 	check( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
@@ -694,4 +702,24 @@
 	return stack;
 }
+
+// Join `pthread`, then unmap its stack; `stack` is assumed to be the mmap'ed base returned by __create_pthread (guard page first) -- TODO confirm for all callers.
+void __destroy_pthread( pthread_t pthread, void * stack, void ** retval ) {
+	int err = pthread_join( pthread, retval );
+	if( err != 0 ) abort("KERNEL ERROR: joining pthread %p caused error %s\n", (void*)pthread, strerror(err));
+
+	// The mapping length is not passed in, so recompute it from the default pthread stack size.
+	pthread_attr_t attr;
+	check( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
+	size_t stacksize;
+	check( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" ); // default stack size, normally defined by shell limit
+	check( pthread_attr_destroy( &attr ), "pthread_attr_destroy" ); // release the attribute object once the size is read
+	assert( stacksize >= PTHREAD_STACK_MIN );
+	stacksize = ceiling( stacksize, __page_size ) + __page_size; // same page rounding plus guard page as the mmap at stack creation
+
+	if(munmap(stack, stacksize) == -1) {
+		abort( "pthread stack destruction : internal error, munmap failure, error(%d) %s.", errno, strerror( errno ) );
+	}
+}
+
 
 #if defined(__CFA_WITH_VERIFY__)
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision c920317584861b5ec954f091f900bd1c5aa59a72)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision bfcf6b93d595af7bc5fcb05405720c63f503db8f)
@@ -49,4 +49,5 @@
 
 void * __create_pthread( pthread_t *, void * (*)(void *), void * );
+void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );
 
 
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision c920317584861b5ec954f091f900bd1c5aa59a72)
+++ libcfa/src/concurrency/preemption.cfa	(revision bfcf6b93d595af7bc5fcb05405720c63f503db8f)
@@ -536,6 +536,5 @@
 	// Wait for the preemption thread to finish
 
-	pthread_join( alarm_thread, 0p );
-	free( alarm_stack );
+	__destroy_pthread( alarm_thread, alarm_stack, 0p );
 
 	// Preemption is now fully stopped
