Index: src/libcfa/concurrency/coroutines
===================================================================
--- src/libcfa/concurrency/coroutines	(revision aed3f54d12363aaa409e62e26dd7835d4eea23dc)
+++ src/libcfa/concurrency/coroutines	(revision 6a3d2e7f051b0a830ce5f421976d81987675c0fa)
@@ -0,0 +1,118 @@
+//                              -*- Mode: CFA -*-
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// coroutines --
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Nov 28 12:27:26 2016
+// Last Modified By : Thierry Delisle
+// Last Modified On : Mon Nov 28 12:27:26 2016
+// Update Count     : 0
+//
+
+#ifndef COROUTINES_H
+#define COROUTINES_H
+
+#include "assert"       //
+#include "invoke.h"
+
+//-----------------------------------------------------------------------------
+// Coroutine trait
+// Anything that implements this trait can be resumed.
+// Anything that is resumed is a coroutine.
+trait is_coroutine(dtype T) {
+      void co_main(T* this);	// NOTE(review): presumably the coroutine body started through CtxInvokeCoroutine - confirm in invoke.c
+      coroutine* get_coroutine(T* this);	// gives access to the coroutine descriptor of the object; used by resume() and prime()
+};
+
+//-----------------------------------------------------------------------------
+// Ctors and dtors
+void ?{}(coStack_t* this);
+void ?{}(coroutine* this);
+void ^?{}(coStack_t* this);
+void ^?{}(coroutine* this);
+
+//-----------------------------------------------------------------------------
+// Public coroutine API
+static inline void suspend();
+
+forall(dtype T | is_coroutine(T))
+static inline void resume(T* cor);
+
+forall(dtype T | is_coroutine(T))
+void prime(T* cor);
+
+//-----------------------------------------------------------------------------
+// PRIVATE exposed because of inline
+
+// Start coroutine routines
+extern "C" {
+      forall(dtype T | is_coroutine(T))
+      void CtxInvokeCoroutine(T* this);
+
+      forall(dtype T | is_coroutine(T))
+      void CtxStart(T* this, void (*invoke)(T*));
+}
+
+// Get current coroutine: accessor for the global pointer declared below
+extern coroutine* current_coroutine; //PRIVATE, never use directly, go through this_coroutine() instead
+static inline coroutine* this_coroutine(void) {
+	return current_coroutine;
+}
+
+// Private wrappers for context switch and stack creation
+extern void corCxtSw(coroutine* src, coroutine* dst);
+extern void create_stack( coStack_t* this, unsigned int storageSize );
+
+// Suspend the running coroutine and switch back to its last resumer; implementation inlined for performance
+static inline void suspend() {
+      coroutine* src = this_coroutine();		// optimization: read the current coroutine only once
+	// the coroutine must have been resumed at least once, and its last resumer must not have terminated
+	assertf( src->last != 0,
+		"Attempt to suspend coroutine %.256s (%p) that has never been resumed.\n"
+		"Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
+		src->name, src );
+	assertf( src->last->notHalted,
+		"Attempt by coroutine %.256s (%p) to suspend back to terminated coroutine %.256s (%p).\n"
+		"Possible cause is terminated coroutine's main routine has already returned.",
+		src->name, src, src->last->name, src->last );
+
+	corCxtSw( src, src->last );
+}
+
+// Resume the coroutine behind cor, i.e. switch from the running coroutine to it; implementation inlined for performance
+forall(dtype T | is_coroutine(T))
+static inline void resume(T* cor) {
+	coroutine* src = this_coroutine();		// optimization: read the current coroutine only once
+	coroutine* dst = get_coroutine(cor);
+	// stack.base is still NULL when no stack has been created yet: create it, then call CtxStart (defined elsewhere)
+      if( unlikely(!dst->stack.base) ) {
+		create_stack(&dst->stack, dst->stack.size);
+		CtxStart(cor, CtxInvokeCoroutine);
+	}
+
+      // not resuming self ? (a self-resume skips both the halted check and the last-resumer update)
+	if ( src != dst ) {
+		assertf( dst->notHalted ,
+			"Attempt by coroutine %.256s (%p) to resume terminated coroutine %.256s (%p).\n"
+			"Possible cause is terminated coroutine's main routine has already returned.",
+			src->name, src, dst->name, dst );
+
+            // set last resumer, which is where suspend() switches back to
+		dst->last = src;
+	} // if
+
+      // always done for performance testing (the switch is performed even when src == dst)
+	corCxtSw( src, dst );
+}
+
+#endif //COROUTINES_H
+
+// Local Variables: //
+// mode: c //
+// tab-width: 4 //
+// End: //
Index: src/libcfa/concurrency/coroutines.c
===================================================================
--- src/libcfa/concurrency/coroutines.c	(revision aed3f54d12363aaa409e62e26dd7835d4eea23dc)
+++ src/libcfa/concurrency/coroutines.c	(revision 6a3d2e7f051b0a830ce5f421976d81987675c0fa)
@@ -0,0 +1,182 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// coroutines.c --
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Nov 28 12:27:26 2016
+// Last Modified By : Thierry Delisle
+// Last Modified On : Mon Nov 28 12:27:26 2016
+// Update Count     : 0
+//
+
+extern "C" {
+#include <stddef.h>
+#include <malloc.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+}
+
+#include "coroutines"
+#include "libhdr.h"
+
+#define __CFA_INVOKE_PRIVATE__
+#include "invoke.h"
+
+//-----------------------------------------------------------------------------
+// Global state variables
+
+// minimum feasible stack size in bytes
+#define MinStackSize 1000
+static size_t pageSize = 0;				// architecture pagesize HACK, should go in proper runtime singleton
+
+//Extra private constructors (taking a stack size) used for the main coroutine
+//FIXME the main should not actually allocate a stack
+//Since the main is never resumed the extra stack does not cause 
+//any problem but it is wasted memory
+void ?{}(coStack_t* this, size_t size);
+void ?{}(coroutine* this, size_t size);
+
+//Main coroutine
+//FIXME do not construct a stack for the main
+coroutine main_coroutine = { 1000 };
+
+//Current coroutine
+//Will need to be in TLS when multi-threading is added
+coroutine* current_coroutine = &main_coroutine;
+
+//-----------------------------------------------------------------------------
+// Coroutine ctors and dtors
+void ?{}(coStack_t* this) {
+	this->size		= 10240;	// default size of stack, handed to create_stack by resume()
+	this->storage	= NULL;	// pointer to stack storage, NULL makes create_stack allocate it
+	this->limit		= NULL;	// stack grows towards stack limit
+	this->base		= NULL;	// base of stack, NULL is how resume() detects a stack that was not created yet
+	this->context	= NULL;	// address of cfa_context_t
+	this->top		= NULL;	// address of top of storage
+	this->userStack	= false;	// false means the destructor frees the storage
+}
+
+void ?{}(coStack_t* this, size_t size) {
+	this{};				// start from the default field values
+	this->size = size;	// then override the default stack size
+	// unlike the default constructor, create the stack right away instead of waiting for the first resume
+	create_stack(this, this->size);
+}
+
+void ?{}(coroutine* this) {
+	this->name = "Anonymous Coroutine";	// default name, printed by the assertion messages in suspend() and resume()
+	this->errno_ = 0;
+	this->state = Start;	// prime() asserts that the coroutine is still in this state
+      this->notHalted = true;	// checked by the assertions in suspend() and resume()
+	this->starter = NULL;
+	this->last = NULL;	// no resumer yet, set by resume() and required by suspend()
+}
+
+void ?{}(coroutine* this, size_t size) {
+	this{};					// default field values first
+	(&this->stack){size};	// then construct the stack with the requested size, which creates it immediately
+}
+
+void ^?{}(coStack_t* this) {	// release the storage obtained by create_stack, user-supplied storage is left alone
+	if ( ! this->userStack && this->storage != NULL ) {	// storage is still NULL when the stack was never created (creation is lazy)
+		LIB_DEBUG_DO(
+			if ( mprotect( this->storage, pageSize, PROT_READ | PROT_WRITE ) == -1 ) {
+				abortf( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", this, errno, strerror( errno ) );
+			}
+		);
+		free( this->storage );	// in debug builds the first page was made accessible again just above
+	}
+}
+
+void ^?{}(coroutine* this) {}
+
+// Part of the Public API: marks a coroutine that is still in its Start state as Primed and resumes it
+// Not inline since only ever called once per coroutine
+forall(dtype T | is_coroutine(T))
+void prime(T* cor) {
+	coroutine* this = get_coroutine(cor);
+	assert(this->state == Start);	// Start is the state set by the coroutine constructor
+
+	this->state = Primed;
+	resume(cor);
+}
+
+// We need to call suspend from invoke.c, so we expose this wrapper that
+// is not inline (We can't inline Cforall in C)
+void suspend_no_inline(void) {
+	suspend();
+}
+
+void corCxtSw(coroutine* src, coroutine* dst) {
+	// Switch execution from coroutine src to coroutine dst, the code after CtxSwitch runs once src is switched back to
+	// THREAD_GETMEM( This )->disableInterrupts();
+	// set state of current coroutine to inactive
+	src->state = Inactive;
+
+	// set new coroutine that task is executing
+	current_coroutine = dst;			
+
+	// context switch to specified coroutine
+	CtxSwitch( src->stack.context, dst->stack.context );
+	// when CtxSwitch returns we are back in the src coroutine
+
+	// src is running again, so set its state back to active
+	src->state = Active;
+
+	// THREAD_GETMEM( This )->enableInterrupts();
+} // corCxtSw
+
+void create_stack( coStack_t* this, unsigned int storageSize ) {
+	// Set up the stack in "this": allocate storage unless this->storage is already set, then derive limit, base, context and top.
+	//TEMP HACK do this on proper kernel startup
+	if(pageSize == 0ul) pageSize = sysconf( _SC_PAGESIZE );
+
+	size_t cxtSize = libCeiling( sizeof(machine_context_t), 8 ); // minimum alignment
+
+	if ( (intptr_t)this->storage == 0 ) {
+		this->userStack = false;
+		this->size = libCeiling( storageSize, 16 );
+		// use malloc/memalign because "new" raises an exception for out-of-memory
+		// debug: page-aligned storage with one extra leading page, which is made inaccessible below
+		// non-debug: assume malloc has 8 byte alignment so add 8 to allow rounding up to 16 byte alignment
+		LIB_DEBUG_DO( this->storage = memalign( pageSize, cxtSize + this->size + pageSize ) );
+		LIB_NO_DEBUG_DO( this->storage = malloc( cxtSize + this->size + 8 ) );
+		// check the allocation first so that mprotect below is never handed a NULL pointer
+		if ( (intptr_t)this->storage == 0 ) {
+			abortf( "Attempt to allocate %d bytes of storage for coroutine or task execution-state but insufficient memory available.", this->size );
+		} // if
+
+		LIB_DEBUG_DO(
+			if ( mprotect( this->storage, pageSize, PROT_NONE ) == -1 ) {
+				abortf( "(uMachContext &)%p.createContext() : internal error, mprotect failure, error(%d) %s.", this, (int)errno, strerror( (int)errno ) );
+			} // if
+		);
+		LIB_DEBUG_DO( this->limit = (char *)this->storage + pageSize );
+		LIB_NO_DEBUG_DO( this->limit = (char *)libCeiling( (unsigned long)this->storage, 16 ) ); // minimum alignment
+	} else {
+		// storage supplied by the user: it must already be aligned on a libAlign() boundary
+		assertf( ((size_t)this->storage & (libAlign() - 1)) == 0ul, "Stack storage %p for task/coroutine must be aligned on %d byte boundary.", this->storage, (int)libAlign() );
+		this->userStack = true;
+		this->size = storageSize - cxtSize;
+
+		if ( this->size % 16 != 0u ) this->size -= 8;
+
+		this->limit = (char *)libCeiling( (unsigned long)this->storage, 16 ); // minimum alignment
+	} // if
+	assertf( this->size >= MinStackSize, "Stack size %d provides less than minimum of %d bytes for a stack.", this->size, MinStackSize );
+
+	this->base = (char *)this->limit + this->size;
+	this->context = this->base;
+	this->top = (char *)this->context + cxtSize;
+}
+
+// Local Variables: //
+// mode: c //
+// tab-width: 4 //
+// End: //
Index: src/libcfa/concurrency/threads
===================================================================
--- src/libcfa/concurrency/threads	(revision aed3f54d12363aaa409e62e26dd7835d4eea23dc)
+++ src/libcfa/concurrency/threads	(revision 6a3d2e7f051b0a830ce5f421976d81987675c0fa)
@@ -9,7 +9,7 @@
 //
 // Author           : Thierry Delisle
-// Created On       : Mon Nov 28 12:27:26 2016
+// Created On       : Tue Jan 17 12:27:26 2016
 // Last Modified By : Thierry Delisle
-// Last Modified On : Mon Nov 28 12:27:26 2016
+// Last Modified On : --
 // Update Count     : 0
 //
@@ -18,96 +18,5 @@
 #define THREADS_H
 
-#include "assert"       //
-#include "invoke.h"
 
-//-----------------------------------------------------------------------------
-// Coroutine trait
-// Anything that implements this trait can be resumed.
-// Anything that is resumed is a coroutine.
-trait is_coroutine(dtype T) {
-      void co_main(T* this);
-      coroutine* get_coroutine(T* this);
-};
-
-//-----------------------------------------------------------------------------
-// Ctors and dtors
-void ?{}(coStack_t* this);
-void ?{}(coroutine* this);
-void ^?{}(coStack_t* this);
-void ^?{}(coroutine* this);
-
-//-----------------------------------------------------------------------------
-// Public coroutine API
-static inline void suspend();
-
-forall(dtype T | is_coroutine(T))
-static inline void resume(T* cor);
-
-forall(dtype T | is_coroutine(T))
-void prime(T* cor);
-
-//-----------------------------------------------------------------------------
-// PRIVATE exposed because of inline
-
-// Start coroutine routines
-extern "C" {
-      forall(dtype T | is_coroutine(T))
-      void CtxInvokeCoroutine(T* this);
-
-      forall(dtype T | is_coroutine(T))
-      void CtxStart(T* this, void (*invoke)(T*));
-}
-
-// Get current coroutine
-extern coroutine* current_coroutine; //PRIVATE, never use directly
-static inline coroutine* this_coroutine(void) {
-	return current_coroutine;
-}
-
-// Private wrappers for context switch and stack creation
-extern void corCxtSw(coroutine* src, coroutine* dst);
-extern void create_stack( coStack_t* this, unsigned int storageSize );
-
-// Suspend implementation inlined for performance
-static inline void suspend() {
-      coroutine* src = this_coroutine();		// optimization
-
-	assertf( src->last != 0,
-		"Attempt to suspend coroutine %.256s (%p) that has never been resumed.\n"
-		"Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
-		src->name, src );
-	assertf( src->last->notHalted,
-		"Attempt by coroutine %.256s (%p) to suspend back to terminated coroutine %.256s (%p).\n"
-		"Possible cause is terminated coroutine's main routine has already returned.",
-		src->name, src, src->last->name, src->last );
-
-	corCxtSw( src, src->last );
-}
-
-// Resume implementation inlined for performance
-forall(dtype T | is_coroutine(T))
-static inline void resume(T* cor) {
-	coroutine* src = this_coroutine();		// optimization
-	coroutine* dst = get_coroutine(cor);
-
-      if( unlikely(!dst->stack.base) ) {
-		create_stack(&dst->stack, dst->stack.size);
-		CtxStart(cor, CtxInvokeCoroutine);
-	}
-
-      // not resuming self ?
-	if ( src != dst ) {
-		assertf( dst->notHalted ,
-			"Attempt by coroutine %.256s (%p) to resume terminated coroutine %.256s (%p).\n"
-			"Possible cause is terminated coroutine's main routine has already returned.",
-			src->name, src, dst->name, dst );
-
-            // set last resumer
-		dst->last = src;
-	} // if
-
-      // always done for performance testing
-	corCxtSw( src, dst );
-}
 
 #endif //THREADS_H
Index: src/libcfa/concurrency/threads.c
===================================================================
--- src/libcfa/concurrency/threads.c	(revision aed3f54d12363aaa409e62e26dd7835d4eea23dc)
+++ src/libcfa/concurrency/threads.c	(revision 6a3d2e7f051b0a830ce5f421976d81987675c0fa)
@@ -1,2 +1,3 @@
+//                              -*- Mode: CFA -*-
 //
 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
@@ -5,175 +6,14 @@
 // file "LICENCE" distributed with Cforall.
 //
-// threads.c --
+// threads --
 //
 // Author           : Thierry Delisle
-// Created On       : Mon Nov 28 12:27:26 2016
+// Created On       : Tue Jan 17 12:27:26 2016
 // Last Modified By : Thierry Delisle
-// Last Modified On : Mon Nov 28 12:27:26 2016
+// Last Modified On : --
 // Update Count     : 0
 //
 
-extern "C" {
-#include <stddef.h>
-#include <malloc.h>
-#include <errno.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-}
 
-#include "threads"
-#include "libhdr.h"
-
-#define __CFA_INVOKE_PRIVATE__
-#include "invoke.h"
-
-//-----------------------------------------------------------------------------
-// Global state variables
-
-// minimum feasible stack size in bytes
-#define MinStackSize 1000
-static size_t pageSize = 0;				// architecture pagesize HACK, should go in proper runtime singleton
-
-//Extra private desctructor for the main
-//FIXME the main should not actually allocate a stack
-//Since the main is never resumed the extra stack does not cause 
-//any problem but it is wasted memory
-void ?{}(coStack_t* this, size_t size);
-void ?{}(coroutine* this, size_t size);
-
-//Main coroutine
-//FIXME do not construct a stack for the main
-coroutine main_coroutine = { 1000 };
-
-//Current coroutine
-//Will need to be in TLS when multi-threading is added
-coroutine* current_coroutine = &main_coroutine;
-
-//-----------------------------------------------------------------------------
-// Coroutine ctors and dtors
-void ?{}(coStack_t* this) {
-	this->size		= 10240;	// size of stack
-	this->storage	= NULL;	// pointer to stack
-	this->limit		= NULL;	// stack grows towards stack limit
-	this->base		= NULL;	// base of stack
-	this->context	= NULL;	// address of cfa_context_t
-	this->top		= NULL;	// address of top of storage
-	this->userStack	= false;	
-}
-
-void ?{}(coStack_t* this, size_t size) {
-	this{};
-	this->size = size;
-
-	create_stack(this, this->size);
-}
-
-void ?{}(coroutine* this) {
-	this->name = "Anonymous Coroutine";
-	this->errno_ = 0;
-	this->state = Start;
-      this->notHalted = true;
-	this->starter = NULL;
-	this->last = NULL;
-}
-
-void ?{}(coroutine* this, size_t size) {
-	this{};
-	(&this->stack){size};
-}
-
-void ^?{}(coStack_t* this) {
-	if ( ! this->userStack ) {
-		LIB_DEBUG_DO(
-			if ( mprotect( this->storage, pageSize, PROT_READ | PROT_WRITE ) == -1 ) {
-				abortf( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", this, errno, strerror( errno ) );
-			}
-		);
-		free( this->storage );
-	}
-}
-
-void ^?{}(coroutine* this) {}
-
-// Part of the Public API
-// Not inline since only ever called once per coroutine
-forall(dtype T | is_coroutine(T))
-void prime(T* cor) {
-	coroutine* this = get_coroutine(cor);
-	assert(this->state == Start);
-
-	this->state = Primed;
-	resume(cor);
-}
-
-// We need to call suspend from invoke.c, so we expose this wrapper that
-// is not inline (We can't inline Cforall in C)
-void suspend_no_inline(void) {
-	suspend();
-}
-
-void corCxtSw(coroutine* src, coroutine* dst) {
-	// THREAD_GETMEM( This )->disableInterrupts();
-
-	// set state of current coroutine to inactive
-	src->state = Inactive;
-
-	// set new coroutine that task is executing
-	current_coroutine = dst;			
-
-	// context switch to specified coroutine
-	CtxSwitch( src->stack.context, dst->stack.context );
-	// when CtxSwitch returns we are back in the src coroutine		
-
-	// set state of new coroutine to active
-	src->state = Active;
-
-	// THREAD_GETMEM( This )->enableInterrupts();
-} //ctxSwitchDirect
-
-void create_stack( coStack_t* this, unsigned int storageSize ) {
-	//TEMP HACK do this on proper kernel startup
-	if(pageSize == 0ul) pageSize = sysconf( _SC_PAGESIZE );
-
-	size_t cxtSize = libCeiling( sizeof(machine_context_t), 8 ); // minimum alignment
-
-	if ( (intptr_t)this->storage == 0 ) {
-		this->userStack = false;
-		this->size = libCeiling( storageSize, 16 );
-		// use malloc/memalign because "new" raises an exception for out-of-memory
-		
-		// assume malloc has 8 byte alignment so add 8 to allow rounding up to 16 byte alignment
-		LIB_DEBUG_DO( this->storage = memalign( pageSize, cxtSize + this->size + pageSize ) );
-		LIB_NO_DEBUG_DO( this->storage = malloc( cxtSize + this->size + 8 ) );
-
-		LIB_DEBUG_DO(
-			if ( mprotect( this->storage, pageSize, PROT_NONE ) == -1 ) {
-				abortf( "(uMachContext &)%p.createContext() : internal error, mprotect failure, error(%d) %s.", this, (int)errno, strerror( (int)errno ) );
-			} // if
-		);
-
-		if ( (intptr_t)this->storage == 0 ) {
-			abortf( "Attempt to allocate %d bytes of storage for coroutine or task execution-state but insufficient memory available.", this->size );
-		} // if
-
-		LIB_DEBUG_DO( this->limit = (char *)this->storage + pageSize );
-		LIB_NO_DEBUG_DO( this->limit = (char *)libCeiling( (unsigned long)this->storage, 16 ) ); // minimum alignment
-
-	} else {
-		assertf( ((size_t)this->storage & (libAlign() - 1)) != 0ul, "Stack storage %p for task/coroutine must be aligned on %d byte boundary.", this->storage, (int)libAlign() );
-		this->userStack = true;
-		this->size = storageSize - cxtSize;
-
-		if ( this->size % 16 != 0u ) this->size -= 8;
-
-		this->limit = (char *)libCeiling( (unsigned long)this->storage, 16 ); // minimum alignment
-	} // if
-	assertf( this->size >= MinStackSize, "Stack size %d provides less than minimum of %d bytes for a stack.", this->size, MinStackSize );
-
-	this->base = (char *)this->limit + this->size;
-	this->context = this->base;
-	this->top = (char *)this->context + cxtSize;
-}
 
 // Local Variables: //
