Index: doc/proposals/concurrency/thePlan.md
===================================================================
--- doc/proposals/concurrency/thePlan.md	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
+++ doc/proposals/concurrency/thePlan.md	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -0,0 +1,24 @@
+_Phase 1_ : Prototype
+Threads and Processors.
+Main needs to call process
+
+_Phase 2_ : Minimum Viable Product
+Main thread is a cfa thread
+Basic monitors for synchronisation and minimal lock support.
+No internal/external scheduling.
+Synchronisation points in thread destructors.
+
+_Phase 3_ : Kernel features
+Thread features, e.g. detach
+Internal scheduling
+Clusters
+
+_Phase 4_ : Monitor features
+Multi-monitor calls
+External scheduling
+
+_Phase 5_ : Performance
+Proper scheduler
+...
+
+
Index: src/GenPoly/GenPoly.cc
===================================================================
--- src/GenPoly/GenPoly.cc	(revision 1ab7d3fef582ed4704963c9f8694076bc52aa699)
+++ src/GenPoly/GenPoly.cc	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -28,5 +28,5 @@
 			for ( std::list< Expression* >::iterator param = params.begin(); param != params.end(); ++param ) {
 				TypeExpr *paramType = dynamic_cast< TypeExpr* >( *param );
-				assert(paramType && "Aggregate parameters should be type expressions");
+				assertf(paramType, "Aggregate parameters should be type expressions");
 				if ( isPolyType( paramType->get_type(), env ) ) return true;
 			}
Index: src/examples/thread.c
===================================================================
--- src/examples/thread.c	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
+++ src/examples/thread.c	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -0,0 +1,68 @@
+#include <kernel>
+#include <stdlib>
+#include <threads>
+
+// Start coroutine routines
+extern "C" {
+      forall(dtype T | is_coroutine(T))
+      void CtxInvokeCoroutine(T * this);
+
+      forall(dtype T | is_coroutine(T))
+      void CtxStart(T * this, void ( *invoke)(T *));
+
+	forall(dtype T | is_coroutine(T))
+      void CtxInvokeThread(T * this);
+}
+
+struct MyThread {
+	thread_h t;
+	unsigned id;
+	unsigned count;
+};
+
+void ?{}( MyThread * this ) {
+	this->id = 0;
+	this->count = 10;
+}
+
+void ?{}( MyThread * this, unsigned id, unsigned count ) {
+	this->id = id;
+	this->count = count;
+}
+
+void ^?{}( MyThread * this ) {}
+
+void main(MyThread* this) {	// thread body: report identity, then suspend this->count times
+	printf("Main called with %p\n", this);
+	printf("Thread %u : Suspending %u times\n", this->id, this->count);	// id and count are unsigned, so %u
+
+	for(unsigned i = 0; i < this->count; i++) {	// unsigned index avoids a signed/unsigned comparison with count
+		printf("Thread %u : Suspend No. %u\n", this->id, i + 1);
+		printf("Back to %p\n", &this->t.c);
+		suspend();
+	}
+}
+
+thread_h* get_thread(MyThread* this) {
+	return &this->t;
+}
+
+coroutine* get_coroutine(MyThread* this) {
+	return &this->t.c;
+}
+
+int main() {
+
+	thread(MyThread) thread1;
+	thread(MyThread) thread2;
+
+	thread2.handle.id = 1;
+
+	printf("\n\nMain is %p\n", this_coroutine());
+
+	kernel_run();
+
+	printf("Kernel terminated correctly\n");
+
+	return 0;
+}
Index: src/libcfa/Makefile.am
===================================================================
--- src/libcfa/Makefile.am	(revision 1ab7d3fef582ed4704963c9f8694076bc52aa699)
+++ src/libcfa/Makefile.am	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -40,5 +40,5 @@
 CC = ${abs_top_srcdir}/src/driver/cfa
 
-headers = limits stdlib math iostream fstream iterator rational assert containers/vector concurrency/threads
+headers = limits stdlib math iostream fstream iterator rational assert containers/vector concurrency/coroutines concurrency/threads concurrency/kernel
 runtimehdrs = concurrency
 libobjs = ${headers:=.o}
Index: src/libcfa/Makefile.in
===================================================================
--- src/libcfa/Makefile.in	(revision 1ab7d3fef582ed4704963c9f8694076bc52aa699)
+++ src/libcfa/Makefile.in	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -98,5 +98,7 @@
 	libcfa_d_a-assert.$(OBJEXT) \
 	containers/libcfa_d_a-vector.$(OBJEXT) \
-	concurrency/libcfa_d_a-threads.$(OBJEXT)
+	concurrency/libcfa_d_a-coroutines.$(OBJEXT) \
+	concurrency/libcfa_d_a-threads.$(OBJEXT) \
+	concurrency/libcfa_d_a-kernel.$(OBJEXT)
 am__objects_2 = libcfa_d_a-libcfa-prelude.$(OBJEXT) $(am__objects_1) \
 	concurrency/CtxSwitch-@MACHINE_TYPE@.$(OBJEXT) \
@@ -111,5 +113,7 @@
 	libcfa_a-rational.$(OBJEXT) libcfa_a-assert.$(OBJEXT) \
 	containers/libcfa_a-vector.$(OBJEXT) \
-	concurrency/libcfa_a-threads.$(OBJEXT)
+	concurrency/libcfa_a-coroutines.$(OBJEXT) \
+	concurrency/libcfa_a-threads.$(OBJEXT) \
+	concurrency/libcfa_a-kernel.$(OBJEXT)
 am__objects_4 = libcfa_a-libcfa-prelude.$(OBJEXT) $(am__objects_3) \
 	concurrency/CtxSwitch-@MACHINE_TYPE@.$(OBJEXT) \
@@ -272,5 +276,5 @@
 EXTRA_FLAGS = -g -Wall -Wno-unused-function -I${abs_top_srcdir}/src/libcfa/libhdr -imacros libcfa-prelude.c @CFA_FLAGS@
 AM_CCASFLAGS = @CFA_FLAGS@
-headers = limits stdlib math iostream fstream iterator rational assert containers/vector concurrency/threads
+headers = limits stdlib math iostream fstream iterator rational assert containers/vector concurrency/coroutines concurrency/threads concurrency/kernel
 runtimehdrs = concurrency
 libobjs = ${headers:=.o}
@@ -362,5 +366,10 @@
 	@$(MKDIR_P) concurrency/$(DEPDIR)
 	@: > concurrency/$(DEPDIR)/$(am__dirstamp)
+concurrency/libcfa_d_a-coroutines.$(OBJEXT):  \
+	concurrency/$(am__dirstamp) \
+	concurrency/$(DEPDIR)/$(am__dirstamp)
 concurrency/libcfa_d_a-threads.$(OBJEXT): concurrency/$(am__dirstamp) \
+	concurrency/$(DEPDIR)/$(am__dirstamp)
+concurrency/libcfa_d_a-kernel.$(OBJEXT): concurrency/$(am__dirstamp) \
 	concurrency/$(DEPDIR)/$(am__dirstamp)
 concurrency/CtxSwitch-@MACHINE_TYPE@.$(OBJEXT):  \
@@ -375,5 +384,10 @@
 containers/libcfa_a-vector.$(OBJEXT): containers/$(am__dirstamp) \
 	containers/$(DEPDIR)/$(am__dirstamp)
+concurrency/libcfa_a-coroutines.$(OBJEXT):  \
+	concurrency/$(am__dirstamp) \
+	concurrency/$(DEPDIR)/$(am__dirstamp)
 concurrency/libcfa_a-threads.$(OBJEXT): concurrency/$(am__dirstamp) \
+	concurrency/$(DEPDIR)/$(am__dirstamp)
+concurrency/libcfa_a-kernel.$(OBJEXT): concurrency/$(am__dirstamp) \
 	concurrency/$(DEPDIR)/$(am__dirstamp)
 concurrency/libcfa_a-invoke.$(OBJEXT): concurrency/$(am__dirstamp) \
@@ -387,7 +401,11 @@
 	-rm -f *.$(OBJEXT)
 	-rm -f concurrency/CtxSwitch-@MACHINE_TYPE@.$(OBJEXT)
+	-rm -f concurrency/libcfa_a-coroutines.$(OBJEXT)
 	-rm -f concurrency/libcfa_a-invoke.$(OBJEXT)
+	-rm -f concurrency/libcfa_a-kernel.$(OBJEXT)
 	-rm -f concurrency/libcfa_a-threads.$(OBJEXT)
+	-rm -f concurrency/libcfa_d_a-coroutines.$(OBJEXT)
 	-rm -f concurrency/libcfa_d_a-invoke.$(OBJEXT)
+	-rm -f concurrency/libcfa_d_a-kernel.$(OBJEXT)
 	-rm -f concurrency/libcfa_d_a-threads.$(OBJEXT)
 	-rm -f containers/libcfa_a-vector.$(OBJEXT)
@@ -416,7 +434,11 @@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcfa_d_a-stdlib.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/CtxSwitch-@MACHINE_TYPE@.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_a-coroutines.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_a-invoke.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_a-kernel.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_a-threads.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_d_a-coroutines.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_d_a-invoke.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_d_a-kernel.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_d_a-threads.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@containers/$(DEPDIR)/libcfa_a-vector.Po@am__quote@
@@ -588,4 +610,18 @@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -c -o containers/libcfa_d_a-vector.obj `if test -f 'containers/vector.c'; then $(CYGPATH_W) 'containers/vector.c'; else $(CYGPATH_W) '$(srcdir)/containers/vector.c'; fi`
 
+concurrency/libcfa_d_a-coroutines.o: concurrency/coroutines.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_d_a-coroutines.o -MD -MP -MF concurrency/$(DEPDIR)/libcfa_d_a-coroutines.Tpo -c -o concurrency/libcfa_d_a-coroutines.o `test -f 'concurrency/coroutines.c' || echo '$(srcdir)/'`concurrency/coroutines.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_d_a-coroutines.Tpo concurrency/$(DEPDIR)/libcfa_d_a-coroutines.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/coroutines.c' object='concurrency/libcfa_d_a-coroutines.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_d_a-coroutines.o `test -f 'concurrency/coroutines.c' || echo '$(srcdir)/'`concurrency/coroutines.c
+
+concurrency/libcfa_d_a-coroutines.obj: concurrency/coroutines.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_d_a-coroutines.obj -MD -MP -MF concurrency/$(DEPDIR)/libcfa_d_a-coroutines.Tpo -c -o concurrency/libcfa_d_a-coroutines.obj `if test -f 'concurrency/coroutines.c'; then $(CYGPATH_W) 'concurrency/coroutines.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/coroutines.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_d_a-coroutines.Tpo concurrency/$(DEPDIR)/libcfa_d_a-coroutines.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/coroutines.c' object='concurrency/libcfa_d_a-coroutines.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_d_a-coroutines.obj `if test -f 'concurrency/coroutines.c'; then $(CYGPATH_W) 'concurrency/coroutines.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/coroutines.c'; fi`
+
 concurrency/libcfa_d_a-threads.o: concurrency/threads.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_d_a-threads.o -MD -MP -MF concurrency/$(DEPDIR)/libcfa_d_a-threads.Tpo -c -o concurrency/libcfa_d_a-threads.o `test -f 'concurrency/threads.c' || echo '$(srcdir)/'`concurrency/threads.c
@@ -602,4 +638,18 @@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_d_a-threads.obj `if test -f 'concurrency/threads.c'; then $(CYGPATH_W) 'concurrency/threads.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/threads.c'; fi`
 
+concurrency/libcfa_d_a-kernel.o: concurrency/kernel.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_d_a-kernel.o -MD -MP -MF concurrency/$(DEPDIR)/libcfa_d_a-kernel.Tpo -c -o concurrency/libcfa_d_a-kernel.o `test -f 'concurrency/kernel.c' || echo '$(srcdir)/'`concurrency/kernel.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_d_a-kernel.Tpo concurrency/$(DEPDIR)/libcfa_d_a-kernel.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/kernel.c' object='concurrency/libcfa_d_a-kernel.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_d_a-kernel.o `test -f 'concurrency/kernel.c' || echo '$(srcdir)/'`concurrency/kernel.c
+
+concurrency/libcfa_d_a-kernel.obj: concurrency/kernel.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_d_a-kernel.obj -MD -MP -MF concurrency/$(DEPDIR)/libcfa_d_a-kernel.Tpo -c -o concurrency/libcfa_d_a-kernel.obj `if test -f 'concurrency/kernel.c'; then $(CYGPATH_W) 'concurrency/kernel.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/kernel.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_d_a-kernel.Tpo concurrency/$(DEPDIR)/libcfa_d_a-kernel.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/kernel.c' object='concurrency/libcfa_d_a-kernel.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_d_a-kernel.obj `if test -f 'concurrency/kernel.c'; then $(CYGPATH_W) 'concurrency/kernel.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/kernel.c'; fi`
+
 concurrency/libcfa_d_a-invoke.obj: concurrency/invoke.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_d_a-invoke.obj -MD -MP -MF concurrency/$(DEPDIR)/libcfa_d_a-invoke.Tpo -c -o concurrency/libcfa_d_a-invoke.obj `if test -f 'concurrency/invoke.c'; then $(CYGPATH_W) 'concurrency/invoke.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/invoke.c'; fi`
@@ -742,4 +792,18 @@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -c -o containers/libcfa_a-vector.obj `if test -f 'containers/vector.c'; then $(CYGPATH_W) 'containers/vector.c'; else $(CYGPATH_W) '$(srcdir)/containers/vector.c'; fi`
 
+concurrency/libcfa_a-coroutines.o: concurrency/coroutines.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_a-coroutines.o -MD -MP -MF concurrency/$(DEPDIR)/libcfa_a-coroutines.Tpo -c -o concurrency/libcfa_a-coroutines.o `test -f 'concurrency/coroutines.c' || echo '$(srcdir)/'`concurrency/coroutines.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_a-coroutines.Tpo concurrency/$(DEPDIR)/libcfa_a-coroutines.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/coroutines.c' object='concurrency/libcfa_a-coroutines.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_a-coroutines.o `test -f 'concurrency/coroutines.c' || echo '$(srcdir)/'`concurrency/coroutines.c
+
+concurrency/libcfa_a-coroutines.obj: concurrency/coroutines.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_a-coroutines.obj -MD -MP -MF concurrency/$(DEPDIR)/libcfa_a-coroutines.Tpo -c -o concurrency/libcfa_a-coroutines.obj `if test -f 'concurrency/coroutines.c'; then $(CYGPATH_W) 'concurrency/coroutines.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/coroutines.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_a-coroutines.Tpo concurrency/$(DEPDIR)/libcfa_a-coroutines.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/coroutines.c' object='concurrency/libcfa_a-coroutines.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_a-coroutines.obj `if test -f 'concurrency/coroutines.c'; then $(CYGPATH_W) 'concurrency/coroutines.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/coroutines.c'; fi`
+
 concurrency/libcfa_a-threads.o: concurrency/threads.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_a-threads.o -MD -MP -MF concurrency/$(DEPDIR)/libcfa_a-threads.Tpo -c -o concurrency/libcfa_a-threads.o `test -f 'concurrency/threads.c' || echo '$(srcdir)/'`concurrency/threads.c
@@ -755,4 +819,18 @@
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_a-threads.obj `if test -f 'concurrency/threads.c'; then $(CYGPATH_W) 'concurrency/threads.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/threads.c'; fi`
+
+concurrency/libcfa_a-kernel.o: concurrency/kernel.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_a-kernel.o -MD -MP -MF concurrency/$(DEPDIR)/libcfa_a-kernel.Tpo -c -o concurrency/libcfa_a-kernel.o `test -f 'concurrency/kernel.c' || echo '$(srcdir)/'`concurrency/kernel.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_a-kernel.Tpo concurrency/$(DEPDIR)/libcfa_a-kernel.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/kernel.c' object='concurrency/libcfa_a-kernel.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_a-kernel.o `test -f 'concurrency/kernel.c' || echo '$(srcdir)/'`concurrency/kernel.c
+
+concurrency/libcfa_a-kernel.obj: concurrency/kernel.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_a-kernel.obj -MD -MP -MF concurrency/$(DEPDIR)/libcfa_a-kernel.Tpo -c -o concurrency/libcfa_a-kernel.obj `if test -f 'concurrency/kernel.c'; then $(CYGPATH_W) 'concurrency/kernel.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/kernel.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_a-kernel.Tpo concurrency/$(DEPDIR)/libcfa_a-kernel.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/kernel.c' object='concurrency/libcfa_a-kernel.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_a-kernel.obj `if test -f 'concurrency/kernel.c'; then $(CYGPATH_W) 'concurrency/kernel.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/kernel.c'; fi`
 
 concurrency/libcfa_a-invoke.obj: concurrency/invoke.c
Index: src/libcfa/concurrency/coroutines
===================================================================
--- src/libcfa/concurrency/coroutines	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
+++ src/libcfa/concurrency/coroutines	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -0,0 +1,120 @@
+//                              -*- Mode: CFA -*-
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// coroutines --
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Nov 28 12:27:26 2016
+// Last Modified By : Thierry Delisle
+// Last Modified On : Mon Nov 28 12:27:26 2016
+// Update Count     : 0
+//
+
+#ifndef COROUTINES_H
+#define COROUTINES_H
+
+#include "assert"
+#include "invoke.h"
+
+//-----------------------------------------------------------------------------
+// Coroutine trait
+// Anything that implements this trait can be resumed.
+// Anything that is resumed is a coroutine.
+trait is_coroutine(dtype T) {
+      void main(T * this);
+      coroutine * get_coroutine(T * this);
+};
+
+#define DECL_COROUTINE(X) static inline coroutine* get_coroutine(X* this) { return &this->c; } void main(X* this);
+
+//-----------------------------------------------------------------------------
+// Ctors and dtors
+void ?{}(coStack_t * this);
+void ?{}(coroutine * this);
+void ^?{}(coStack_t * this);
+void ^?{}(coroutine * this);
+
+//-----------------------------------------------------------------------------
+// Public coroutine API
+static inline void suspend();
+
+forall(dtype T | is_coroutine(T))
+static inline void resume(T * cor);
+
+forall(dtype T | is_coroutine(T))
+void prime(T * cor);
+
+//-----------------------------------------------------------------------------
+// PRIVATE exposed because of inline
+
+// Start coroutine routines
+extern "C" {
+      forall(dtype T | is_coroutine(T))
+      void CtxInvokeCoroutine(T * this);
+
+      forall(dtype T | is_coroutine(T))
+      void CtxStart(T * this, void ( *invoke)(T *));
+}
+
+// Get current coroutine
+extern coroutine * current_coroutine; //PRIVATE, never use directly
+static inline coroutine * this_coroutine(void) {
+	return current_coroutine;
+}
+
+// Private wrappers for context switch and stack creation
+extern void corCxtSw(coroutine * src, coroutine * dst);
+extern void create_stack( coStack_t * this, unsigned int storageSize );
+
+// Suspend implementation inlined for performance
+static inline void suspend() {
+      coroutine * src = this_coroutine();		// optimization
+
+	assertf( src->last != 0,
+		"Attempt to suspend coroutine %.256s (%p) that has never been resumed.\n"
+		"Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
+		src->name, src );
+	assertf( src->last->notHalted,
+		"Attempt by coroutine %.256s (%p) to suspend back to terminated coroutine %.256s (%p).\n"
+		"Possible cause is terminated coroutine's main routine has already returned.",
+		src->name, src, src->last->name, src->last );
+
+	corCxtSw( src, src->last );
+}
+
+// Resume implementation inlined for performance
+forall(dtype T | is_coroutine(T))
+static inline void resume(T * cor) {
+	coroutine * src = this_coroutine();		// optimization
+	coroutine * dst = get_coroutine(cor);
+
+      if( unlikely(!dst->stack.base) ) {
+		create_stack(&dst->stack, dst->stack.size);
+		CtxStart(cor, CtxInvokeCoroutine);
+	}
+
+      // not resuming self ?
+	if ( src != dst ) {
+		assertf( dst->notHalted ,
+			"Attempt by coroutine %.256s (%p) to resume terminated coroutine %.256s (%p).\n"
+			"Possible cause is terminated coroutine's main routine has already returned.",
+			src->name, src, dst->name, dst );
+
+            // set last resumer
+		dst->last = src;
+	} // if
+
+      // always done for performance testing
+	corCxtSw( src, dst );
+}
+
+#endif //COROUTINES_H
+
+// Local Variables: //
+// mode: c //
+// tab-width: 4 //
+// End: //
Index: src/libcfa/concurrency/coroutines.c
===================================================================
--- src/libcfa/concurrency/coroutines.c	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
+++ src/libcfa/concurrency/coroutines.c	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -0,0 +1,184 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// coroutines.c --
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Nov 28 12:27:26 2016
+// Last Modified By : Thierry Delisle
+// Last Modified On : Mon Nov 28 12:27:26 2016
+// Update Count     : 0
+//
+
+extern "C" {
+#include <stddef.h>
+#include <malloc.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+}
+
+#include "coroutines"
+#include "libhdr.h"
+
+#define __CFA_INVOKE_PRIVATE__
+#include "invoke.h"
+
+//-----------------------------------------------------------------------------
+// Global state variables
+
+// minimum feasible stack size in bytes
+#define MinStackSize 1000
+static size_t pageSize = 0;				// architecture pagesize HACK, should go in proper runtime singleton
+
+//Extra private constructors for the main coroutine
+//FIXME the main should not actually allocate a stack
+//Since the main is never resumed the extra stack does not cause
+//any problem but it is wasted memory
+void ?{}(coStack_t* this, size_t size);
+void ?{}(coroutine* this, size_t size);
+
+//Main coroutine
+//FIXME do not construct a stack for the main
+coroutine main_coroutine = { 1000 };
+
+//Current coroutine
+//Will need to be in TLS when multi-threading is added
+coroutine* current_coroutine = &main_coroutine;
+
+//-----------------------------------------------------------------------------
+// Coroutine ctors and dtors
+void ?{}(coStack_t* this) {
+	this->size		= 10240;	// size of stack
+	this->storage	= NULL;	// pointer to stack
+	this->limit		= NULL;	// stack grows towards stack limit
+	this->base		= NULL;	// base of stack
+	this->context	= NULL;	// address of cfa_context_t
+	this->top		= NULL;	// address of top of storage
+	this->userStack	= false;	
+}
+
+void ?{}(coStack_t* this, size_t size) {
+	this{};
+	this->size = size;
+
+	create_stack(this, this->size);
+}
+
+void ?{}(coroutine* this) {
+	this->name = "Anonymous Coroutine";
+	this->errno_ = 0;
+	this->state = Start;
+      this->notHalted = true;
+	this->starter = NULL;
+	this->last = NULL;
+}
+
+void ?{}(coroutine* this, size_t size) {
+	this{};
+	(&this->stack){size};
+}
+
+void ^?{}(coStack_t* this) {	// release the stack storage unless it was supplied by the user
+	if ( ! this->userStack && this->storage != NULL ) {	// storage stays NULL until create_stack runs; nothing to unprotect or free then
+		LIB_DEBUG_DO(	// debug builds: make the first page accessible again before handing the memory back
+			if ( mprotect( this->storage, pageSize, PROT_READ | PROT_WRITE ) == -1 ) {
+				abortf( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", this, errno, strerror( errno ) );
+			}
+		);
+		free( this->storage );
+	}
+}
+
+void ^?{}(coroutine* this) {}
+
+// Part of the Public API
+// Not inline since only ever called once per coroutine
+forall(dtype T | is_coroutine(T))
+void prime(T* cor) {
+	coroutine* this = get_coroutine(cor);
+	assert(this->state == Start);
+
+	this->state = Primed;
+	resume(cor);
+}
+
+// We need to call suspend from invoke.c, so we expose this wrapper that
+// is not inline (We can't inline Cforall in C)
+void suspend_no_inline(void) {	// out-of-line entry point to suspend()
+	LIB_DEBUG_PRINTF("Suspending back : to %p from %p\n", this_coroutine() ? this_coroutine()->last : (void*)-1, this_coroutine());	// "to" is the last resumer, "from" is the current coroutine
+
+	suspend();
+}
+
+void corCxtSw(coroutine* src, coroutine* dst) {	// switch execution from src to dst; returns once src is switched back to
+	// THREAD_GETMEM( This )->disableInterrupts();
+
+	// mark the coroutine being switched away from (src) as inactive
+	src->state = Inactive;
+
+	// record dst as the coroutine that is now executing
+	current_coroutine = dst;			
+
+	// context switch to specified coroutine
+	CtxSwitch( src->stack.context, dst->stack.context );
+	// when CtxSwitch returns we are back in the src coroutine		
+
+	// src is running again, so mark it (not dst) as active
+	src->state = Active;
+
+	// THREAD_GETMEM( This )->enableInterrupts();
+} //corCxtSw
+
+void create_stack( coStack_t* this, unsigned int storageSize ) {	// allocate (or adopt user-supplied) storage and set limit/base/context/top
+	//TEMP HACK do this on proper kernel startup
+	if(pageSize == 0ul) pageSize = sysconf( _SC_PAGESIZE );
+
+	size_t cxtSize = libCeiling( sizeof(machine_context_t), 8 ); // minimum alignment
+
+	if ( (intptr_t)this->storage == 0 ) {	// no storage supplied: allocate it here
+		this->userStack = false;
+		this->size = libCeiling( storageSize, 16 );
+		// debug builds: page-aligned allocation with one extra leading page that is protected below
+		// release builds: plain malloc
+		// assume malloc has 8 byte alignment so add 8 to allow rounding up to 16 byte alignment
+		LIB_DEBUG_DO( this->storage = memalign( pageSize, cxtSize + this->size + pageSize ) );
+		LIB_NO_DEBUG_DO( this->storage = malloc( cxtSize + this->size + 8 ) );
+
+		if ( (intptr_t)this->storage == 0 ) {	// check the allocation before touching it, so out-of-memory is reported as such
+			abortf( "Attempt to allocate %d bytes of storage for coroutine or task execution-state but insufficient memory available.", this->size );
+		} // if
+
+		LIB_DEBUG_DO(	// make the first page inaccessible
+			if ( mprotect( this->storage, pageSize, PROT_NONE ) == -1 ) {
+				abortf( "(uMachContext &)%p.createContext() : internal error, mprotect failure, error(%d) %s.", this, (int)errno, strerror( (int)errno ) );
+			} // if
+		);
+
+		LIB_DEBUG_DO( this->limit = (char *)this->storage + pageSize );
+		LIB_NO_DEBUG_DO( this->limit = (char *)libCeiling( (unsigned long)this->storage, 16 ) ); // minimum alignment
+
+	} else {	// storage already set: treat it as a user-supplied stack
+		assertf( ((size_t)this->storage & (libAlign() - 1)) == 0ul, "Stack storage %p for task/coroutine must be aligned on %d byte boundary.", this->storage, (int)libAlign() );	// low bits must be clear for aligned storage
+		this->userStack = true;
+		this->size = storageSize - cxtSize;
+
+		if ( this->size % 16 != 0u ) this->size -= 8;
+
+		this->limit = (char *)libCeiling( (unsigned long)this->storage, 16 ); // minimum alignment
+	} // if
+	assertf( this->size >= MinStackSize, "Stack size %d provides less than minimum of %d bytes for a stack.", this->size, MinStackSize );
+
+	this->base = (char *)this->limit + this->size;	// base sits size bytes above limit
+	this->context = this->base;	// machine context is stored starting at base
+	this->top = (char *)this->context + cxtSize;	// top is the end of the context area
+}
+
+// Local Variables: //
+// mode: c //
+// tab-width: 4 //
+// End: //
Index: src/libcfa/concurrency/invoke.c
===================================================================
--- src/libcfa/concurrency/invoke.c	(revision 1ab7d3fef582ed4704963c9f8694076bc52aa699)
+++ src/libcfa/concurrency/invoke.c	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -14,4 +14,5 @@
 
 extern void __suspend_no_inline__F___1(void);
+extern void __scheduler_remove__F_P9sthread_h__1(struct thread_h*);
 
 void CtxInvokeCoroutine(
@@ -20,5 +21,5 @@
       void *this
 ) {
-      LIB_DEBUG_PRINTF("Invoke : Received %p (main %p, get_c %p)\n", this, main, get_coroutine);
+      // LIB_DEBUG_PRINTF("Invoke Coroutine : Received %p (main %p, get_c %p)\n", this, main, get_coroutine);
 
       struct coroutine* cor = get_coroutine( this );
@@ -31,4 +32,31 @@
 
       main( this );
+
+      //Final suspend, should never return
+      __suspend_no_inline__F___1();
+      assertf(false, "Resumed dead coroutine");
+}
+
+void CtxInvokeThread(
+      void (*main)(void *), 
+      struct thread_h *(*get_thread)(void *), 
+      void *this
+) {
+      // LIB_DEBUG_PRINTF("Invoke Thread : Received %p (main %p, get_t %p)\n", this, main, get_thread);
+
+      __suspend_no_inline__F___1();
+
+      struct thread_h* thrd = get_thread( this );
+      struct coroutine* cor = &thrd->c;
+      cor->state = Active;
+
+      // LIB_DEBUG_PRINTF("Invoke Thread : invoking main %p (args %p)\n", main, this);
+      main( this );
+
+      __scheduler_remove__F_P9sthread_h__1(thrd);
+
+      //Final suspend, should never return
+      __suspend_no_inline__F___1();
+      assertf(false, "Resumed dead thread");
 }
 
@@ -40,5 +68,5 @@
       void (*invoke)(void *)
 ) {
-      LIB_DEBUG_PRINTF("StartCoroutine : Passing in %p (main %p, get_c %p) to %p\n", this, main, get_coroutine, invoke);
+      // LIB_DEBUG_PRINTF("StartCoroutine : Passing in %p (main %p) to invoke (%p) from start (%p)\n", this, main, invoke, CtxStart);
 
       struct coStack_t* stack = &get_coroutine( this )->stack;
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision 1ab7d3fef582ed4704963c9f8694076bc52aa699)
+++ src/libcfa/concurrency/invoke.h	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -35,4 +35,8 @@
       };
 
+      struct thread_h { // thread handle, declared here so the C code in invoke.c can use it
+            struct coroutine c; // coroutine state of the thread; CtxInvokeThread reaches it via &thrd->c
+      };
+
 #endif //_INVOKE_H_
 #else //! defined(__CFA_INVOKE_PRIVATE__)
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
+++ src/libcfa/concurrency/kernel	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -0,0 +1,42 @@
+//                              -*- Mode: CFA -*-
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// kernel --
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Jan 17 12:27:26 2017
+// Last Modified By : Thierry Delisle
+// Last Modified On : --
+// Update Count     : 0
+//
+
+#ifndef KERNEL_H
+#define KERNEL_H
+
+#include <stdbool.h>
+
+struct processor { // one scheduling core: a coroutine plus a fixed table of thread slots
+	struct proc_coroutine * cor; // coroutine running the scheduling loop; set by the proc_coroutine constructor
+	unsigned int thread_index; // slot most recently visited by the round-robin scan in nextThread
+	unsigned int thread_count; // number of usable slots in threads[]
+	struct thread_h * threads[10]; // scheduled threads; NULL marks a free slot
+	bool terminated; // set by scheduler_remove once no thread is left; ends the scheduling loop
+};
+
+void ?{}(processor * this); // constructor: no coroutine yet, every slot free
+void ^?{}(processor * this); // destructor: currently does nothing
+
+void scheduler_add( struct thread_h * thrd ); // store thrd in the first free slot of the system processor
+void scheduler_remove( struct thread_h * thrd ); // free thrd's slot; flags the processor as terminated when none remain
+void kernel_run( void ); // run the system processor's scheduling loop
+
+#endif //KERNEL_H
+
+// Local Variables: //
+// mode: c //
+// tab-width: 4 //
+// End: //
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
+++ src/libcfa/concurrency/kernel.c	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -0,0 +1,175 @@
+//                              -*- Mode: CFA -*-
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// kernel.c --
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Jan 17 12:27:26 2017
+// Last Modified By : Thierry Delisle
+// Last Modified On : --
+// Update Count     : 0
+//
+
+//Header
+#include "kernel"
+
+//C Includes
+#include <stddef.h>
+
+//CFA Includes
+#include "libhdr.h"
+#include "threads"
+
+//Private includes
+#define __CFA_INVOKE_PRIVATE__
+#include "invoke.h"
+
+processor systemProcessorStorage = {}; // storage for the only processor defined in this file
+processor * systemProcessor = &systemProcessorStorage; // handle used by scheduler_add, scheduler_remove and kernel_run
+
+void ?{}(processor * this) { // constructor: a processor with no coroutine and every slot free
+	this->cor = NULL;
+	this->thread_index = 0;
+	this->thread_count = sizeof(this->threads) / sizeof(this->threads[0]); // derived from the array so the two cannot drift apart
+	this->terminated = false;
+
+	for(unsigned int i = 0; i < this->thread_count; i++) {
+		this->threads[i] = NULL;
+	}
+
+	LIB_DEBUG_PRINTF("Processor : ctor for core %p (core spots %u)\n", this, this->thread_count);
+}
+
+void ^?{}(processor * this) {
+	// intentionally empty: the constructor acquires nothing that needs releasing
+}
+
+//-----------------------------------------------------------------------------
+// Processor coroutine
+struct proc_coroutine { // coroutine that runs a processor's scheduling loop
+	processor * proc; // processor driven by this coroutine
+	coroutine c; // underlying coroutine state
+};
+
+void ?{}(coroutine * this, processor * proc) { // proc is accepted but not used
+	this{}; // delegate to the default coroutine constructor
+}
+
+DECL_COROUTINE(proc_coroutine) // presumably generates the coroutine glue (e.g. get_coroutine) for proc_coroutine - TODO confirm against the macro
+
+void ?{}(proc_coroutine * this, processor * proc) { // bind a new processor coroutine to proc
+	(&this->c){proc}; // construct the embedded coroutine
+	this->proc = proc;
+	proc->cor = this; // back-link read by runThread to find the processor's own context
+}
+
+void ^?{}(proc_coroutine * this) { // destructor
+	^(&this->c){}; // destroy the embedded coroutine
+}
+
+void CtxInvokeProcessor(processor * proc) { // run proc's scheduling loop on a coroutine local to this call
+	proc_coroutine proc_cor_storage = {proc}; // the constructor stores this local's address in proc->cor
+	resume( &proc_cor_storage ); // presumably enters main(proc_coroutine *) - TODO confirm against coroutines
+} // NOTE(review): proc->cor still points at the destroyed local after this returns
+
+//-----------------------------------------------------------------------------
+// Processor running routines
+void main(proc_coroutine * cor); // scheduling loop of a processor
+thread_h * nextThread(processor * this); // round-robin pick of the next occupied slot, or NULL
+void runThread(processor * this, thread_h * dst); // context switch into dst until control comes back
+void spin(processor * this, unsigned int * spin_count); // idle step taken when no thread is ready
+
+void main(proc_coroutine * cor) {
+	// Scheduling loop: keep running ready threads until the processor is flagged as terminated.
+	processor * this = cor->proc;
+
+	unsigned int idle_spins = 0;
+	while( ! this->terminated ) {
+		thread_h * ready = nextThread(this);
+
+		if( ! ready ) {
+			spin(this, &idle_spins); // nothing to run on this pass
+		} else {
+			runThread(this, ready);
+			idle_spins = 0; // a thread ran, restart the idle count
+		}
+		idle_spins++; // counted on every pass of the loop
+	}
+
+	LIB_DEBUG_PRINTF("Kernel : core %p terminated\n", this);
+}
+
+thread_h * nextThread(processor * this) { // round-robin scan; returns the next scheduled thread, or NULL if every slot is empty
+	for(unsigned int i = 0; i < this->thread_count; i++) { // unsigned to match thread_count; at most one full lap
+		this->thread_index = (this->thread_index + 1) % this->thread_count; // advance first, so the previous pick is tried last
+
+		thread_h * thrd = this->threads[this->thread_index];
+		if(thrd) return thrd;
+	}
+
+	return NULL;
+}
+
+void runThread(processor * this, thread_h * dst) { // switch from the processor coroutine into dst and back
+	coroutine * proc_ctx = get_coroutine(this->cor);
+	coroutine * thrd_ctx = get_coroutine(dst);
+	thrd_ctx->last = proc_ctx; // presumably the target of a suspend inside the thread - TODO confirm against suspend()
+
+	// Publish the thread as current_coroutine, switch to it, and restore the
+	// processor as current_coroutine once the switch comes back.
+	LIB_DEBUG_PRINTF("Kernel : switching to ctx %p (from %p, current %p)\n", thrd_ctx, proc_ctx, current_coroutine);
+	current_coroutine = thrd_ctx;
+	CtxSwitch( proc_ctx->stack.context, thrd_ctx->stack.context );
+	current_coroutine = proc_ctx;
+	LIB_DEBUG_PRINTF("Kernel : returned from ctx %p (to %p, current %p)\n", thrd_ctx, proc_ctx, current_coroutine);
+
+	// when CtxSwitch returns we are back in the processor coroutine
+}
+
+void spin(processor * this, unsigned int * spin_count) { // idle step: only bumps the caller's counter; `this` is unused
+	(*spin_count)++;
+}
+
+//-----------------------------------------------------------------------------
+// Kernel runner (Temporary)
+
+void scheduler_add( struct thread_h * thrd ) { // store thrd in the first free slot of the system processor
+	LIB_DEBUG_PRINTF("Kernel : scheduling %p on core %p (%u spots)\n", thrd, systemProcessor, systemProcessor->thread_count);
+	for(unsigned int i = 0; i < systemProcessor->thread_count; i++) {
+		if(systemProcessor->threads[i] == NULL) {
+			systemProcessor->threads[i] = thrd;
+			return;
+		}
+	}
+	assertf(false, "Kernel : no free spot to schedule thread %p on core %p", thrd, systemProcessor); // every slot is taken
+}
+
+void scheduler_remove( struct thread_h * thrd ) { // free thrd's slot; stop the core once no thread is left
+	processor * core = systemProcessor;
+	LIB_DEBUG_PRINTF("Kernel : unscheduling %p from core %p\n", thrd, core);
+	int slot = 0;
+	while(slot < core->thread_count && core->threads[slot] != thrd) slot++; // first slot holding thrd, if any
+	if(slot < core->thread_count) core->threads[slot] = NULL;
+
+	bool in_use = false;
+	for(int k = 0; k < core->thread_count && ! in_use; k++) {
+		in_use = core->threads[k] != NULL;
+	}
+	if(in_use) return; // other threads are still scheduled
+
+	LIB_DEBUG_PRINTF("Kernel : terminating core %p\n\n\n", core);
+	core->terminated = true;
+}
+
+void kernel_run( void ) { // temporary entry point: drive the system processor
+	CtxInvokeProcessor(systemProcessor); // NOTE(review): presumably returns once the processor is flagged terminated - confirm
+}
+
+// Local Variables: //
+// mode: c //
+// tab-width: 4 //
+// End: //
Index: src/libcfa/concurrency/threads
===================================================================
--- src/libcfa/concurrency/threads	(revision 1ab7d3fef582ed4704963c9f8694076bc52aa699)
+++ src/libcfa/concurrency/threads	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -9,7 +9,7 @@
 //
 // Author           : Thierry Delisle
-// Created On       : Mon Nov 28 12:27:26 2016
+// Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Thierry Delisle
-// Last Modified On : Mon Nov 28 12:27:26 2016
+// Last Modified On : --
 // Update Count     : 0
 //
@@ -18,6 +18,8 @@
 #define THREADS_H
 
-#include "assert"       //
+#include "assert"
 #include "invoke.h"
+
+#include "coroutines"
 
 //-----------------------------------------------------------------------------
@@ -25,89 +27,44 @@
 // Anything that implements this trait can be resumed.
 // Anything that is resumed is a coroutine.
-trait is_coroutine(dtype T) {
-      void co_main(T* this);
-      coroutine* get_coroutine(T* this);
+trait is_thread(dtype T /*| sized(T)*/) { // a type usable as a thread: it has a main and exposes a thread_h
+      void main(T* this); // thread body
+      thread_h* get_thread(T* this); // thread handle belonging to this object
+	/*void ?{}(T*);
+	void ^?{}(T*);*/
 };
+
+forall(otype T | is_thread(T) )
+static inline coroutine* get_coroutine(T* this) { // coroutine embedded in the thread_h of any is_thread type
+	return &get_thread(this)->c;
+}
+
+static inline coroutine* get_coroutine(thread_h* this) { // overload for a bare thread handle
+	return &this->c;
+}
 
 //-----------------------------------------------------------------------------
 // Ctors and dtors
-void ?{}(coStack_t* this);
-void ?{}(coroutine* this);
-void ^?{}(coStack_t* this);
-void ^?{}(coroutine* this);
+void ?{}(thread_h* this);
+void ^?{}(thread_h* this);
 
 //-----------------------------------------------------------------------------
-// Public coroutine API
-static inline void suspend();
+// thread runner
+// Structure that actually starts and stops threads
+forall(otype T | is_thread(T) )
+struct thread { // wrapper around a user thread object; its constructors and destructor are declared below
+	T handle; // the user-defined thread object
+};
 
-forall(dtype T | is_coroutine(T))
-static inline void resume(T* cor);
+forall(otype T | is_thread(T) )
+void ?{}( thread(T)* this );
 
-forall(dtype T | is_coroutine(T))
-void prime(T* cor);
+forall(otype T, ttype P | is_thread(T) | { void ?{}(T*, P); } )
+void ?{}( thread(T)* this, P params );
+
+forall(otype T | is_thread(T) )
+void ^?{}( thread(T)* this );
 
 //-----------------------------------------------------------------------------
 // PRIVATE exposed because of inline
-
-// Start coroutine routines
-extern "C" {
-      forall(dtype T | is_coroutine(T))
-      void CtxInvokeCoroutine(T* this);
-
-      forall(dtype T | is_coroutine(T))
-      void CtxStart(T* this, void (*invoke)(T*));
-}
-
-// Get current coroutine
-extern coroutine* current_coroutine; //PRIVATE, never use directly
-static inline coroutine* this_coroutine(void) {
-	return current_coroutine;
-}
-
-// Private wrappers for context switch and stack creation
-extern void corCxtSw(coroutine* src, coroutine* dst);
-extern void create_stack( coStack_t* this, unsigned int storageSize );
-
-// Suspend implementation inlined for performance
-static inline void suspend() {
-      coroutine* src = this_coroutine();		// optimization
-
-	assertf( src->last != 0,
-		"Attempt to suspend coroutine %.256s (%p) that has never been resumed.\n"
-		"Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
-		src->name, src );
-	assertf( src->last->notHalted,
-		"Attempt by coroutine %.256s (%p) to suspend back to terminated coroutine %.256s (%p).\n"
-		"Possible cause is terminated coroutine's main routine has already returned.",
-		src->name, src, src->last->name, src->last );
-
-	corCxtSw( src, src->last );
-}
-
-// Resume implementation inlined for performance
-forall(dtype T | is_coroutine(T))
-static inline void resume(T* cor) {
-	coroutine* src = this_coroutine();		// optimization
-	coroutine* dst = get_coroutine(cor);
-
-      if( unlikely(!dst->stack.base) ) {
-		create_stack(&dst->stack, dst->stack.size);
-		CtxStart(cor, CtxInvokeCoroutine);
-	}
-
-      // not resuming self ?
-	if ( src != dst ) {
-		assertf( dst->notHalted ,
-			"Attempt by coroutine %.256s (%p) to resume terminated coroutine %.256s (%p).\n"
-			"Possible cause is terminated coroutine's main routine has already returned.",
-			src->name, src, dst->name, dst );
-
-            // set last resumer
-		dst->last = src;
-	} // if
-
-      // always done for performance testing
-	corCxtSw( src, dst );
-}
 
 #endif //THREADS_H
Index: src/libcfa/concurrency/threads.c
===================================================================
--- src/libcfa/concurrency/threads.c	(revision 1ab7d3fef582ed4704963c9f8694076bc52aa699)
+++ src/libcfa/concurrency/threads.c	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -1,2 +1,3 @@
+//                              -*- Mode: CFA -*-
 //
 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
@@ -8,20 +9,13 @@
 //
 // Author           : Thierry Delisle
-// Created On       : Mon Nov 28 12:27:26 2016
+// Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Thierry Delisle
-// Last Modified On : Mon Nov 28 12:27:26 2016
+// Last Modified On : --
 // Update Count     : 0
 //
 
-extern "C" {
-#include <stddef.h>
-#include <malloc.h>
-#include <errno.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-}
+#include "threads"
 
-#include "threads"
+#include "kernel"
 #include "libhdr.h"
 
@@ -29,150 +23,71 @@
 #include "invoke.h"
 
-//-----------------------------------------------------------------------------
-// Global state variables
-
-// minimum feasible stack size in bytes
-#define MinStackSize 1000
-static size_t pageSize = 0;				// architecture pagesize HACK, should go in proper runtime singleton
-
-//Extra private desctructor for the main
-//FIXME the main should not actually allocate a stack
-//Since the main is never resumed the extra stack does not cause 
-//any problem but it is wasted memory
-void ?{}(coStack_t* this, size_t size);
-void ?{}(coroutine* this, size_t size);
-
-//Main coroutine
-//FIXME do not construct a stack for the main
-coroutine main_coroutine = { 1000 };
-
-//Current coroutine
-//Will need to be in TLS when multi-threading is added
-coroutine* current_coroutine = &main_coroutine;
+#include <stdlib>
 
 //-----------------------------------------------------------------------------
-// Coroutine ctors and dtors
-void ?{}(coStack_t* this) {
-	this->size		= 10240;	// size of stack
-	this->storage	= NULL;	// pointer to stack
-	this->limit		= NULL;	// stack grows towards stack limit
-	this->base		= NULL;	// base of stack
-	this->context	= NULL;	// address of cfa_context_t
-	this->top		= NULL;	// address of top of storage
-	this->userStack	= false;	
+// Forward declarations
+forall(otype T | is_thread(T) )
+void start( thread(T)* this ); // called by the thread(T) constructors
+
+forall(otype T | is_thread(T) )
+void stop( thread(T)* this ); // called by the thread(T) destructor
+
+//-----------------------------------------------------------------------------
+// Thread ctors and dtors
+
+void ?{}(thread_h* this) { // a thread handle only holds a coroutine: default-construct it
+	(&this->c){};
 }
 
-void ?{}(coStack_t* this, size_t size) {
-	this{};
-	this->size = size;
-
-	create_stack(this, this->size);
+void ^?{}(thread_h* this) { // destroy the embedded coroutine
+	^(&this->c){};
 }
 
-void ?{}(coroutine* this) {
-	this->name = "Anonymous Coroutine";
-	this->errno_ = 0;
-	this->state = Start;
-      this->notHalted = true;
-	this->starter = NULL;
-	this->last = NULL;
+forall(otype T | is_thread(T) )
+void ?{}( thread(T)* this ) { // default-construct the user object, then start the thread
+	printf("thread() ctor\n"); // NOTE(review): unconditional print, unlike the LIB_DEBUG_PRINTF used elsewhere - leftover debug output?
+	(&this->handle){};
+	start(this);
 }
 
-void ?{}(coroutine* this, size_t size) {
-	this{};
-	(&this->stack){size};
+forall(otype T, ttype P | is_thread(T) | { void ?{}(T*, P); } )
+void ?{}( thread(T)* this, P params ) { // forward params to T's constructor, then start the thread
+	(&this->handle){ params };
+	start(this);
 }
 
-void ^?{}(coStack_t* this) {
-	if ( ! this->userStack ) {
-		LIB_DEBUG_DO(
-			if ( mprotect( this->storage, pageSize, PROT_READ | PROT_WRITE ) == -1 ) {
-				abortf( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", this, errno, strerror( errno ) );
-			}
-		);
-		free( this->storage );
-	}
+forall(otype T | is_thread(T) )
+void ^?{}( thread(T)* this ) { // stop the thread before destroying the user object
+	stop(this);
+	^(&this->handle){};
 }
 
-void ^?{}(coroutine* this) {}
-
-// Part of the Public API
-// Not inline since only ever called once per coroutine
-forall(dtype T | is_coroutine(T))
-void prime(T* cor) {
-	coroutine* this = get_coroutine(cor);
-	assert(this->state == Start);
-
-	this->state = Primed;
-	resume(cor);
+//-----------------------------------------------------------------------------
+// Starting and stopping threads
+extern "C" {
+      forall(dtype T | is_thread(T))
+      void CtxInvokeThread(T * this); // passed to CtxStart by start(); NOTE(review): presumably the C routine in invoke.c - confirm
 }
 
-// We need to call suspend from invoke.c, so we expose this wrapper that
-// is not inline (We can't inline Cforall in C)
-void suspend_no_inline(void) {
-	suspend();
+forall(otype T | is_thread(T))
+void start( thread(T)* this ) { // build the thread's stack, enter it once, then hand it to the scheduler
+	T* handle  = &this->handle;
+	coroutine* thrd_c = get_coroutine(handle);
+	thread_h*  thrd_h = get_thread   (handle);
+	thrd_c->last = this_coroutine(); // the creator's coroutine, used below as the source context of the switch
+	current_coroutine = thrd_c;
+
+	// LIB_DEBUG_PRINTF("Thread start : %p (t %p, c %p)\n", handle, thrd_h, thrd_c);
+
+	create_stack(&thrd_c->stack, thrd_c->stack.size);
+	CtxStart(handle, CtxInvokeThread);
+	CtxSwitch( thrd_c->last->stack.context, thrd_c->stack.context );
+	// Back in the creator; NOTE(review): presumably because CtxInvokeThread suspends on entry - confirm.
+	scheduler_add(thrd_h);
 }
 
-void corCxtSw(coroutine* src, coroutine* dst) {
-	// THREAD_GETMEM( This )->disableInterrupts();
+forall(otype T | is_thread(T) )
+void stop( thread(T)* this ) {
 
-	// set state of current coroutine to inactive
-	src->state = Inactive;
-
-	// set new coroutine that task is executing
-	current_coroutine = dst;			
-
-	// context switch to specified coroutine
-	CtxSwitch( src->stack.context, dst->stack.context );
-	// when CtxSwitch returns we are back in the src coroutine		
-
-	// set state of new coroutine to active
-	src->state = Active;
-
-	// THREAD_GETMEM( This )->enableInterrupts();
-} //ctxSwitchDirect
-
-void create_stack( coStack_t* this, unsigned int storageSize ) {
-	//TEMP HACK do this on proper kernel startup
-	if(pageSize == 0ul) pageSize = sysconf( _SC_PAGESIZE );
-
-	size_t cxtSize = libCeiling( sizeof(machine_context_t), 8 ); // minimum alignment
-
-	if ( (intptr_t)this->storage == 0 ) {
-		this->userStack = false;
-		this->size = libCeiling( storageSize, 16 );
-		// use malloc/memalign because "new" raises an exception for out-of-memory
-		
-		// assume malloc has 8 byte alignment so add 8 to allow rounding up to 16 byte alignment
-		LIB_DEBUG_DO( this->storage = memalign( pageSize, cxtSize + this->size + pageSize ) );
-		LIB_NO_DEBUG_DO( this->storage = malloc( cxtSize + this->size + 8 ) );
-
-		LIB_DEBUG_DO(
-			if ( mprotect( this->storage, pageSize, PROT_NONE ) == -1 ) {
-				abortf( "(uMachContext &)%p.createContext() : internal error, mprotect failure, error(%d) %s.", this, (int)errno, strerror( (int)errno ) );
-			} // if
-		);
-
-		if ( (intptr_t)this->storage == 0 ) {
-			abortf( "Attempt to allocate %d bytes of storage for coroutine or task execution-state but insufficient memory available.", this->size );
-		} // if
-
-		LIB_DEBUG_DO( this->limit = (char *)this->storage + pageSize );
-		LIB_NO_DEBUG_DO( this->limit = (char *)libCeiling( (unsigned long)this->storage, 16 ) ); // minimum alignment
-
-	} else {
-		assertf( ((size_t)this->storage & (libAlign() - 1)) != 0ul, "Stack storage %p for task/coroutine must be aligned on %d byte boundary.", this->storage, (int)libAlign() );
-		this->userStack = true;
-		this->size = storageSize - cxtSize;
-
-		if ( this->size % 16 != 0u ) this->size -= 8;
-
-		this->limit = (char *)libCeiling( (unsigned long)this->storage, 16 ); // minimum alignment
-	} // if
-	assertf( this->size >= MinStackSize, "Stack size %d provides less than minimum of %d bytes for a stack.", this->size, MinStackSize );
-
-	this->base = (char *)this->limit + this->size;
-	this->context = this->base;
-	this->top = (char *)this->context + cxtSize;
 }
 
Index: src/tests/coroutine.c
===================================================================
--- src/tests/coroutine.c	(revision 1ab7d3fef582ed4704963c9f8694076bc52aa699)
+++ src/tests/coroutine.c	(revision 68e603106adbbaed3d4ca9b4424521a35e0d6396)
@@ -1,4 +1,4 @@
 #include <fstream>
-#include <threads>
+#include <coroutines>
 
 struct Fibonacci {
@@ -15,5 +15,5 @@
 }
 
-void co_main(Fibonacci* this) {
+void main(Fibonacci* this) {
 #ifdef MORE_DEBUG
       sout | "Starting main of coroutine " | this | endl;
