Index: libcfa/src/concurrency/cofor.cfa
===================================================================
--- libcfa/src/concurrency/cofor.cfa	(revision be3f163306e87aeb232bf497922012355b5762df)
+++ libcfa/src/concurrency/cofor.cfa	(revision 0d41b2e19de49d4b3c93a0edb0c94e41b1fa2dea)
@@ -4,10 +4,10 @@
 // cofor ( uC++ COFOR )
 
-thread cofor_runner {
+thread cofor_task {	// worker thread: its main calls loop_body(i) for each i in [low, high)
 	ssize_t low, high;
 	__cofor_body_t loop_body;
 };
 
-static void ?{}( cofor_runner & this, ssize_t low, ssize_t high, __cofor_body_t loop_body ) {
+static void ?{}( cofor_task & this, ssize_t low, ssize_t high, __cofor_body_t loop_body ) {
 	this.low = low;
 	this.high = high;
@@ -15,5 +15,5 @@
 }
 
-void main( cofor_runner & this ) with( this ) {
+void main( cofor_task & this ) with( this ) {
 	for ( ssize_t i = low; i < high; i++ )
 		loop_body(i);
@@ -29,5 +29,5 @@
 	ssize_t i = 0;
 	ssize_t stride_iter = low;
-	cofor_runner * runners[ threads ];
+	cofor_task * runners[ threads ];
 	for ( i; threads ) {
 		runners[i] = alloc();
Index: libcfa/src/concurrency/cofor.hfa
===================================================================
--- libcfa/src/concurrency/cofor.hfa	(revision be3f163306e87aeb232bf497922012355b5762df)
+++ libcfa/src/concurrency/cofor.hfa	(revision 0d41b2e19de49d4b3c93a0edb0c94e41b1fa2dea)
@@ -1,3 +1,5 @@
 #include <thread.hfa>
+#include <locks.hfa>
+#include <list.hfa>
 
 //////////////////////////////////////////////////////////////////////////////////////////
@@ -14,4 +16,52 @@
 		__Cofor__( low, high, __CFA_loopLambda__ ); \
 	}
+
+struct runner_node {                  // one queued unit of work for a cofor_runner
+    void * value;                     // opaque argument passed to the runner's func, then freed by the runner
+    inline dlink(runner_node);        // link field; send_work inserts these nodes into a dlist( runner_node )
+};
+P9_EMBEDDED( runner_node, dlink(runner_node) ) // NOTE(review): presumably the list.hfa boilerplate exposing the embedded dlink to dlist — confirm
+
+thread cofor_runner {                 // worker thread that executes queued work items (see main)
+	go_mutex mutex_lock;              // MX lock held while popping from / inserting into items
+    dlist( runner_node ) items;       // pending work nodes: appended by send_work, popped by main
+    void (*func)(void *);             // called by main on each node's value; assigned by start_runners
+    volatile bool done;               // false at construction; set by end_runners so main stops once items is empty
+};
+
+void ?{}( cofor_runner & this ) { this.done = false; } // constructor: only clears done; func stays unassigned until start_runners sets it
+
+void main( cofor_runner & this ) with(this) { // thread body: run func on queued values until done is set and items is empty
+    while ( !done || !items`isEmpty ) {
+		lock( mutex_lock );
+        runner_node * node = &try_pop_front( items );
+		unlock( mutex_lock );
+        if ( !node ) continue;             // nothing was queued (the loop also runs while !done with an empty list): poll again instead of dereferencing null
+        func( node->value );               // invoked after the lock has been released
+        free( node->value ); free( node ); // the runner releases both the payload and the node allocated by send_work
+    }
+}
+
+void start_runners( cofor_runner * thds, unsigned nprocs, void (*func)(void *) ) { // give each of the nprocs runners in thds its work function
+	for ( unsigned idx = 0; idx < nprocs; idx += 1 ) {
+		thds[idx].func = func;
+	}
+}
+
+void end_runners( cofor_runner * thds, unsigned nprocs ) { // flag all nprocs runners as done; each main loop exits once its own queue is empty
+	for ( unsigned idx = 0; idx < nprocs; idx += 1 ) {
+		thds[idx].done = true;
+	}
+}
+
+void send_work( cofor_runner * thds, unsigned nprocs, unsigned & curr_proc, void * value ) { // queue value on thds[curr_proc], then advance curr_proc
+	runner_node * item = malloc();
+	(*item){};                                   // run the node's constructor on the raw allocation
+	item->value = value;
+	lock( thds[curr_proc].mutex_lock );          // the runner's main pops from this list under the same lock
+	insert_last( thds[curr_proc].items, *item );
+	unlock( thds[curr_proc].mutex_lock );
+	curr_proc = ( curr_proc + 1 ) % nprocs;      // the next call targets the following runner, wrapping at nprocs
+}
 
 //////////////////////////////////////////////////////////////////////////////////////////
@@ -42,3 +92,2 @@
     delete( this.runner );
 }
-
