Index: libcfa/src/concurrency/cofor.cfa
===================================================================
--- libcfa/src/concurrency/cofor.cfa	(revision e4c3819e836abd40e60fd4abf148f74913f1b85b)
+++ libcfa/src/concurrency/cofor.cfa	(revision e4c3819e836abd40e60fd4abf148f74913f1b85b)
@@ -0,0 +1,69 @@
+#include <cofor.hfa>
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// cofor ( uC++ COFOR )
+
+thread co_runner { // worker thread type: each instance runs one chunk of a cofor loop
+	ssize_t low, high; // chunk bounds; main iterates i from low while i < high
+	__cofor_body_t loop_body; // function invoked with each index of the chunk
+};
+
+static void ?{}( co_runner & this, ssize_t low, ssize_t high, __cofor_body_t loop_body ) { // constructor: records the chunk bounds and loop body in the runner
+	this.low = low;
+	this.high = high;
+	this.loop_body = loop_body;
+}
+
+void main( co_runner & this ) with( this ) { // thread main: with( this ) lets the runner's fields be named directly
+	for ( ssize_t i = low; i < high; i++ ) // half-open range: high itself is not visited
+		loop_body(i);
+}
+
+void cofor( ssize_t low, ssize_t high, __cofor_body_t loop_body ) libcfa_public { // splits [low, high) into contiguous chunks and gives each chunk to its own co_runner
+	ssize_t range = high - low; // number of iterations requested
+  if ( range <= 0 ) return; // empty or inverted range: nothing to run
+	ssize_t nprocs = get_proc_count( *active_cluster() ); // processor count reported for the active cluster
+  if ( nprocs == 0 ) return; // no processors reported: returns without running any iteration
+	ssize_t threads = range < nprocs ? range : nprocs; // one runner per processor, but never more runners than iterations
+	ssize_t stride = range / threads + 1, extras = range % threads; // stride starts at the larger chunk size; extras counts the chunks that need it
+	ssize_t i = 0;
+	ssize_t stride_iter = low; // lower bound of the next chunk to hand out
+	co_runner * runners[ threads ]; // one pointer slot per runner
+	for ( i; threads ) { // allocate storage for every runner before any is constructed
+		runners[i] = alloc();
+	}
+	for ( i = 0; i < extras; i += 1, stride_iter += stride ) { // first extras runners take range / threads + 1 iterations each
+		(*runners[i]){ stride_iter, stride_iter + stride, loop_body }; // construct runner i over [stride_iter, stride_iter + stride); NOTE(review): presumably the thread starts once constructed - confirm
+	}
+	stride -= 1; // remaining runners take range / threads iterations each
+	for ( ; i < threads; i += 1, stride_iter += stride ) { // continues from the i left by the previous loop
+		(*runners[i]){ stride_iter, stride_iter + stride, loop_body };
+	}
+	for ( i; threads ) { // release every runner; NOTE(review): presumably delete waits for the thread to finish - confirm
+		delete( runners[i] );
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// parallel (COBEGIN/COEND)
+
+thread para_runner { // worker thread type: each instance runs one statement function passed to parallel
+	parallel_stmt_t body; // function this runner calls from its main
+	void * arg; // argument handed to body unchanged
+};
+
+static void ?{}( para_runner & this, parallel_stmt_t body, void * arg ) { // constructor: records the statement function and its argument in the runner
+	this.body = body;
+	this.arg = arg;
+}
+
+void main( para_runner & this ) with( this ) { body( arg ); } // thread main: a single call of the stored function on the stored argument
+
+void parallel( parallel_stmt_t * stmts, void ** args, size_t num ) libcfa_public { // runs stmts[i]( args[i] ) for i in [0, num), each in its own para_runner
+	para_runner * runners[ num ]; // one pointer slot per statement
+	for ( i; num ) // allocate runner i and construct it in the same expression
+		(*(runners[i] = malloc())){ stmts[i], args[i] }; // stmts and args are read at the same index, so both need at least num entries
+	for ( i; num ) // release every runner; NOTE(review): presumably delete waits for the thread to finish - confirm
+		delete( runners[i] );
+}
+
Index: libcfa/src/concurrency/cofor.hfa
===================================================================
--- libcfa/src/concurrency/cofor.hfa	(revision 334e0cf23899bfd4df7f5e70210364de3265d541)
+++ libcfa/src/concurrency/cofor.hfa	(revision e4c3819e836abd40e60fd4abf148f74913f1b85b)
@@ -3,75 +3,19 @@
 //////////////////////////////////////////////////////////////////////////////////////////
 // cofor ( uC++ COFOR )
-typedef void (*cofor_body_t)( long );
-thread co_runner {
-    long lo, hi;
-    cofor_body_t loop_body;
-};
+typedef void (*__cofor_body_t)( ssize_t ); // loop-body callback: takes one loop index, returns nothing
 
-void ?{}( co_runner & this, long lo, long hi, cofor_body_t loop_body ) {
-    this.lo = lo;
-    this.hi = hi;
-    this.loop_body = loop_body;
-}
+void cofor( ssize_t low, ssize_t high, __cofor_body_t loop_body ); // calls loop_body on the indices of [low, high); defined in cofor.cfa
 
-void main( co_runner & this ) with( this ) {
-    for ( long i = lo; i < hi; i++ )
-        loop_body(i);
-}
-
-void cofor( long lo, long hi, cofor_body_t loop_body ) {
-    long range = hi - lo;
-    if ( range <= 0 ) return;
-    long nprocs = get_proc_count( *active_cluster() );
-    if ( nprocs == 0 ) return;
-    long threads = range < nprocs ? range : nprocs;
-    long stride = range / threads + 1, extras = range % threads;
-    long i = 0;
-    long stride_iter = lo;
-    co_runner * runners[ threads ];
-    for ( i; threads ) {
-        runners[i] = alloc();
-    }
-    for ( i = 0; i < extras; i += 1, stride_iter += stride ) {
-        (*runners[i]){ stride_iter, stride_iter + stride, loop_body };
-    }
-    stride -= 1;
-    for ( ; i < threads; i += 1, stride_iter += stride ) {
-        (*runners[i]){ stride_iter, stride_iter + stride, loop_body };
-    }
-    for ( i; threads ) {
-        delete( runners[i] );
-    }
-}
-
-#define COFOR( lidname, low, high, body ) \
-    { \
-        void loopBody( long lidname ) { \
-            body \
-        } \
-        cofor( low, high, loopBody ); \
-    }
+#define COFOR( lidname, low, high, loopbody ) /* wraps loopbody in a nested function whose index parameter is named lidname, then passes it to cofor over low..high */ \
+	{ \
+		void __CFA_loopLambda__( ssize_t lidname ) { \
+			loopbody \
+		} \
+		cofor( low, high, __CFA_loopLambda__ ); \
+	}
 
 //////////////////////////////////////////////////////////////////////////////////////////
 // parallel (COBEGIN/COEND)
 typedef void (*parallel_stmt_t)( void * );
-thread para_runner {
-    parallel_stmt_t body;
-    void * arg;
-};
 
-void ?{}( para_runner & this, parallel_stmt_t body, void * arg ) { 
-    this.body = body;
-    this.arg = arg;
-}
-
-void main( para_runner & this ) with( this ) { body( arg ); }
-
-void parallel( parallel_stmt_t * stmts, void ** args, size_t num ) {
-    para_runner * runners[ num ];
-    for ( i; num )
-        (*(runners[i] = malloc())){ stmts[i], args[i] };
-    for ( i; num )
-        delete( runners[i] );
-}
-
+void parallel( parallel_stmt_t * stmts, void ** args, size_t num ); // calls stmts[i]( args[i] ) for each i below num; defined in cofor.cfa
