Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision 0640c3507d67bbce432c379b8c8ce8eef0e2e324)
+++ libcfa/src/Makefile.am	(revision 4a962d89fa077776f051d1c19453ecfdc43f8349)
@@ -114,5 +114,6 @@
 	concurrency/kernel/fwd.hfa \
 	concurrency/mutex_stmt.hfa \
-    concurrency/select.hfa
+    concurrency/select.hfa \
+    concurrency/channel.hfa
 
 inst_thread_headers_src = \
Index: libcfa/src/concurrency/channel.hfa
===================================================================
--- libcfa/src/concurrency/channel.hfa	(revision 4a962d89fa077776f051d1c19453ecfdc43f8349)
+++ libcfa/src/concurrency/channel.hfa	(revision 4a962d89fa077776f051d1c19453ecfdc43f8349)
@@ -0,0 +1,89 @@
+#pragma once	// this header defines a struct and functions, so it must be expanded only once per translation unit
+#include <locks.hfa>
+struct no_reacq_lock {	// exp_backoff_then_block_lock variant whose on_wakeup override (below) is empty
+    inline exp_backoff_then_block_lock;	// embedded base lock (Plan 9 style inheritance)
+};
+
+// Forwarding wrappers: each casts this to the embedded exp_backoff_then_block_lock and calls the same-named routine on it; written by hand to get around a Plan 9 inheritance bug where the resolver can't find the appropriate routine to call
+static inline void   ?{}( no_reacq_lock & this ) { ((exp_backoff_then_block_lock &)this){}; }
+static inline bool   try_lock(no_reacq_lock & this) { return try_lock(((exp_backoff_then_block_lock &)this)); }
+static inline void   lock(no_reacq_lock & this) { lock(((exp_backoff_then_block_lock &)this)); }
+static inline void   unlock(no_reacq_lock & this) { unlock(((exp_backoff_then_block_lock &)this)); }
+static inline void   on_notify(no_reacq_lock & this, struct thread$ * t ) { on_notify(((exp_backoff_then_block_lock &)this), t); }
+static inline size_t on_wait(no_reacq_lock & this) { return on_wait(((exp_backoff_then_block_lock &)this)); }
+// override wakeup so that we don't reacquire the lock if using a condvar: the body is empty and both parameters are ignored
+static inline void   on_wakeup( no_reacq_lock & this, size_t recursion ) {}
+
+forall( T ) {
+struct __attribute__ ((aligned (64))) channel {	// bounded FIFO of T; insert/remove serialize on mutex_lock; 64-byte aligned
+    size_t size;	// capacity: number of T slots in buffer
+    size_t front, back, count;	// ring state: next slot to remove from, next slot to insert into, number of stored elements
+    T * buffer;	// element storage, allocated in the constructor
+    fast_cond_var( no_reacq_lock ) prods, cons;	// wait queues; insert and remove below both block on prods, cons is only constructed
+    no_reacq_lock mutex_lock;	// taken at the start of insert and remove
+};
+
+static inline void ?{}( channel(T) &c, size_t _size ) with(c) {	// construct an empty channel with room for _size elements
+    size = _size;
+    front = back = count = 0;	// empty ring: both indices start at slot 0
+    buffer = anew( size );	// element array with one slot per unit of capacity
+    prods{};	// explicitly default-construct the two wait queues and the lock
+    cons{};
+    mutex_lock{};
+}
+
+static inline void ?{}( channel(T) &c ){ ((channel(T) &)c){ 0 }; }	// default: delegate with _size 0; with size 0 both insert (count == size) and remove (count == 0) take their wait branch
+static inline void ^?{}( channel(T) &c ) with(c) { delete( buffer ); }	// release the buffer; NOTE(review): it is allocated with anew but released with delete rather than adelete - confirm this is intended
+static inline size_t get_count( channel(T) & chan ) with(chan) { return count; }	// number of elements currently buffered; read without taking mutex_lock
+static inline size_t get_size( channel(T) & chan ) with(chan) { return size; }	// capacity given at construction; read without taking mutex_lock
+
+static inline void insert_( channel(T) & chan, T elem ) with(chan) {	// unsynchronized helper: store elem at back; both callers in this file hold mutex_lock and call it only when count < size
+    memcpy((void *)&buffer[back], (void *)&elem, sizeof(T));	// bitwise copy into the slot
+    count += 1;
+    back++;
+    if ( back == size ) back = 0;	// wrap around the ring
+}
+
+
+static inline void insert( channel(T) & chan, T elem ) with(chan) {	// blocking insert of a bitwise copy of elem
+    lock( mutex_lock );
+
+    // wait if buffer is full, work will be completed by someone else
+    if ( count == size ) { 
+        wait( prods, mutex_lock, (uintptr_t)&elem );	// queue &elem; remove() copies it into the buffer through front( prods ) before calling notify_one
+        return;	// no unlock here: on_wakeup for no_reacq_lock is empty, so the lock is not reacquired on wakeup
+    } // if
+
+    if ( count == 0 && !empty( prods ) )	// empty buffer with a queued waiter: remove() queues on prods when count == 0, so (for size > 0) this is a consumer's &retval
+        // do waiting consumer work
+        memcpy((void *)front( prods ), (void *)&elem, sizeof(T)); 
+    else insert_( chan, elem );
+    
+    notify_one( prods );	// consumers wait on prods as well; cons is not used by this routine
+    unlock( mutex_lock );
+}
+
+static inline T remove( channel(T) & chan ) with(chan) {	// blocking removal of the element at front; returns a bitwise copy of it
+    lock( mutex_lock );
+    T retval;
+
+    // wait if buffer is empty, work will be completed by someone else
+    if (count == 0) { 
+        wait( prods, mutex_lock, (uintptr_t)&retval );	// consumers queue on prods too; insert() copies its element into retval through front( prods )
+        return retval;	// no unlock here: on_wakeup for no_reacq_lock is empty, so the lock is not reacquired on wakeup
+    }
+
+    // Remove from buffer
+    memcpy((void *)&retval, (void *)&buffer[front], sizeof(T));
+    count -= 1;
+    front = (front + 1) % size;	// advance with wrap-around
+
+    if (count == size - 1 && !empty( prods ) ) 	// buffer was full and a producer is queued in insert()
+        insert_( chan, *((T *)front( prods )) );  // do waiting producer work
+
+    notify_one( prods );
+    unlock( mutex_lock );
+    return retval;
+}
+
+} // forall( T )
Index: tests/concurrent/channels/chan_big.cfa
===================================================================
--- tests/concurrent/channels/chan_big.cfa	(revision 4a962d89fa077776f051d1c19453ecfdc43f8349)
+++ tests/concurrent/channels/chan_big.cfa	(revision 4a962d89fa077776f051d1c19453ecfdc43f8349)
@@ -0,0 +1,223 @@
+#include <locks.hfa>
+#include <fstream.hfa>
+#include <stdio.h>
+#include <string.h>
+#include <channel.hfa>
+#include <thread.hfa>
+#include <time.hfa>
+#include <stats.hfa>
+size_t Processors = 10, Channels = 10, Producers = 40, Consumers = 40, ChannelSize = 128;	// defaults, overridden from argv in main; Producers and Consumers are per channel
+
+owner_lock o;	// held while the thread mains update total_operations, cons_check and prod_check
+
+unsigned long long total_operations = 0;	// sum of the runs counters of all producer and consumer threads
+
+#define BIG 1	// selects the bigObject payload; the #else branches use size_t instead
+
+struct bigObject {	// channel payload used when BIG is defined: eight size_t fields
+    size_t a;
+    size_t b;
+    size_t c;
+    size_t d;
+    size_t e;
+    size_t f;
+    size_t g;
+    size_t h;
+};
+
+void ?{}( bigObject & this, size_t i ) with(this) { a = b = c = d = e = f = g = h = i; }	// every field receives the same value i
+void ?{}( bigObject & this ) { this{ 0 }; }	// default construction: all fields zero
+
+#ifdef BIG
+typedef channel( bigObject ) Channel;
+#else
+typedef channel( size_t ) Channel;	// element type matches the size_t values inserted and removed by the non-BIG branches
+#endif
+
+Channel * channels;	// array of Channels channels, allocated and released in main
+
+volatile bool cons_done = false, prod_done = false;	// stop flags set by main and polled at the top of the Consumer / Producer loops
+size_t cons_check = 0, prod_check = 0;	// XOR of the per-thread checksums; main reports a mismatch if they differ
+
+thread Consumer {	// thread type whose main (below) removes elements from one channel
+    size_t i;	// index into channels of the channel this consumer uses
+};
+static inline void ?{}( Consumer & c, size_t i, cluster & clu ) {
+    ((thread &)c){ clu };	// pass clu to the thread constructor
+    c.i = i; 
+}
+// Consumer body: remove from channels[this.i] until cons_done is seen, then publish the counters under lock o.
+void main(Consumer & this) {
+    unsigned long long ops = 0;
+    size_t checksum = 0;
+    while ( ! cons_done ) {
+        #ifdef BIG
+        bigObject item = remove( channels[ this.i ] );
+        checksum ^= item.a + item.b + item.c + item.d + item.d + item.e + item.f + item.g + item.h;
+        #else
+        size_t item = remove( channels[ this.i ] );
+        checksum ^= item;
+        #endif
+
+        // only removals completed before the producers are stopped are counted
+        if ( ! prod_done ) ops += 1;
+    }
+    lock(o);
+    total_operations += ops;
+    cons_check ^= checksum;
+    unlock(o);
+}
+
+thread Producer {	// thread type whose main (below) inserts elements into one channel
+    size_t i;	// index into channels of the channel this producer uses
+};
+static inline void ?{}( Producer & p, size_t i, cluster & clu ) {
+    ((thread &)p){ clu };	// pass clu to the thread constructor
+    p.i = i;
+}
+// Producer body: insert payloads built from its own counter into channels[this.i] until prod_done is seen.
+void main(Producer & this) {
+    unsigned long long ops = 0;
+    size_t checksum = 0;
+    while ( ! prod_done ) {
+        #ifdef BIG
+        bigObject item{(size_t)ops};
+        insert( channels[ this.i ], item );
+        checksum ^= item.a + item.b + item.c + item.d + item.d + item.e + item.f + item.g + item.h;
+        #else
+        insert( channels[ this.i ], (size_t)ops );
+        checksum ^= (size_t)ops;
+        #endif
+        ops += 1;
+    }
+    // fold this thread's results into the shared totals under lock o
+    lock(o);
+    total_operations += ops;
+    prod_check ^= checksum;
+    unlock(o);
+}
+
+
+// Test driver: starts Producers producers and Consumers consumers per channel, lets them run for 10 seconds, stops them, drains the channels and prints a message if cons_check != prod_check.
+// argv: processors, producers per channel, consumers per channel, channels, channel size (each > 0, or "d" for the default); the switch cases fall through on purpose so N arguments set the first N options.
+int main( int argc, char *argv[] ) {
+    switch( argc ) {
+      case 6:
+		if ( strcmp( argv[5], "d" ) != 0 ) {			// default ?
+			if ( atoi( argv[5] ) < 1) goto Usage;
+			ChannelSize = atoi( argv[5] );
+		} // if
+      case 5:
+		if ( strcmp( argv[4], "d" ) != 0 ) {			// default ?
+			if ( atoi( argv[4] ) < 1 ) goto Usage;
+			Channels = atoi( argv[4] );
+		} // if
+      case 4:
+		if ( strcmp( argv[3], "d" ) != 0 ) {			// default ?
+			if ( atoi( argv[3] ) < 1 ) goto Usage;
+			Consumers = atoi( argv[3] );
+		} // if
+      case 3:
+		if ( strcmp( argv[2], "d" ) != 0 ) {			// default ?
+			if ( atoi( argv[2] ) < 1 ) goto Usage;
+			Producers = atoi( argv[2] );
+		} // if
+      case 2:
+		if ( strcmp( argv[1], "d" ) != 0 ) {			// default ?
+			if ( atoi( argv[1] ) < 1 ) goto Usage;
+			Processors = atoi( argv[1] );
+		} // if
+	  case 1:											// use defaults
+		break;
+	  default:
+	  Usage:
+		sout | "Usage: " | argv[0]
+             | " [ processors > 0 | d ]"
+             | " [ ProdsPerChan > 0 | d ]"
+             | " [ ConsPerChan > 0 | d ]"
+             | " [ channels > 0 | d ]"
+             | " [ channel size > 0 | d ]";
+		exit( EXIT_FAILURE );
+    }
+
+    size_t Clusters = 1;
+    // create a cluster and heap-allocate Processors processors, assigned to clusters round-robin
+    cluster clus[Clusters];
+    processor * proc[Processors];
+    for ( i; Processors ) {
+        (*(proc[i] = alloc())){clus[i % Clusters]};
+    }
+
+    channels = anew( Channels );
+
+    // NOTE(review): anew above presumably default-constructs every channel already; this loop constructs each one again with the real capacity - confirm
+    for ( i; Channels ) {
+        channels[i]{ ChannelSize };
+    }
+
+    sout | "start";
+    Consumer * c[Consumers * Channels];
+    Producer * p[Producers * Channels];
+
+    for ( i; Consumers * Channels ) {
+        (*(c[i] = alloc())){ i % Channels, clus[i % Clusters] };
+    }
+
+    for ( i; Producers * Channels ) {
+        (*(p[i] = alloc())){ i % Channels, clus[i % Clusters] };
+    }
+
+    sleep(10`s);
+    prod_done = true;
+
+    for ( i; Producers * Channels ) {
+        delete(p[i]);
+    }
+
+    sout | "prods";
+    cons_done = true;
+    for ( i; Channels ) {
+        // consumers may still be inside remove(); when fewer than Consumers items are buffered, insert Consumers zero-valued items (they leave the XOR checksum unchanged) so each consumer can return and see cons_done
+        if ( get_count( channels[i] ) < Consumers ){
+            #ifdef BIG
+            bigObject b{0};
+            #endif
+            for ( j; Consumers ) {
+                #ifdef BIG
+                insert( channels[i], b );
+                #else
+                insert( channels[i], 0 );
+                #endif
+            }
+        }
+    }
+    sout | "cons";
+    for ( i; Consumers * Channels ) {
+        delete(c[i]);
+    }
+
+    sout | "flush";
+    for ( i; Channels ) {
+        for ( ;; ) {
+            if ( get_count( channels[i] ) > 0 ) {
+                #ifdef BIG
+                bigObject j = remove( channels[ i ] );
+                cons_check = cons_check ^ (j.a + j.b + j.c + j.d + j.d + j.e + j.f + j.g + j.h);
+                #else
+                size_t j = remove( channels[ i ] );
+                cons_check = cons_check ^ j;
+                #endif
+            } else break;
+        }
+    }
+
+    adelete( channels );
+    if ( cons_check != prod_check )
+        sout | "CHECKSUM MISMATCH !!!";
+
+    for ( i; Processors ) {
+        delete(proc[i]);
+    }
+    sout | "done";
+    return 0;
+}
