Index: benchmark/readyQ/churn.cfa
===================================================================
--- benchmark/readyQ/churn.cfa	(revision 3df86ccdb84e0936b9d0bb87c4e1d4c237999422)
+++ benchmark/readyQ/churn.cfa	(revision 3df86ccdb84e0936b9d0bb87c4e1d4c237999422)
@@ -0,0 +1,101 @@
+#include "rq_bench.hfa"
+
+unsigned spot_cnt = 2;	// number of slots in `spots`; defaults to 2 and is bound to the 's' / "spots" option in main
+bench_sem * volatile * spots;	// array of spot_cnt slots allocated in main; each slot holds 0p or the address of some BThrd's sem
+// Benchmark thread: its main repeatedly swaps the address of `sem` into a random slot of `spots` and blocks on `sem`.
+thread BThrd {
+	unsigned long long count;	// loop iterations completed; incremented once per return from wait( sem ) inside the loop
+	unsigned long long blocks;	// running sum of the values returned by wait( sem ) inside the loop
+	bench_sem sem;	// semaphore this thread blocks on; its address is what gets stored in `spots`
+};
+// Constructor: constructs the thread part of `this` with bench_cluster, then zeroes both counters.
+void ?{}( BThrd & this ) {
+	((thread&)this){ bench_cluster };	// bench_cluster is declared outside this file (presumably the cluster the benchmark runs on — confirm)
+	this.count  = 0;
+	this.blocks = 0;
+}
+// Destructor with an empty body. NOTE(review): the `mutex` qualifier presumably makes destruction wait for the thread to finish — confirm.
+void ^?{}( BThrd & mutex this ) {}
+// Thread body: once started, loop { put own sem in a random spot, post the sem that was there, block on own sem } until told to stop.
+void main( BThrd & this ) with( this ) {
+	wait( sem );	// block until the program's main posts this thread's sem to start the run
+	for() {
+		uint64_t r = thread_rand();
+		bench_sem * next = __atomic_exchange_n(&spots[r % spot_cnt], &sem, __ATOMIC_SEQ_CST);	// atomically install &sem in a random spot and take whatever pointer was there
+		if(next) post( *next );	// the spot was occupied: wake the thread whose sem we displaced
+		blocks += wait( sem );	// block until our sem is posted (by a thread that displaces us, or by main at shutdown); NOTE(review): wait presumably returns whether it blocked — confirm
+		count ++;
+		if( clock_mode && stop) break;	// clock_mode set: leave once `stop` is set (main sets it after its wait() returns)
+		if(!clock_mode && count >= stop_count) break;	// clock_mode clear: leave after stop_count iterations
+	}
+
+	__atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);	// one fewer thread still running (main initialises threads_left to nthreads)
+}
+
+// Entry point: parse options, run nthreads BThrd threads exchanging semaphores through spot_cnt spots, then print the statistics.
+int main(int argc, char * argv[]) {
+	cfa_option opt[] = {
+		BENCH_OPT,
+		{ 's', "spots", "Number of spots in the system", spot_cnt }
+	};
+	BENCH_OPT_PARSE("cforall churn benchmark");
+	if( spot_cnt == 0 ) { printf("Number of spots must be at least 1\n"); return 1; }	// every thread computes r % spot_cnt, so 0 would divide by zero
+	{
+		unsigned long long global_counter = 0;
+		unsigned long long global_blocks  = 0;
+		Time start, end;
+		BenchCluster bc = { nprocs };
+		{
+			spots = aalloc(spot_cnt);
+			for(i; spot_cnt) {
+				spots[i] = 0p;	// every spot starts empty
+			}
+
+			threads_left = nthreads;
+			BThrd * threads[nthreads];
+			for(i; nthreads ) {
+				threads[i] = malloc();
+				(*threads[i]){};	// run the BThrd constructor on the raw allocation
+			}
+			printf("Starting\n");
+
+			bool is_tty = isatty(STDOUT_FILENO);
+			start = timeHiRes();
+
+			for(i; nthreads) {
+				post( threads[i]->sem );	// release each thread from the initial wait( sem ) in its main
+			}
+			wait(start, is_tty);	// declared outside this file; presumably blocks until the run is over — confirm in rq_bench.hfa
+
+			stop = true;
+			end = timeHiRes();
+			printf("\nDone\n");
+
+			for(i; nthreads) {
+				post( threads[i]->sem );	// wake the thread in case it is still blocked in a spot, so it can observe the exit condition
+				BThrd & thrd = join( *threads[i] );
+				global_counter += thrd.count;
+				global_blocks  += thrd.blocks;
+			}
+			// Free only once every thread is joined: a thread that is still running may pick up and post a sem address that an exited thread left in spots.
+			for(i; nthreads) delete(threads[i]);
+			free(spots);
+		}
+
+		printf("Duration (ms)        : %'lf\n", (end - start)`dms);
+		printf("Number of processors : %'d\n", nprocs);
+		printf("Number of threads    : %'d\n", nthreads);
+		printf("Number of spots      : %'u\n", spot_cnt);
+		printf("Total Operations(ops): %'15llu\n", global_counter);
+		printf("Total blocks         : %'15llu\n", global_blocks);
+		printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / (end - start)`ds);
+		printf("ns per ops           : %'18.2lf\n", (end - start)`dns / global_counter);
+		printf("Ops per threads      : %'15llu\n", global_counter / nthreads);
+		printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
+		printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`ds);
+		printf("ns per ops/procs     : %'18.2lf\n", (end - start)`dns / (global_counter / nprocs));
+		fflush(stdout);
+	}
+
+	return 0;
+}
