Index: benchmark/readyQ/churn.cfa
===================================================================
--- benchmark/readyQ/churn.cfa	(revision 49a1684c699b9f53c3cd9726cf01927c9d596a44)
+++ benchmark/readyQ/churn.cfa	(revision a647941f5133aff47f81f2b6299eed6538bdbd55)
@@ -44,5 +44,5 @@
 		{ 's', "spots", "Number of spots in the system", spot_cnt }
 	};
-	BENCH_OPT_PARSE("cforall cycle benchmark");
+	BENCH_OPT_PARSE("cforall churn benchmark");
 
 	{
Index: benchmark/readyQ/churn.cpp
===================================================================
--- benchmark/readyQ/churn.cpp	(revision a647941f5133aff47f81f2b6299eed6538bdbd55)
+++ benchmark/readyQ/churn.cpp	(revision a647941f5133aff47f81f2b6299eed6538bdbd55)
@@ -0,0 +1,109 @@
+#include "rq_bench.hpp"
+#include <libfibre/fibre.h>
+
+unsigned spot_cnt = 2;   // number of semaphore "spots" churners contend on (overridable via -s)
+FredSemaphore * spots;   // array of spot_cnt semaphores, allocated in main
+
+// Per-worker state for the churn benchmark.
+struct Churner {
+	unsigned long long count = 0;  // operations completed by this worker
+	unsigned long long blocks = 0; // P() calls that found the semaphore open (see churner_main)
+	bool skip = false;             // skip the V() on the first iteration (set for the first spot_cnt workers)
+	__lehmer64_state_t seed;       // private PRNG state for spot selection
+	bench_sem self;                // presumably used by the harness to park this worker -- TODO confirm
+};
+
+// Benchmark worker: repeatedly V()s then P()s a randomly selected semaphore
+// "spot" until the stop condition is reached, then signals completion by
+// decrementing the global threads_left counter.
+void churner_main( Churner * self ) {
+	fibre_park(); // block until main unparks all workers for a synchronized start
+	for(;;) {
+		unsigned r = __lehmer64( self->seed );
+		FredSemaphore & sem = spots[r % spot_cnt];
+		// skip is true only for the first iteration of the first spot_cnt
+		// workers: they P() once without a matching prior V().
+		if(!self->skip) sem.V();
+		// NOTE(review): this increments when P() found the semaphore open,
+		// i.e. when the call did NOT block -- confirm "blocks" is the
+		// intended meaning of this counter.
+		self->blocks += sem.P() == SemaphoreWasOpen ? 1 : 0;
+		self->skip = false;
+
+		self->count ++;
+		// clock mode stops on the global flag; count mode on a fixed op count.
+		if( clock_mode && stop) break;
+		if(!clock_mode && self->count >= stop_count) break;
+	}
+
+	__atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);
+}
+
+// Entry point: parses benchmark options, spawns nthreads churner fibres over
+// spot_cnt semaphores, runs the timed benchmark, drains the semaphores so
+// every fibre can exit, then prints aggregate statistics.
+int main(int argc, char * argv[]) {
+	option_t opt[] = {
+		BENCH_OPT,
+		{ 's', "spots", "Number of spots in the system", spot_cnt }
+	};
+	BENCH_OPT_PARSE("libfibre churn benchmark");
+
+	{
+		unsigned long long global_counter = 0;
+		unsigned long long global_blocks  = 0;
+		uint64_t start, end;
+		FibreInit(1, nprocs );
+		{
+			// Value-initialized semaphores; initial count presumably 0 -- verify.
+			spots = new FredSemaphore[spot_cnt]();
+
+			threads_left = nthreads;
+			Churner * thddata = new Churner[nthreads]();
+			for(unsigned i = 0; i < nthreads; i++ ) {
+				Churner & t = thddata[i];
+				// First spot_cnt workers skip their initial V() so fewer
+				// tokens than threads circulate in the system.
+				t.skip = i < spot_cnt;
+				t.seed = rand();
+			}
+			Fibre * threads[nthreads];
+			for(unsigned i = 0; i < nthreads; i++) {
+				// NOTE(review): casting void(*)(Churner*) to void(*)(void*) is
+				// technically undefined behavior, though common on these ABIs.
+				threads[i] = new Fibre( reinterpret_cast<void (*)(void *)>(churner_main), &thddata[i] );
+			}
+			printf("Starting\n");
+
+			bool is_tty = isatty(STDOUT_FILENO);
+			start = timeHiRes();
+
+			// Workers park on startup; unpark them all for a synchronized start.
+			for(unsigned i = 0; i < nthreads; i++ ) {
+				fibre_unpark( threads[i] );
+			}
+			wait<Fibre>(start, is_tty);
+
+			stop = true;
+			end = timeHiRes();
+			printf("\nDone\n");
+
+			// Flush the semaphores so workers blocked in P() can observe the
+			// stop flag and exit; 10000 V()s per spot is assumed to cover any
+			// nthreads -- TODO confirm.
+			for(unsigned i = 0; i < spot_cnt; i++) {
+				for(int j = 0; j < 10000; j++) spots[i].V();
+			}
+
+			printf("All Vd\n");
+
+			for(unsigned i = 0; i < nthreads; i++ ) {
+				fibre_join( threads[i], nullptr );
+				global_counter += thddata[i].count;
+				global_blocks  += thddata[i].blocks;
+			}
+
+			printf("Fibers Joined\n");
+
+			// NOTE(review): the Fibre objects new'd above are never deleted;
+			// harmless since the process exits, but technically a leak.
+			delete[](spots);
+			delete[](thddata);
+		}
+
+		printf("\nDone2\n");
+
+		printf("Duration (ms)        : %'ld\n", to_miliseconds(end - start));
+		printf("Number of processors : %'d\n", nprocs);
+		printf("Number of threads    : %'d\n", nthreads);
+		printf("Number of spots      : %'d\n", spot_cnt);
+		printf("Total Operations(ops): %'15llu\n", global_counter);
+		printf("Total blocks         : %'15llu\n", global_blocks);
+		printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / to_fseconds(end - start));
+		printf("ns per ops           : %'18.2lf\n", ((double)(end - start)) / global_counter);
+		printf("Ops per threads      : %'15llu\n", global_counter / nthreads);
+		printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
+		printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / to_fseconds(end - start));
+		printf("ns per ops/procs     : %'18.2lf\n", ((double)(end - start)) / (global_counter / nprocs));
+		fflush(stdout);
+	}
+
+	return 0;
+
+}
Index: benchmark/readyQ/churn.go
===================================================================
--- benchmark/readyQ/churn.go	(revision a647941f5133aff47f81f2b6299eed6538bdbd55)
+++ benchmark/readyQ/churn.go	(revision a647941f5133aff47f81f2b6299eed6538bdbd55)
@@ -0,0 +1,98 @@
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"math/rand"
+	"sync"
+	"sync/atomic"
+	"time"
+	"golang.org/x/sync/semaphore"
+	"golang.org/x/text/language"
+	"golang.org/x/text/message"
+)
+
+// churner repeatedly Releases then re-Acquires a randomly chosen semaphore
+// "spot" until the benchmark stop condition is met, then decrements the
+// global threads_left counter and reports its operation count on result.
+//
+// skip is true for the first len(spots) goroutines: they perform their first
+// Acquire without a matching prior Release.
+func churner(result chan uint64, start *sync.WaitGroup, skip bool, spots [] * semaphore.Weighted) {
+	ctx := context.TODO()
+	s := rand.NewSource(time.Now().UnixNano())
+	rng := rand.New(s)
+
+	count := uint64(0)
+	start.Wait() // block until main releases all churners for a synchronized start
+	for true {
+		// Pick a spot uniformly. Intn(len(spots)) replaces the previous
+		// Intn(100) % len(spots), which was modulo-biased and -- worse --
+		// left spots with index >= 100 unreachable when spot_cnt > 100.
+		sem := spots[ rng.Intn(len(spots)) ];
+		if !skip { sem.Release(1); };
+		sem.Acquire(ctx,1); // error deliberately ignored: ctx is never cancelled
+		skip = false;
+
+		count += 1
+		if  clock_mode && atomic.LoadInt32(&stop) == 1 { break }
+		if !clock_mode && count >= stop_count { break }
+	}
+
+	atomic.AddInt64(&threads_left, -1);
+	result <- count
+}
+
+// main parses options, primes spot_cnt weighted semaphores down to zero
+// available permits, runs nthreads churner goroutines for the benchmark
+// duration, drains the semaphores so every goroutine can exit, and prints
+// aggregate statistics.
+func main() {
+	var spot_cnt int
+
+	// Default to 2 spots for consistency with the C++/CFA variants of this
+	// benchmark (churn.cpp declares `unsigned spot_cnt = 2`).
+	spot_cntOpt := flag.Int("s", 2, "Number of spots in the system")
+
+	bench_init()
+
+	spot_cnt = *spot_cntOpt
+
+	threads_left = int64(nthreads)
+
+	result := make(chan uint64)
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	// Create each spot with capacity 20000 and immediately acquire all of it,
+	// so every semaphore starts with zero available permits.
+	spots := make([] * semaphore.Weighted, spot_cnt)
+	for i := range spots {
+		ctx := context.TODO()
+		spots[i] = semaphore.NewWeighted(20000)
+		spots[i].Acquire(ctx, 20000)
+	}
+
+	// The first len(spots) churners skip their initial Release so that fewer
+	// permits than goroutines circulate in the system.
+	for i := 0; i < nthreads; i++ {
+		go churner(result, &wg, i < len(spots), spots)
+	}
+	fmt.Printf("Starting\n");
+	atomic.StoreInt32(&stop, 0)
+	start := time.Now()
+	wg.Done(); // releases every churner blocked in start.Wait() at once
+	wait(start, true);
+
+	atomic.StoreInt32(&stop, 1)
+	end := time.Now()
+	duration := end.Sub(start)
+
+	fmt.Printf("\nDone\n")
+
+	// Drain the spots so goroutines blocked in Acquire can observe the stop
+	// flag and exit. NOTE(review): 10000 permits per spot is assumed to be
+	// enough for any nthreads -- confirm.
+	for i := range spots {
+		spots[i].Release(10000)
+	}
+
+	global_counter := uint64(0)
+	for i := 0; i < nthreads; i++ {
+		global_counter += <- result
+	}
+
+	p := message.NewPrinter(language.English)
+	p.Printf("Duration (ms)        : %d\n", duration.Milliseconds())
+	p.Printf("Number of processors : %d\n", nprocs);
+	p.Printf("Number of threads    : %d\n", nthreads);
+	p.Printf("Number of spots      : %d\n", spot_cnt);
+	p.Printf("Total Operations(ops): %15d\n", global_counter);
+	// p.Printf("Total blocks         : %15d\n", global_blocks);
+	p.Printf("Ops per second       : %18.2f\n", float64(global_counter) / duration.Seconds());
+	p.Printf("ns per ops           : %18.2f\n", float64(duration.Nanoseconds()) / float64(global_counter))
+	p.Printf("Ops per threads      : %15d\n", global_counter / uint64(nthreads))
+	p.Printf("Ops per procs        : %15d\n", global_counter / uint64(nprocs))
+	p.Printf("Ops/sec/procs        : %18.2f\n", (float64(global_counter) / float64(nprocs)) / duration.Seconds())
+	p.Printf("ns per ops/procs     : %18.2f\n", float64(duration.Nanoseconds()) / (float64(global_counter) / float64(nprocs)))
+}
