Index: benchmark/readyQ/cycle.cfa
===================================================================
--- benchmark/readyQ/cycle.cfa	(revision b35ab2d445f52b84ed4b7a48c4de785b5b3562b4)
+++ benchmark/readyQ/cycle.cfa	(revision 0b84b153230f10fe8e2d4ca0232b77830e1d7adb)
@@ -4,5 +4,4 @@
 	Partner * partner;
 	unsigned long long count;
-	bool first;
 };
 
@@ -12,12 +11,14 @@
 
 void main( Partner & this ) {
-	thread_loop {
+	this.count = 0;
+	for() { // unbounded loop; both exit tests sit at the bottom of the body
 		park();
 		unpark( *this.partner );
+		this.count ++;
+		if( clock_mode && stop) break; // clock mode: leave once the stop flag is set
+		if(!clock_mode && this.count >= stop_count) break; // iteration mode: leave after stop_count rounds
 	}
 
 	__atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);
-
-	if(this.first) park();
 }
 
@@ -41,5 +42,4 @@
 				unsigned pi = (i + nthreads) % tthreads;
 				threads[i].partner = &threads[pi];
-				threads[i].first = i < nthreads;
 			}
 			printf("Starting\n");
@@ -51,5 +51,5 @@
 				unpark( threads[i] );
 			}
-			wait(start, end, is_tty);
+			wait(start, is_tty);
 
 			stop = true;
Index: benchmark/readyQ/cycle.go
===================================================================
--- benchmark/readyQ/cycle.go	(revision b35ab2d445f52b84ed4b7a48c4de785b5b3562b4)
+++ benchmark/readyQ/cycle.go	(revision 0b84b153230f10fe8e2d4ca0232b77830e1d7adb)
@@ -0,0 +1,137 @@
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"os"
+	"runtime"
+	"sync/atomic"
+	"time"
+	"golang.org/x/text/language"
+	"golang.org/x/text/message"
+)
+
+var clock_mode bool // true when the run is bounded by duration, false when bounded by stop_count
+var threads_left int64 // goroutines that have not yet left their loop; read and decremented atomically
+var stop int32 // stored as 1 by main (atomically) once wait has returned
+var duration float64 // requested run time in seconds
+var stop_count uint64 // per-goroutine iteration limit checked when clock_mode is false
+
+func fflush(f *bufio.Writer) { // queues a carriage return on f, then flushes f right away
+	f.WriteString("\r")
+	f.Flush()
+}
+
+// wait polls every 100ms and returns once the run is over: in clock mode when duration seconds have
+// elapsed since start, otherwise when threads_left reaches zero. With is_tty it prints the elapsed time.
+func wait(start time.Time, is_tty bool) {
+	f := bufio.NewWriter(os.Stdout)
+	// Scale to nanoseconds before converting; time.Duration(duration) alone drops fractional seconds.
+	limit := time.Duration(duration * float64(time.Second))
+	for {
+		time.Sleep(100 * time.Millisecond)
+		delta := time.Since(start)
+		if is_tty {
+			fmt.Printf(" %.1f", delta.Seconds())
+			fflush(f)
+		}
+		if (clock_mode && delta >= limit) || (!clock_mode && atomic.LoadInt64(&threads_left) == 0) {
+			return
+		}
+	}
+}
+
+// partner receives a token on mine, forwards one on next and counts the hand-offs until told to stop.
+func partner(result chan uint64, mine chan int, next chan int) {
+	var laps uint64
+	for done := false; !done; {
+		<-mine
+		next <- 0
+		laps++
+		// Clock mode ends on the stop flag; iteration mode ends after stop_count hand-offs.
+		done = (clock_mode && atomic.LoadInt32(&stop) == 1) || (!clock_mode && laps >= stop_count)
+	}
+	atomic.AddInt64(&threads_left, -1)
+	result <- laps
+}
+
+// main parses the options, links nthreads*ring_size goroutines into rings of channels, lets them
+// run for the requested duration or iteration count, and prints throughput statistics.
+func main() {
+	nprocsOpt := flag.Int("p", 1, "The number of processors")
+	nthreadsOpt := flag.Int("t", 1, "The number of threads")
+	ring_sizeOpt := flag.Int("r", 2, "The number of threads per cycles")
+	durationOpt := flag.Float64("d", 0, "Duration of the experiment in seconds")
+	stopOpt := flag.Uint64("i", 0, "Duration of the experiment in iterations")
+
+	flag.Parse()
+
+	nprocs, nthreads, ring_size := *nprocsOpt, *nthreadsOpt, *ring_sizeOpt
+	duration = *durationOpt
+	stop_count = *stopOpt
+
+	// Fail fast: sizes below 1 would otherwise surface as a divide-by-zero or a negative make length.
+	if nprocs < 1 || nthreads < 1 || ring_size < 1 {
+		panic("-p, -t and -r must all be at least 1")
+	}
+
+	if duration > 0 && stop_count > 0 {
+		panic("--duration and --iterations cannot be used together\n")
+	} else if duration > 0 {
+		clock_mode = true
+		stop_count = 0xFFFFFFFFFFFFFFFF
+		fmt.Printf("Running for %f seconds\n", duration)
+	} else if stop_count > 0 {
+		clock_mode = false
+		fmt.Printf("Running for %d iterations\n", stop_count)
+	} else {
+		duration = 5
+		clock_mode = true
+		fmt.Printf("Running for %f seconds\n", duration)
+	}
+
+	runtime.GOMAXPROCS(nprocs)
+	tthreads := nthreads * ring_size
+	threads_left = int64(tthreads)
+
+	result := make(chan uint64)
+	channels := make([]chan int, tthreads)
+	for i := range channels {
+		channels[i] = make(chan int, 1)
+	}
+
+	for i := 0; i < tthreads; i++ {
+		pi := (i + nthreads) % tthreads
+		go partner(result, channels[i], channels[pi])
+	}
+	fmt.Printf("Starting\n")
+
+	atomic.StoreInt32(&stop, 0)
+	start := time.Now()
+	// Inject one token per ring: goroutine i forwards to goroutine (i+nthreads) mod tthreads.
+	for i := 0; i < nthreads; i++ {
+		channels[i] <- 0
+	}
+	wait(start, true)
+
+	atomic.StoreInt32(&stop, 1)
+	delta := time.Since(start)
+
+	fmt.Printf("\nDone\n")
+
+	global_counter := uint64(0)
+	for i := 0; i < tthreads; i++ {
+		global_counter += <-result
+	}
+
+	p := message.NewPrinter(language.English)
+	p.Printf("Took %f ms\n", delta.Seconds()*1000)
+	p.Printf("Yields per second   : %18.2f\n", float64(global_counter)/delta.Seconds())
+	p.Printf("ns per yields       : %18.2f\n", float64(delta.Nanoseconds())/float64(global_counter))
+	p.Printf("Total yields        : %15d\n", global_counter)
+	p.Printf("Yields per threads  : %15d\n", global_counter/uint64(tthreads))
+	p.Printf("Yields per procs    : %15d\n", global_counter/uint64(nprocs))
+	p.Printf("Yields/sec/procs    : %18.2f\n", (float64(global_counter)/float64(nprocs))/delta.Seconds())
+	p.Printf("ns per yields/procs : %18.2f\n", float64(delta.Nanoseconds())/(float64(global_counter)/float64(nprocs)))
+}
Index: benchmark/readyQ/rq_bench.hfa
===================================================================
--- benchmark/readyQ/rq_bench.hfa	(revision b35ab2d445f52b84ed4b7a48c4de785b5b3562b4)
+++ benchmark/readyQ/rq_bench.hfa	(revision 0b84b153230f10fe8e2d4ca0232b77830e1d7adb)
@@ -17,5 +17,5 @@
 volatile unsigned long long threads_left;
 
-#define thread_loop for(this.count = 0; this.count < stop_count && !stop; this.count++)
+#define thread_loop for(this.count = 0;; this.count++)
 
 #define BENCH_OPT \
@@ -36,8 +36,11 @@
 			clock_mode = true; \
 			stop_count = 0xFFFFFFFFFFFFFFFF; \
+			printf("Running for %lf seconds\n", duration); \
 		} else if(stop_count > 0) { \
 			clock_mode = false; \
+			printf("Running for %lu iterations\n", stop_count); \
 		} else { \
 			duration = 5; clock_mode = true;\
+			printf("Running for %lf seconds\n", duration); \
 		} \
 	}
@@ -67,8 +70,8 @@
 }
 
-void wait(Time & start, Time & end, bool is_tty) {
+void wait(const Time & start, bool is_tty) {
 	for() {
 		sleep(100`ms);
-		end = getTimeNsec();
+		Time end = getTimeNsec();
 		Duration delta = end - start;
 		if(is_tty) {
