Index: benchmark/readyQ/yield.cfa
===================================================================
--- benchmark/readyQ/yield.cfa	(revision db89655bc5956ac9b93d6b72963a2fda2172d2a6)
+++ benchmark/readyQ/yield.cfa	(revision 9cf2b0f4dc8ecca0d2753969d3132960ce3071c6)
@@ -1,19 +1,3 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <limits.h>
-
-extern "C" {
-	#include <locale.h>
-	#include <getopt.h>
-}
-
-#include <unistd.h>
-
-#include <clock.hfa>
-#include <time.hfa>
-#include <stats.hfa>
-
-#include "../benchcltr.hfa"
+#include "rq_bench.hfa"
 
 extern bool traceHeapOn();
@@ -28,5 +12,5 @@
 void ?{}( Yielder & this ) {
 	this.counter = 0;
-	((thread&)this){ "Yielder Thread", *the_benchmark_cluster };
+	((thread&)this){ "Yielder Thread" }; // construct the underlying thread with only its name; NOTE(review): with no cluster argument it presumably runs on the default/current cluster - confirm
 }
 
@@ -43,14 +27,8 @@
 
 int main(int argc, char * argv[]) {
-	unsigned num_io = 1;
-	io_context_params params;
-
 	cfa_option opt[] = {
-		BENCH_OPT_CFA
+		BENCH_OPT // NOTE(review): presumably the shared option entries (nprocs, nthreads, ...) defined in rq_bench.hfa - confirm
 	};
-	int opt_cnt = sizeof(opt) / sizeof(cfa_option);
-
-	char **left;
-	parse_args( argc, argv, opt, opt_cnt, "[OPTIONS]...\ncforall yield benchmark", left );
+	BENCH_OPT_PARSE("cforall yield benchmark"); // NOTE(review): presumably parses argc/argv against opt[] using this text as the usage description - confirm in rq_bench.hfa
 
 	{
@@ -58,35 +36,33 @@
 
 		Time start, end;
-		BenchCluster cl = { num_io, params, CFA_STATS_READY_Q };
+		BenchCluster bc = { nprocs }; // NOTE(review): presumably brings up nprocs processors for the benchmark - confirm in rq_bench.hfa
 		{
-			BenchProc procs[nprocs];
-			{
-				Yielder threads[nthreads];
-				printf("Starting\n");
+			Yielder threads[nthreads]; // NOTE(review): the enclosing block presumably exists so every Yielder is joined at its closing brace, before the report - confirm
+			printf("Starting\n");
 
-				bool is_tty = isatty(STDOUT_FILENO);
-				start = timeHiRes();
-				run = true;
+			bool is_tty = isatty(STDOUT_FILENO); // passed to wait(); true when stdout is a terminal
+			start = timeHiRes(); // beginning of the measured interval
+			run = true; // set before the threads are unparked; cleared again after wait() returns
 
-				for(i; nthreads) {
-					unpark( threads[i] );
-				}
-				wait(duration, start, end, is_tty);
+			for(i; nthreads) {
+				unpark( threads[i] ); // release each Yielder
+			}
+			wait(start, is_tty); // NOTE(review): presumably blocks until the configured run length is reached - confirm in rq_bench.hfa
 
-				run = false;
-				end = timeHiRes();
-				printf("\nDone\n");
-			}
+			run = false; // cleared before the end timestamp is taken
+			end = timeHiRes(); // end of the measured interval
+			printf("\nDone\n");
 		}
 
-		printf("Duration (ms)       : %'ld\n", (end - start)`dms);
-		printf("Number of processors: %'d\n", nprocs);
-		printf("Number of threads   : %'d\n", nthreads);
-		printf("Total yields        : %'15llu\n", global_counter);
-		printf("Yields per second   : %'18.2lf\n", ((double)global_counter) / (end - start)`s);
-		printf("ns per yields       : %'18.2lf\n", ((double)(end - start)`ns) / global_counter);
-		printf("Yields per procs    : %'15llu\n", global_counter / nprocs);
-		printf("Yields/sec/procs    : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`s);
-		printf("ns per yields/procs : %'18.2lf\n", ((double)(end - start)`ns) / (global_counter / nprocs));
+		printf("Duration (ms)        : %'ld\n", (end - start)`dms);
+		printf("Number of processors : %'d\n", nprocs);
+		printf("Number of threads    : %'d\n", nthreads);
+		printf("Total Operations(ops): %'15llu\n", global_counter);
+		printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / (end - start)`s);
+		printf("ns per ops           : %'18.2lf\n", (end - start)`dns / global_counter);
+		printf("Ops per threads      : %'15llu\n", global_counter / nthreads); // integer average, printed as a whole count
+		printf("Ops per procs        : %'15llu\n", global_counter / nprocs); // integer average, printed as a whole count
+		printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`s);
+		printf("ns per ops/procs     : %'18.2lf\n", (end - start)`dns / (((double)global_counter) / nprocs)); // floating-point per-processor share: an integer quotient would truncate and become 0 when global_counter < nprocs
 		fflush(stdout);
 	}
Index: benchmark/readyQ/yield.cpp
===================================================================
--- benchmark/readyQ/yield.cpp	(revision db89655bc5956ac9b93d6b72963a2fda2172d2a6)
+++ benchmark/readyQ/yield.cpp	(revision 9cf2b0f4dc8ecca0d2753969d3132960ce3071c6)
@@ -1,167 +1,67 @@
-#include <cassert>
-#include <cstdlib>
-#include <cstdio>
-#include <cstring>
-#include <climits>
-
-extern "C" {
-	#include <locale.h>
-	#include <getopt.h>
-}
-
-#include <unistd.h>
-
-#include <chrono>
-
-using Clock = std::chrono::high_resolution_clock;
-using duration_t = std::chrono::duration<double>;
-using std::chrono::nanoseconds;
-
-
-template<typename Ratio, typename T>
-T duration_cast(T seconds) {
-	return std::chrono::duration_cast<std::chrono::duration<T, Ratio>>(std::chrono::duration<T>(seconds)).count();
-}
+#include "rq_bench.hpp"
+#include <libfibre/fibre.h>
 
 volatile bool run = false;
 volatile unsigned long long global_counter;
 
-#include "libfibre/fibre.h"
 
-FredBarrier * barrier;
-struct __attribute__((aligned(128))) counter_t {
-	int value = 0;
-};
+void fibre_main(void *) { // fibre entry point; the unused argument makes the signature match the void (*)(void *) type main() passes to the Fibre constructor
+	fibre_park(); // wait here until main() calls fibre_unpark() on this fibre
+	unsigned long long count = 0; // yields performed by this fibre only
+	for(;;) {
+		Fibre::forceYield();
+		count++;
+		if( clock_mode && stop) break; // timed run: leave once main() has set stop
+		if(!clock_mode && count >= stop_count) break; // counted run: leave after stop_count yields
+	}
 
-void fibre_main( counter_t * counter ) {
-	barrier->wait();
-	// /* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
-
-	while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
-		Fibre::forceYield();
-		// fibre_yield();
-		counter->value++;
-	}
-	__atomic_fetch_add(&global_counter, counter->value, __ATOMIC_SEQ_CST);
+	__atomic_fetch_add(&global_counter, count, __ATOMIC_SEQ_CST); // fold the local count into the shared total exactly once
+	__atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST); // one fewer fibre still running (main() initialises this to nthreads)
 }
 
 int main(int argc, char * argv[]) {
-	double duration = 5;
-	int nprocs = 1;
-	int nthreads = 1;
-
-	std::cout.imbue(std::locale(""));
-	setlocale(LC_ALL, "");
-
-	for(;;) {
-		static struct option options[] = {
-			{"duration",  required_argument, 0, 'd'},
-			{"nprocs",    required_argument, 0, 'p'},
-			{"nthreads",  required_argument, 0, 't'},
-			{0, 0, 0, 0}
-		};
-
-		int idx = 0;
-		int opt = getopt_long(argc, argv, "d:p:t:", options, &idx);
-
-		const char * arg = optarg ? optarg : "";
-		char * end;
-		switch(opt) {
-			case -1:
-				goto run;
-			// Numeric Arguments
-			case 'd':
-				duration = strtod(arg, &end);
-				if(*end != '\0') {
-					fprintf(stderr, "Duration must be a valid double, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			case 't':
-				nthreads = strtoul(arg, &end, 10);
-				if(*end != '\0' || nthreads < 1) {
-					fprintf(stderr, "Number of threads must be a positive integer, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			case 'p':
-				nprocs = strtoul(arg, &end, 10);
-				if(*end != '\0' || nprocs < 1) {
-					fprintf(stderr, "Number of processors must be a positive integer, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			// Other cases
-			default: /* ? */
-				fprintf( stderr, "Unkown option '%c'\n", opt);
-			usage:
-				fprintf( stderr, "Usage: %s [options]\n", argv[0]);
-				fprintf( stderr, "\n" );
-				fprintf( stderr, "  -d, --duration=DURATION  Duration of the experiment, in seconds\n" );
-				fprintf( stderr, "  -t, --nthreads=NTHREADS  Number of kernel threads\n" );
-				fprintf( stderr, "  -q, --nqueues=NQUEUES    Number of queues per threads\n" );
-				exit(1);
-		}
-	}
-	run:
+	option_t opt[] = { // command-line option table for this benchmark
+		BENCH_OPT // NOTE(review): presumably the shared entries behind nprocs / nthreads / duration used below - confirm in rq_bench.hpp
+	};
+	BENCH_OPT_PARSE("libfibre yield benchmark"); // NOTE(review): presumably parses argc/argv against opt[] with this usage description - confirm in rq_bench.hpp
 
 	{
 		printf("Running %d threads on %d processors for %lf seconds\n", nthreads, nprocs, duration);
 
-		FibreInit();
-		barrier = new FredBarrier(nthreads + 1);
+		FibreInit(1, nprocs); // NOTE(review): presumably (poller count, worker count) - confirm against the libfibre version in use
+		uint64_t start, end; // timeHiRes() stamps; the report below treats (end - start) as nanoseconds
 		{
-			Context::CurrCluster().addWorkers(nprocs);
-			{
-				counter_t counters[nthreads];
-				Fibre threads[nthreads];
-				for(int i = 0; i < nthreads; i++) {
-					threads[i].run(fibre_main, &counters[i]);
-				}
-				printf("Starting\n");
-				bool is_tty = isatty(STDOUT_FILENO);
-				auto before = Clock::now();
-				run = true;
+			threads_left = nthreads; // every fibre decrements this once when it finishes
+			Fibre * threads[nthreads];
+			for(unsigned i = 0; i < nthreads; i++) {
+				threads[i] = new Fibre( reinterpret_cast<void (*)(void *)>(fibre_main), nullptr ); // entry point fibre_main, no argument; NOTE(review): never deleted here - presumably fibre_join() releases it, confirm
+			}
+			printf("Starting\n");
+			bool is_tty = isatty(STDOUT_FILENO); // passed to wait(); true when stdout is a terminal
+			start = timeHiRes(); // beginning of the measured interval
 
-				barrier->wait();
-				for(;;) {
-					usleep(500'000);
-					auto now = Clock::now();
-					duration_t durr = now - before;
-					if( durr.count() > duration ) {
-						break;
-					}
-					if(is_tty) {
-						std::cout << "\r" << std::setprecision(4) << durr.count();
-						std::cout.flush();
-					}
-				}
+			for(unsigned i = 0; i < nthreads; i++ ) {
+				fibre_unpark( threads[i] ); // release the fibres parked at the top of fibre_main
+			}
+			wait<Fibre>(start, is_tty); // NOTE(review): presumably returns once the run should end (time elapsed or all fibres done) - confirm in rq_bench.hpp
 
-				auto after = Clock::now();
-				duration_t durr = after - before;
-				duration = durr.count();
-				run = false;
-				printf("\nDone\n");
-				for(auto & thread : threads) {
-					thread.join();
-				}
-
-				// for(const auto & counter : counters) {
-				// 	std::cout << counter.value << std::endl;
-				// }
+			stop = true; // lets fibres running in clock mode leave their loop
+			end = timeHiRes(); // end of the measured interval, taken before joining
+			for(unsigned i = 0; i < nthreads; i++ ) {
+				fibre_join( threads[i], nullptr ); // wait for each fibre to finish; its return value is not collected
 			}
 		}
 
-		auto dur_nano = duration_cast<std::nano>(duration);
-		auto dur_dms  = duration_cast<std::milli>(duration);
-
-		printf("Duration (ms)       : %'.2lf\n", dur_dms );
-		printf("Total yields        : %'15llu\n", global_counter );
-		printf("Yields per procs    : %'15llu\n", global_counter / nprocs );
-		printf("Yields per second   : %'18.2lf\n", ((double)global_counter) / duration );
-		printf("Yields/sec/procs    : %'18.2lf\n", (((double)global_counter) / nprocs) / duration );
-		printf("ns per yields       : %'18.2lf\n", dur_nano / global_counter );
-		printf("ns per yields/procs : %'18.2lf\n", dur_nano / (global_counter / nprocs) );
-
+		printf("Duration (ms)        : %'ld\n", to_miliseconds(end - start));
+		printf("Number of processors : %'d\n", nprocs);
+		printf("Number of threads    : %'d\n", nthreads);
+		printf("Total Operations(ops): %'15llu\n", global_counter);
+		printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / to_fseconds(end - start));
+		printf("ns per ops           : %'18.2lf\n", ((double)(end - start)) / global_counter);
+		printf("Ops per threads      : %'15llu\n", global_counter / nthreads); // integer average, printed as a whole count
+		printf("Ops per procs        : %'15llu\n", global_counter / nprocs); // integer average, printed as a whole count
+		printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / to_fseconds(end - start));
+		printf("ns per ops/procs     : %'18.2lf\n", ((double)(end - start)) / (((double)global_counter) / nprocs)); // floating-point per-processor share: an integer quotient would truncate and become 0 when global_counter < nprocs
+		fflush(stdout);
 	}
 }
Index: benchmark/readyQ/yield.rs
===================================================================
--- benchmark/readyQ/yield.rs	(revision db89655bc5956ac9b93d6b72963a2fda2172d2a6)
+++ benchmark/readyQ/yield.rs	(revision 9cf2b0f4dc8ecca0d2753969d3132960ce3071c6)
@@ -90,13 +90,13 @@
 	});
 
-	println!("Duration (ms)       : {}", (duration.as_millis()).to_formatted_string(&Locale::en));
-	println!("Number of processors: {}", (nprocs).to_formatted_string(&Locale::en));
-	println!("Number of threads   : {}", (nthreads).to_formatted_string(&Locale::en));
-	println!("Total yields        : {:>15}", (global_counter).to_formatted_string(&Locale::en));
-	println!("Yields per second   : {:>15}", (((global_counter as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en));
-	println!("ns per yields       : {:>15}", ((duration.as_nanos() as f64 / global_counter as f64) as u64).to_formatted_string(&Locale::en));
-	println!("Yields per threads  : {:>15}", (global_counter / nthreads as u64).to_formatted_string(&Locale::en));
-	println!("Yields per procs    : {:>15}", (global_counter / nprocs as u64).to_formatted_string(&Locale::en));
-	println!("Yields/sec/procs    : {:>15}", ((((global_counter as f64) / nprocs as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en));
-	println!("ns per yields/procs : {:>15}", ((duration.as_nanos() as f64 / (global_counter as f64 / nprocs as f64)) as u64).to_formatted_string(&Locale::en));
+	println!("Duration (ms)        : {}", (duration.as_millis()).to_formatted_string(&Locale::en));
+	println!("Number of processors : {}", (nprocs).to_formatted_string(&Locale::en));
+	println!("Number of threads    : {}", (nthreads).to_formatted_string(&Locale::en));
+	println!("Total Operations(ops): {:>15}", (global_counter).to_formatted_string(&Locale::en));
+	println!("Ops per second       : {:>15}", (((global_counter as f64) / duration.as_secs_f64()) as u64).to_formatted_string(&Locale::en)); // fractional seconds: as_secs() drops the sub-second part and is 0 for runs under 1 s
+	println!("ns per ops           : {:>15}", ((duration.as_nanos() as f64 / global_counter as f64) as u64).to_formatted_string(&Locale::en));
+	println!("Ops per threads      : {:>15}", (global_counter / nthreads as u64).to_formatted_string(&Locale::en));
+	println!("Ops per procs        : {:>15}", (global_counter / nprocs as u64).to_formatted_string(&Locale::en));
+	println!("Ops/sec/procs        : {:>15}", ((((global_counter as f64) / nprocs as f64) / duration.as_secs_f64()) as u64).to_formatted_string(&Locale::en)); // same fractional-seconds divisor as "Ops per second"
+	println!("ns per ops/procs     : {:>15}", ((duration.as_nanos() as f64 / (global_counter as f64 / nprocs as f64)) as u64).to_formatted_string(&Locale::en));
 }
