Index: benchmark/plot.py
===================================================================
--- benchmark/plot.py	(revision 5695645f00ea4722f1b0ce8ce491f494b95c8c55)
+++ benchmark/plot.py	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
@@ -33,5 +33,5 @@
 	"Ops per threads"       : Field('Ops'   , 0, False),
 	"ns per ops/procs"      : Field('ns'    , 0, False),
-	"Number of threads"     : Field('thrd'  , 1, False),
+	"Number of threads"     : Field(''      , 1, False),
 	"Total Operations(ops)" : Field('Ops'   , 0, False),
 	"Ops/sec/procs"         : Field('Ops'   , 0, False),
@@ -40,10 +40,13 @@
 	"Cycle size (# thrds)"  : Field('thrd'  , 1, False),
 	"Duration (ms)"         : Field('ms'    , 0, False),
-	"Target QPS"            : Field('QPS'   , 0, False),
-	"Actual QPS"            : Field('QPS'   , 0, False),
+	"Target QPS"            : Field(''      , 0, False),
+	"Actual QPS"            : Field(''      , 0, False),
+	"Average Read Latency"  : Field('us'    , 0, True),
 	"Median Read Latency"   : Field('us'    , 0, True),
 	"Tail Read Latency"     : Field('us'    , 0, True),
+	"Average Update Latency": Field('us'    , 0, True),
 	"Median Update Latency" : Field('us'    , 0, True),
 	"Tail Update Latency"   : Field('us'    , 0, True),
+	"Update Ratio"          : Field(r'\%'   , 0, False), # raw string: same backslash-percent value, no invalid-escape warning
 }
 
@@ -92,5 +95,5 @@
 	print("Making Plots")
 
-	for name, data in series.items():
+	for name, data in sorted(series.items()):
 		_col = next(colors)
 		plt.scatter(data['x'], data['y'], color=_col, label=name, marker='x')
Index: benchmark/process-mutilate.py
===================================================================
--- benchmark/process-mutilate.py	(revision 5695645f00ea4722f1b0ce8ce491f494b95c8c55)
+++ benchmark/process-mutilate.py	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
@@ -14,4 +14,5 @@
 parser = argparse.ArgumentParser(description='Python Script to convert output from mutilate to rmit like output')
 parser.add_argument('--out', nargs='?', type=argparse.FileType('w'), default=sys.stdout)
+parser.add_argument('--var', nargs='?', type=str, default='Target QPS')
 try:
 	options =  parser.parse_args()
@@ -31,4 +32,5 @@
 
 	try:
+		latAvs = fields[1] # average column; assumes the row layout '<type> avg std min 5th 10th 50th 90th 95th 99th'
 		lat50s = fields[6]
 		lat99s = fields[9]
@@ -37,4 +39,5 @@
 
 	try:
+		latAv = locale.atof(latAvs)
 		lat50 = locale.atof(lat50s)
 		lat99 = locale.atof(lat99s)
@@ -58,8 +61,8 @@
 		try:
 			if   line.startswith("read"):
-				rlat50, rlat99 = precentile(line)
+				rlatAv, rlat50, rlat99 = precentile(line)
 
 			elif line.startswith("update"):
-				ulat50, ulat99 = precentile(line)
+				ulatAv, ulat50, ulat99 = precentile(line)
 
 			elif line.startswith("Total QPS"):
@@ -84,4 +87,5 @@
 
 	try:
+		out['Average Read Latency'] = rlatAv
 		out['Median Read Latency'] = rlat50
 		out['Tail Read Latency'] = rlat99
@@ -90,4 +94,5 @@
 
 	try:
+		out['Average Update Latency'] = ulatAv
 		out['Median Update Latency'] = ulat50
 		out['Tail Update Latency'] = ulat99
@@ -112,5 +117,5 @@
 			continue
 
-		d = { 'Target QPS': int(rate) }
+		d = { options.var : int(rate) }
 
 		w = extract( f, d )
Index: benchmark/readyQ/churn.cfa
===================================================================
--- benchmark/readyQ/churn.cfa	(revision 5695645f00ea4722f1b0ce8ce491f494b95c8c55)
+++ benchmark/readyQ/churn.cfa	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
@@ -44,5 +44,5 @@
 		{ 's', "spots", "Number of spots in the system", spot_cnt }
 	};
-	BENCH_OPT_PARSE("cforall cycle benchmark");
+	BENCH_OPT_PARSE("cforall churn benchmark");
 
 	{
Index: benchmark/readyQ/churn.cpp
===================================================================
--- benchmark/readyQ/churn.cpp	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
+++ benchmark/readyQ/churn.cpp	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
@@ -0,0 +1,105 @@
+#include "rq_bench.hpp"
+#include <libfibre/fibre.h>
+// Churn benchmark for libfibre: every fibre repeatedly V()s and then P()s a randomly chosen semaphore ("spot").
+unsigned spot_cnt = 2; // number of spots; exposed as the 's' / "spots" option in main
+FredSemaphore * spots; // array of spot_cnt semaphores, allocated and freed in main
+// Per-fibre state; main hands one Churner to each fibre.
+struct Churner {
+	unsigned long long count = 0; // iterations completed by this fibre
+	unsigned long long blocks = 0; // number of P() calls that returned SemaphoreWasOpen
+	bool skip = false; // when true, the next iteration does not V() before its P()
+	__lehmer64_state_t seed; // generator state passed to __lehmer64
+	bench_sem self; // not referenced anywhere in this file
+};
+// Fibre body: park until main unparks us, then V()/P() random spots until the stop condition holds.
+void churner_main( Churner * self ) {
+	fibre_park();
+	for(;;) {
+		unsigned r = __lehmer64( self->seed );
+		FredSemaphore & sem = spots[r % spot_cnt];
+		if(!self->skip) sem.V();
+		self->blocks += sem.P() == SemaphoreWasOpen ? 1 : 0; // NOTE(review): counts SemaphoreWasOpen results; confirm that matches the intended meaning of "blocks"
+		self->skip = false;
+
+		self->count ++;
+		if( clock_mode && stop) break; // clock mode: run until main sets stop
+		if(!clock_mode && self->count >= stop_count) break; // otherwise: run a fixed number of iterations
+	}
+
+	__atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST); // announce that this fibre has left its loop
+}
+// Parse options, run nthreads churner fibres, then print the aggregated statistics.
+int main(int argc, char * argv[]) {
+	option_t opt[] = {
+		BENCH_OPT,
+		{ 's', "spots", "Number of spots in the system", spot_cnt }
+	};
+	BENCH_OPT_PARSE("libfibre churn benchmark");
+
+	{
+		unsigned long long global_counter = 0;
+		unsigned long long global_blocks  = 0;
+		uint64_t start, end;
+		FibreInit(1, nprocs );
+		{
+			spots = new FredSemaphore[spot_cnt]();
+			// every fibre decrements threads_left once, right before it returns
+			threads_left = nthreads;
+			Churner * thddata = new Churner[nthreads]();
+			for(unsigned i = 0; i < nthreads; i++ ) {
+				Churner & t = thddata[i];
+				t.skip = i < spot_cnt; // the first spot_cnt fibres P() without a preceding V() on their first iteration
+				t.seed = rand();
+			}
+			Fibre * threads[nthreads];
+			for(unsigned i = 0; i < nthreads; i++) {
+				threads[i] = new Fibre( reinterpret_cast<void (*)(void *)>(churner_main), &thddata[i] );
+			}
+			printf("Starting\n");
+
+			bool is_tty = isatty(STDOUT_FILENO);
+			start = timeHiRes();
+			// every fibre parked itself on entry; release them all, then hand over to wait<Fibre>() (declared outside this file)
+			for(unsigned i = 0; i < nthreads; i++ ) {
+				fibre_unpark( threads[i] );
+			}
+			wait<Fibre>(start, is_tty);
+
+			stop = true;
+			end = timeHiRes();
+			printf("\nDone\n");
+			// V() every spot many times so fibres still inside P() can return and reach their exit checks
+			for(unsigned i = 0; i < spot_cnt; i++) {
+				for(int j = 0; j < 10000; j++) spots[i].V(); // NOTE(review): fixed count, presumably meant to exceed the number of waiting fibres
+			}
+			// join each fibre and fold its counters into the totals
+			for(unsigned i = 0; i < nthreads; i++ ) {
+				fibre_join( threads[i], nullptr );
+				global_counter += thddata[i].count;
+				global_blocks  += thddata[i].blocks;
+			}
+
+			delete[](spots);
+			delete[](thddata);
+		}
+
+		printf("\nDone2\n");
+
+		printf("Duration (ms)        : %'ld\n", to_miliseconds(end - start));
+		printf("Number of processors : %'d\n", nprocs);
+		printf("Number of threads    : %'d\n", nthreads);
+		printf("Number of spots      : %'d\n", spot_cnt);
+		printf("Total Operations(ops): %'15llu\n", global_counter);
+		printf("Total blocks         : %'15llu\n", global_blocks);
+		printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / to_fseconds(end - start));
+		printf("ns per ops           : %'18.2lf\n", ((double)(end - start)) / global_counter);
+		printf("Ops per threads      : %'15llu\n", global_counter / nthreads);
+		printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
+		printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / to_fseconds(end - start));
+		printf("ns per ops/procs     : %'18.2lf\n", ((double)(end - start)) / (global_counter / nprocs));
+		fflush(stdout);
+	}
+
+	return 0;
+
+}
Index: benchmark/readyQ/churn.go
===================================================================
--- benchmark/readyQ/churn.go	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
+++ benchmark/readyQ/churn.go	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
@@ -0,0 +1,99 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"math/rand"
+	"runtime"
+	"sync"
+	"sync/atomic"
+	"time"
+	"golang.org/x/text/language"
+	"golang.org/x/text/message"
+)
+// churner waits on start, then repeatedly sends a token into a random spot and receives one back; it reports its iteration count on result.
+func churner(result chan uint64, start *sync.WaitGroup, spots [] chan struct {}) {
+	s := rand.NewSource(time.Now().UnixNano())
+	rng := rand.New(s)
+
+	count := uint64(0)
+	start.Wait()
+	for true {
+		// pick uniformly among all spots (Intn(100) % len was biased and never reached spots past index 99)
+		sem := spots[ rng.Intn(len(spots)) ];
+		sem <- (struct {}{})
+		<- sem;
+
+		count += 1
+		if  clock_mode && atomic.LoadInt32(&stop) == 1 { break }
+		if !clock_mode && count >= stop_count { break }
+	}
+
+	atomic.AddInt64(&threads_left, -1);
+	result <- count
+}
+// main starts nthreads churners over spot_cnt one-slot channels, runs the benchmark, then collects and prints the results.
+func main() {
+	var spot_cnt int
+
+	spot_cntOpt := flag.Int("s", 1, "Number of spots in the system")
+
+	bench_init()
+
+	spot_cnt = *spot_cntOpt
+
+	threads_left = int64(nthreads)
+
+	result := make(chan uint64)
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	spots := make([] chan struct {}, spot_cnt)
+	for i := range spots {
+		spots[i] = make(chan struct {}, 1)
+	}
+
+	for i := 0; i < nthreads; i++ {
+		go churner(result, &wg, spots)
+	}
+	fmt.Printf("Starting\n");
+	atomic.StoreInt32(&stop, 0)
+	start := time.Now()
+	wg.Done();
+	wait(start, true);
+
+	atomic.StoreInt32(&stop, 1)
+	end := time.Now()
+	duration := end.Sub(start)
+
+	fmt.Printf("\nDone\n")
+	// Toggle every spot (fill it when empty, drain it when full) so a churner stuck on either a send or a receive can finish.
+	for atomic.LoadInt64(&threads_left) != 0 {
+		for i := range spots {
+			select {
+			case spots[i] <- (struct {}{}):
+			case <- spots[i]:
+			}
+			runtime.Gosched()
+		}
+	}
+
+	global_counter := uint64(0)
+	for i := 0; i < nthreads; i++ {
+		global_counter += <- result
+	}
+
+	p := message.NewPrinter(language.English)
+	p.Printf("Duration (ms)        : %d\n", duration.Milliseconds())
+	p.Printf("Number of processors : %d\n", nprocs);
+	p.Printf("Number of threads    : %d\n", nthreads);
+	p.Printf("Number of spots      : %d\n", spot_cnt);
+	p.Printf("Total Operations(ops): %15d\n", global_counter);
+	// p.Printf("Total blocks         : %15d\n", global_blocks);
+	p.Printf("Ops per second       : %18.2f\n", float64(global_counter) / duration.Seconds());
+	p.Printf("ns per ops           : %18.2f\n", float64(duration.Nanoseconds()) / float64(global_counter))
+	p.Printf("Ops per threads      : %15d\n", global_counter / uint64(nthreads))
+	p.Printf("Ops per procs        : %15d\n", global_counter / uint64(nprocs))
+	p.Printf("Ops/sec/procs        : %18.2f\n", (float64(global_counter) / float64(nprocs)) / duration.Seconds())
+	p.Printf("ns per ops/procs     : %18.2f\n", float64(duration.Nanoseconds()) / (float64(global_counter) / float64(nprocs)))
+}
Index: benchmark/readyQ/yield.cfa
===================================================================
--- benchmark/readyQ/yield.cfa	(revision 5695645f00ea4722f1b0ce8ce491f494b95c8c55)
+++ benchmark/readyQ/yield.cfa	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
@@ -1,92 +1,68 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <limits.h>
-
-extern "C" {
-	#include <locale.h>
-	#include <getopt.h>
-}
-
-#include <unistd.h>
-
-#include <clock.hfa>
-#include <time.hfa>
-#include <stats.hfa>
-
-#include "../benchcltr.hfa"
-
-extern bool traceHeapOn();
-
-
-volatile bool run = false;
-volatile unsigned long long global_counter;
+#include "rq_bench.hfa"
 
 thread __attribute__((aligned(128))) Yielder {
-	unsigned long long counter;
+	unsigned long long count;
 };
 void ?{}( Yielder & this ) {
-	this.counter = 0;
-	((thread&)this){ "Yielder Thread", *the_benchmark_cluster };
+	((thread&)this){ "Yielder Thread", bench_cluster };
+	this.count = 0;
 }
 
 void main( Yielder & this ) {
 	park();
-	/* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
+	for() {
+		yield();
+		this.count++;
+		if( clock_mode && stop) break;
+		if(!clock_mode && this.count >= stop_count) break;
+	}
 
-	while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
-		yield();
-		this.counter++;
-	}
-	__atomic_fetch_add(&global_counter, this.counter, __ATOMIC_SEQ_CST);
+	__atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);
 }
 
 int main(int argc, char * argv[]) {
-	unsigned num_io = 1;
-	io_context_params params;
-
 	cfa_option opt[] = {
-		BENCH_OPT_CFA
+		BENCH_OPT
 	};
-	int opt_cnt = sizeof(opt) / sizeof(cfa_option);
-
-	char **left;
-	parse_args( argc, argv, opt, opt_cnt, "[OPTIONS]...\ncforall yield benchmark", left );
+	BENCH_OPT_PARSE("cforall yield benchmark");
 
 	{
-		printf("Running %d threads on %d processors for %f seconds\n", nthreads, nprocs, duration);
+		unsigned long long global_counter = 0;
 
 		Time start, end;
-		BenchCluster cl = { num_io, params, CFA_STATS_READY_Q };
+		BenchCluster bc = { nprocs };
 		{
-			BenchProc procs[nprocs];
-			{
-				Yielder threads[nthreads];
-				printf("Starting\n");
+			threads_left = nthreads;
+			Yielder threads[nthreads];
+			printf("Starting\n");
 
-				bool is_tty = isatty(STDOUT_FILENO);
-				start = timeHiRes();
-				run = true;
+			bool is_tty = isatty(STDOUT_FILENO);
+			start = timeHiRes();
 
-				for(i; nthreads) {
-					unpark( threads[i] );
-				}
-				wait(duration, start, end, is_tty);
+			for(i; nthreads) {
+				unpark( threads[i] );
+			}
+			wait(start, is_tty);
 
-				run = false;
-				end = timeHiRes();
-				printf("\nDone\n");
+			stop = true;
+			end = timeHiRes();
+			printf("\nDone\n");
+
+			for(i; nthreads) {
+				Yielder & y = join( threads[i] );
+				global_counter += y.count;
 			}
 		}
 
-		printf("Duration (ms)       : %'ld\n", (end - start)`dms);
-		printf("Number of processors: %'d\n", nprocs);
-		printf("Number of threads   : %'d\n", nthreads);
-		printf("Total yields        : %'15llu\n", global_counter);
-		printf("Yields per second   : %'18.2lf\n", ((double)global_counter) / (end - start)`s);
-		printf("ns per yields       : %'18.2lf\n", ((double)(end - start)`ns) / global_counter);
-		printf("Yields per procs    : %'15llu\n", global_counter / nprocs);
-		printf("Yields/sec/procs    : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`s);
-		printf("ns per yields/procs : %'18.2lf\n", ((double)(end - start)`ns) / (global_counter / nprocs));
+		printf("Duration (ms)        : %'ld\n", (end - start)`dms);
+		printf("Number of processors : %'d\n", nprocs);
+		printf("Number of threads    : %'d\n", nthreads);
+		printf("Total Operations(ops): %'15llu\n", global_counter);
+		printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / (end - start)`s);
+		printf("ns per ops           : %'18.2lf\n", (end - start)`dns / global_counter);
+		printf("Ops per threads      : %'15llu\n", global_counter / nthreads);
+		printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
+		printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`s);
+		printf("ns per ops/procs     : %'18.2lf\n", (end - start)`dns / (global_counter / nprocs));
 		fflush(stdout);
 	}
Index: benchmark/readyQ/yield.cpp
===================================================================
--- benchmark/readyQ/yield.cpp	(revision 5695645f00ea4722f1b0ce8ce491f494b95c8c55)
+++ benchmark/readyQ/yield.cpp	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
@@ -1,167 +1,67 @@
-#include <cassert>
-#include <cstdlib>
-#include <cstdio>
-#include <cstring>
-#include <climits>
-
-extern "C" {
-	#include <locale.h>
-	#include <getopt.h>
-}
-
-#include <unistd.h>
-
-#include <chrono>
-
-using Clock = std::chrono::high_resolution_clock;
-using duration_t = std::chrono::duration<double>;
-using std::chrono::nanoseconds;
-
-
-template<typename Ratio, typename T>
-T duration_cast(T seconds) {
-	return std::chrono::duration_cast<std::chrono::duration<T, Ratio>>(std::chrono::duration<T>(seconds)).count();
-}
+#include "rq_bench.hpp"
+#include <libfibre/fibre.h>
 
 volatile bool run = false;
 volatile unsigned long long global_counter;
 
-#include "libfibre/fibre.h"
 
-FredBarrier * barrier;
-struct __attribute__((aligned(128))) counter_t {
-	int value = 0;
-};
+void fibre_main() {
+	fibre_park();
+	unsigned long long count = 0;
+	for(;;) {
+		Fibre::forceYield();
+		count++;
+		if( clock_mode && stop) break;
+		if(!clock_mode && count >= stop_count) break;
+	}
 
-void fibre_main( counter_t * counter ) {
-	barrier->wait();
-	// /* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
-
-	while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
-		Fibre::forceYield();
-		// fibre_yield();
-		counter->value++;
-	}
-	__atomic_fetch_add(&global_counter, counter->value, __ATOMIC_SEQ_CST);
+	__atomic_fetch_add(&global_counter, count, __ATOMIC_SEQ_CST);
+	__atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);
 }
 
 int main(int argc, char * argv[]) {
-	double duration = 5;
-	int nprocs = 1;
-	int nthreads = 1;
-
-	std::cout.imbue(std::locale(""));
-	setlocale(LC_ALL, "");
-
-	for(;;) {
-		static struct option options[] = {
-			{"duration",  required_argument, 0, 'd'},
-			{"nprocs",    required_argument, 0, 'p'},
-			{"nthreads",  required_argument, 0, 't'},
-			{0, 0, 0, 0}
-		};
-
-		int idx = 0;
-		int opt = getopt_long(argc, argv, "d:p:t:", options, &idx);
-
-		const char * arg = optarg ? optarg : "";
-		char * end;
-		switch(opt) {
-			case -1:
-				goto run;
-			// Numeric Arguments
-			case 'd':
-				duration = strtod(arg, &end);
-				if(*end != '\0') {
-					fprintf(stderr, "Duration must be a valid double, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			case 't':
-				nthreads = strtoul(arg, &end, 10);
-				if(*end != '\0' || nthreads < 1) {
-					fprintf(stderr, "Number of threads must be a positive integer, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			case 'p':
-				nprocs = strtoul(arg, &end, 10);
-				if(*end != '\0' || nprocs < 1) {
-					fprintf(stderr, "Number of processors must be a positive integer, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			// Other cases
-			default: /* ? */
-				fprintf( stderr, "Unkown option '%c'\n", opt);
-			usage:
-				fprintf( stderr, "Usage: %s [options]\n", argv[0]);
-				fprintf( stderr, "\n" );
-				fprintf( stderr, "  -d, --duration=DURATION  Duration of the experiment, in seconds\n" );
-				fprintf( stderr, "  -t, --nthreads=NTHREADS  Number of kernel threads\n" );
-				fprintf( stderr, "  -q, --nqueues=NQUEUES    Number of queues per threads\n" );
-				exit(1);
-		}
-	}
-	run:
+	option_t opt[] = {
+		BENCH_OPT
+	};
+	BENCH_OPT_PARSE("libfibre yield benchmark");
 
 	{
 		printf("Running %d threads on %d processors for %lf seconds\n", nthreads, nprocs, duration);
 
-		FibreInit();
-		barrier = new FredBarrier(nthreads + 1);
+		FibreInit(1, nprocs);
+		uint64_t start, end;
 		{
-			Context::CurrCluster().addWorkers(nprocs);
-			{
-				counter_t counters[nthreads];
-				Fibre threads[nthreads];
-				for(int i = 0; i < nthreads; i++) {
-					threads[i].run(fibre_main, &counters[i]);
-				}
-				printf("Starting\n");
-				bool is_tty = isatty(STDOUT_FILENO);
-				auto before = Clock::now();
-				run = true;
+			threads_left = nthreads;
+			Fibre * threads[nthreads];
+			for(unsigned i = 0; i < nthreads; i++) {
+				threads[i] = new Fibre( reinterpret_cast<void (*)(void *)>(fibre_main), nullptr );
+			}
+			printf("Starting\n");
+			bool is_tty = isatty(STDOUT_FILENO);
+			start = timeHiRes();
 
-				barrier->wait();
-				for(;;) {
-					usleep(500'000);
-					auto now = Clock::now();
-					duration_t durr = now - before;
-					if( durr.count() > duration ) {
-						break;
-					}
-					if(is_tty) {
-						std::cout << "\r" << std::setprecision(4) << durr.count();
-						std::cout.flush();
-					}
-				}
+			for(unsigned i = 0; i < nthreads; i++ ) {
+				fibre_unpark( threads[i] );
+			}
+			wait<Fibre>(start, is_tty);
 
-				auto after = Clock::now();
-				duration_t durr = after - before;
-				duration = durr.count();
-				run = false;
-				printf("\nDone\n");
-				for(auto & thread : threads) {
-					thread.join();
-				}
-
-				// for(const auto & counter : counters) {
-				// 	std::cout << counter.value << std::endl;
-				// }
+			stop = true;
+			end = timeHiRes();
+			for(unsigned i = 0; i < nthreads; i++ ) {
+				fibre_join( threads[i], nullptr );
 			}
 		}
 
-		auto dur_nano = duration_cast<std::nano>(duration);
-		auto dur_dms  = duration_cast<std::milli>(duration);
-
-		printf("Duration (ms)       : %'.2lf\n", dur_dms );
-		printf("Total yields        : %'15llu\n", global_counter );
-		printf("Yields per procs    : %'15llu\n", global_counter / nprocs );
-		printf("Yields per second   : %'18.2lf\n", ((double)global_counter) / duration );
-		printf("Yields/sec/procs    : %'18.2lf\n", (((double)global_counter) / nprocs) / duration );
-		printf("ns per yields       : %'18.2lf\n", dur_nano / global_counter );
-		printf("ns per yields/procs : %'18.2lf\n", dur_nano / (global_counter / nprocs) );
-
+		printf("Duration (ms)        : %'ld\n", to_miliseconds(end - start));
+		printf("Number of processors : %'d\n", nprocs);
+		printf("Number of threads    : %'d\n", nthreads);
+		printf("Total Operations(ops): %'15llu\n", global_counter);
+		printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / to_fseconds(end - start));
+		printf("ns per ops           : %'18.2lf\n", ((double)(end - start)) / global_counter);
+		printf("Ops per threads      : %'15llu\n", global_counter / nthreads);
+		printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
+		printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / to_fseconds(end - start));
+		printf("ns per ops/procs     : %'18.2lf\n", ((double)(end - start)) / (global_counter / nprocs));
+		fflush(stdout);
 	}
 }
Index: benchmark/readyQ/yield.rs
===================================================================
--- benchmark/readyQ/yield.rs	(revision 5695645f00ea4722f1b0ce8ce491f494b95c8c55)
+++ benchmark/readyQ/yield.rs	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
@@ -90,13 +90,13 @@
 	});
 
-	println!("Duration (ms)       : {}", (duration.as_millis()).to_formatted_string(&Locale::en));
-	println!("Number of processors: {}", (nprocs).to_formatted_string(&Locale::en));
-	println!("Number of threads   : {}", (nthreads).to_formatted_string(&Locale::en));
-	println!("Total yields        : {:>15}", (global_counter).to_formatted_string(&Locale::en));
-	println!("Yields per second   : {:>15}", (((global_counter as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en));
-	println!("ns per yields       : {:>15}", ((duration.as_nanos() as f64 / global_counter as f64) as u64).to_formatted_string(&Locale::en));
-	println!("Yields per threads  : {:>15}", (global_counter / nthreads as u64).to_formatted_string(&Locale::en));
-	println!("Yields per procs    : {:>15}", (global_counter / nprocs as u64).to_formatted_string(&Locale::en));
-	println!("Yields/sec/procs    : {:>15}", ((((global_counter as f64) / nprocs as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en));
-	println!("ns per yields/procs : {:>15}", ((duration.as_nanos() as f64 / (global_counter as f64 / nprocs as f64)) as u64).to_formatted_string(&Locale::en));
+	println!("Duration (ms)        : {}", (duration.as_millis()).to_formatted_string(&Locale::en));
+	println!("Number of processors : {}", (nprocs).to_formatted_string(&Locale::en));
+	println!("Number of threads    : {}", (nthreads).to_formatted_string(&Locale::en));
+	println!("Total Operations(ops): {:>15}", (global_counter).to_formatted_string(&Locale::en));
+	println!("Ops per second       : {:>15}", (((global_counter as f64) / duration.as_secs_f64()) as u64).to_formatted_string(&Locale::en)); // as_secs_f64(): as_secs() would drop the fractional seconds
+	println!("ns per ops           : {:>15}", ((duration.as_nanos() as f64 / global_counter as f64) as u64).to_formatted_string(&Locale::en));
+	println!("Ops per threads      : {:>15}", (global_counter / nthreads as u64).to_formatted_string(&Locale::en));
+	println!("Ops per procs        : {:>15}", (global_counter / nprocs as u64).to_formatted_string(&Locale::en));
+	println!("Ops/sec/procs        : {:>15}", ((((global_counter as f64) / nprocs as f64) / duration.as_secs_f64()) as u64).to_formatted_string(&Locale::en)); // same fractional-seconds divisor as "Ops per second"
+	println!("ns per ops/procs     : {:>15}", ((duration.as_nanos() as f64 / (global_counter as f64 / nprocs as f64)) as u64).to_formatted_string(&Locale::en));
 }
Index: benchmark/rmit.py
===================================================================
--- benchmark/rmit.py	(revision 5695645f00ea4722f1b0ce8ce491f494b95c8c55)
+++ benchmark/rmit.py	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
@@ -63,36 +63,90 @@
 	return eval(fmt)
 
+# Evaluate all the options
+# options can be of the form key = val or key = some_math(other_key)
+# produce a list of all the options to replace some_math(other_key) with actual value
 def eval_options(opts):
+	# Find all the options with dependencies
 	dependents = [d for d in opts.values() if type(d) is DependentOpt]
+
+	# we need to find all the stragglers
 	processed = []
-	nopts = []
+
+	# extract all the necessary inputs
+	input_keys = {}
 	for d in dependents:
+		# Mark the dependent as seen
 		processed.append(d.key)
-		lists = []
+
+		# process each of the dependencies
 		for dvar in d.vars:
+			# Check that it depends on something that exists
 			if not dvar in opts.keys():
 				print('ERROR: extra pattern option {}:{} uses unknown key {}'.format(d.key,d.value,dvar), file=sys.stderr)
 				sys.exit(1)
 
-			lists.append([(dvar, o) for o in opts[dvar]])
+			# Check that it's not nested
+			if type(opts[dvar]) is DependentOpt: # dvar is only the key, so inspect the option it names
+				print('ERROR: dependent options cannot be nested {}:{} uses key {}'.format(d.key,d.value,dvar), file=sys.stderr)
+				sys.exit(1)
+
+			# Add the values to the input keys
+			if dvar not in input_keys:
+				input_keys[dvar] = opts[dvar]
+			else :
+				if input_keys[dvar] != opts[dvar]:
+					print('INTERNAL ERROR: repeat input do not match {}:{} vs {}'.format(dvar,opts[dvar],input_keys[dvar]), file=sys.stderr)
+					sys.exit(1)
+
+			# Mark the input as seen
 			processed.append(dvar)
 
-		kopt = []
-		for vals in list(itertools.product(*lists)):
-			res = ['-{}'.format(d.key), "{}".format(eval_one(d.value, vals))]
-			for k, v in vals:
-				res.extend(['-{}'.format(k), "{}".format(v)])
-			kopt.append(res)
-		nopts.append(kopt)
-
-
-	for k, vals in opts.items():
-		if k not in processed:
-			kopt = []
-			for v in vals:
-				kopt.append(['-{}'.format(k), "{}".format(v)])
-			nopts.append(kopt)
-
-	return nopts
+	# add in all the stragglers, they shouldn't cause too many problems
+	for k, v in opts.items():
+		if type(v) is DependentOpt:
+			continue
+
+		if k in processed:
+			# consistency check
+			if k not in input_keys:
+				print('INTERNAL ERROR: key \'{}\' marked as processed but not in input_keys'.format(k), file=sys.stderr)
+				sys.exit(1)
+			continue
+
+		# consistency check
+		if k in input_keys:
+			print('INTERNAL ERROR: key \'{}\' in input_keys but not marked as processed'.format(k), file=sys.stderr)
+			sys.exit(1)
+
+		# add the straggler
+		input_keys[k] = v
+
+	# flatten the dict into a list of pairs so it's easier to work with
+	input_list = []
+	for k, v in input_keys.items():
+		input_list.append([(k, o) for o in v])
+
+	# evaluate all the dependents
+	# they are not allowed to produce new values so it's a one-to-one mapping from here
+	evaluated = []
+	for inputs in list(itertools.product(*input_list)):
+		this_eval = list(inputs)
+		for d in dependents:
+			this_eval.append((d.key, eval_one(d.value, inputs)))
+
+		evaluated.append(this_eval)
+
+	# reformat everything to a list of arguments
+	formated = []
+	for o in evaluated:
+		inner = []
+		for k,v in o:
+			inner.append("-{}".format(k))
+			inner.append("{}".format(v))
+
+		# print(inner)
+		formated.append(inner)
+
+	return formated
 
 # returns the first option with key 'opt'
@@ -122,9 +176,9 @@
 	known_hosts = {
 		"jax": {
-			range(  1,  24) : "48-71",
-			range( 25,  48) : "48-71,144-167",
-			range( 49,  96) : "48-95,144-191",
-			range( 97, 144) : "24-95,120-191",
-			range(145, 192) : "0-95,96-191",
+			range(  1,  25) : "48-71",
+			range( 25,  49) : "48-71,144-167",
+			range( 49,  97) : "48-95,144-191",
+			range( 97, 145) : "24-95,120-191",
+			range(145, 193) : "0-95,96-191",
 		},
 	}
@@ -184,6 +238,4 @@
 
 	except:
-		print('ERROR: invalid arguments', file=sys.stderr)
-		parser.print_help(sys.stderr)
 		sys.exit(1)
 
@@ -215,5 +267,5 @@
 	# Figure out all the combinations to run
 	actions = []
-	for p in itertools.product(range(options.trials), commands, *opts):
+	for p in itertools.product(range(options.trials), commands, opts):
 		act = [p[1]]
 		for o in p[2:]:
@@ -281,3 +333,3 @@
 
 	if options.file != sys.stdout:
-		print("Done");                                                                                ")
+		print("Done                                                                                ")
Index: src/ControlStruct/MultiLevelExit.cpp
===================================================================
--- src/ControlStruct/MultiLevelExit.cpp	(revision 5695645f00ea4722f1b0ce8ce491f494b95c8c55)
+++ src/ControlStruct/MultiLevelExit.cpp	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
@@ -594,4 +594,10 @@
 		}
 
+		// check if loop node and if so add else clause if it exists
+		const WhileDoStmt * whilePtr = dynamic_cast<const WhileDoStmt *>(kid.get());
+		if ( whilePtr && whilePtr->else_) ret.push_back(whilePtr->else_);
+		const ForStmt * forPtr = dynamic_cast<const ForStmt *>(kid.get());
+		if ( forPtr && forPtr->else_) ret.push_back(forPtr->else_);
+
 		if ( ! break_label.empty() ) {
 			ret.push_back( labelledNullStmt( ret.back()->location, break_label ) );
