Index: benchmark/readyQ/yield.cfa
===================================================================
--- benchmark/readyQ/yield.cfa	(revision 9cf2b0f4dc8ecca0d2753969d3132960ce3071c6)
+++ benchmark/readyQ/yield.cfa	(revision b738974f6500a69cfd3ab3b04e2e29d80bccf818)
@@ -1,27 +1,22 @@
 #include "rq_bench.hfa"
 
-extern bool traceHeapOn();
-
-
-volatile bool run = false;
-volatile unsigned long long global_counter;
-
 thread __attribute__((aligned(128))) Yielder {
-	unsigned long long counter;
+	unsigned long long count;	// number of yield() iterations this thread has completed; bumped once per loop pass in main()
 };
 void ?{}( Yielder & this ) {
-	this.counter = 0;
-	((thread&)this){ "Yielder Thread" };
+	((thread&)this){ "Yielder Thread", bench_cluster };	// construct the underlying thread, passing its name and bench_cluster (declared outside this diff; presumably the cluster it runs on)
+	this.count = 0;	// no yields recorded yet
 }
 
 void main( Yielder & this ) {
 	park();
-	/* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
+	for() {	// yield repeatedly until one of the two exit checks below fires
+		yield();
+		this.count++;	// record one more completed yield
+		if( clock_mode && stop) break;	// when clock_mode is set: exit once stop is true
+		if(!clock_mode && this.count >= stop_count) break;	// when clock_mode is clear: exit after stop_count yields
+	}
 
-	while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
-		yield();
-		this.counter++;
-	}
-	__atomic_fetch_add(&global_counter, this.counter, __ATOMIC_SEQ_CST);
+	__atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);	// atomically decrement the shared threads_left counter now that this thread's loop has ended
 }
 
@@ -33,9 +28,10 @@
 
 	{
-		printf("Running %d threads on %d processors for %f seconds\n", nthreads, nprocs, duration);
+		unsigned long long global_counter = 0;
 
 		Time start, end;
 		BenchCluster bc = { nprocs };
 		{
+			threads_left = nthreads;
 			Yielder threads[nthreads];
 			printf("Starting\n");
@@ -43,5 +39,4 @@
 			bool is_tty = isatty(STDOUT_FILENO);
 			start = timeHiRes();
-			run = true;
 
 			for(i; nthreads) {
@@ -50,7 +45,12 @@
 			wait(start, is_tty);
 
-			run = false;
+			stop = true;
 			end = timeHiRes();
 			printf("\nDone\n");
+
+			for(i; nthreads) {
+				Yielder & y = join( threads[i] );
+				global_counter += y.count;
+			}
 		}
 
