Index: benchmark/benchcltr.hfa
===================================================================
--- benchmark/benchcltr.hfa	(revision 2649ff9a75bc80845220784ad67e9fd55f1df612)
+++ benchmark/benchcltr.hfa	(revision 2649ff9a75bc80845220784ad67e9fd55f1df612)
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <assert.h>
+#include <kernel.hfa>
+#include <thread.hfa>
+
+struct cluster * the_benchmark_cluster = 0p;	// cluster used by the benchmark; stays 0p until a BenchCluster constructor sets it
+struct BenchCluster {	// wrapper whose constructor publishes its cluster through the_benchmark_cluster
+      cluster self;	// the wrapped cluster
+};
+
+void ?{}( BenchCluster & this ) {	// constructor: builds the wrapped cluster, then registers it globally
+      (this.self){ "Benchmark Cluster" };	// construct the cluster, passing the label "Benchmark Cluster"
+      // Only one BenchCluster may be registered: the global pointer must still be null here.
+      assert( the_benchmark_cluster == 0p );
+      the_benchmark_cluster = &this.self;
+}
+
+struct BenchProc {	// wrapper around a processor that is constructed against the benchmark cluster
+	processor self;	// the wrapped processor
+};
+
+void ?{}( BenchProc & this ) {	// constructor: builds the wrapped processor using the registered benchmark cluster
+      assert( the_benchmark_cluster != 0p );	// a BenchCluster must have been constructed first
+	(this.self){ "Benchmark Processor", *the_benchmark_cluster };	// passes the label and the registered cluster
+}
+
+
Index: benchmark/readyQ/yield.cfa
===================================================================
--- benchmark/readyQ/yield.cfa	(revision 2649ff9a75bc80845220784ad67e9fd55f1df612)
+++ benchmark/readyQ/yield.cfa	(revision 2649ff9a75bc80845220784ad67e9fd55f1df612)
@@ -0,0 +1,138 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+extern "C" {
+	#include <locale.h>
+	#include <getopt.h>
+}
+
+#include <unistd.h>
+
+#include <clock.hfa>
+#include <time.hfa>
+
+#include "../benchcltr.hfa"
+
+extern bool traceHeapOn();	// NOTE(review): not referenced in the visible code — confirm it is still needed
+
+// Shared benchmark state: 'run' gates the Yielder loops, 'global_counter' accumulates their yield counts.
+volatile bool run = false;
+volatile unsigned long long global_counter;
+
+thread Yielder {	// thread type whose main yields in a loop while 'run' is set
+	unsigned long long counter;	// incremented once after every yield() in this thread's main
+};
+void ?{}( Yielder & this ) {	// constructor: zeroes the counter, then constructs the thread part
+	this.counter = 0;
+	((thread&)this){ "Yielder Thread", *the_benchmark_cluster };	// dereferences the_benchmark_cluster, so it must already be set
+}
+
+void main( Yielder & this ) {
+	// Park first; the driver unparks this thread once the run flag has been raised.
+	park( __cfaabi_dbg_ctx );
+	/* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
+	// Keep yielding while the run flag is set, counting each completed yield.
+	for(; __atomic_load_n(&run, __ATOMIC_RELAXED); this.counter++) {
+		yield();
+	}
+	__atomic_fetch_add(&global_counter, this.counter, __ATOMIC_SEQ_CST);	// add this thread's count to the shared total
+}
+
+int main(int argc, char * argv[]) {	// benchmark driver: parse options, run the yield test, print the results
+	double duration = 5;
+	int nprocs = 1;
+	int nthreads = 1;
+
+	// Parse the command line one option per iteration; 'goto run' leaves the
+	// loop once getopt_long returns -1, and any parse error ends in 'usage'.
+	for(;;) {
+		static struct option options[] = {
+			{"duration",  required_argument, 0, 'd'},
+			{"nprocs",    required_argument, 0, 'p'},
+			{"nthreads",  required_argument, 0, 't'},
+			{0, 0, 0, 0}
+		};
+
+		int idx = 0;
+		int opt = getopt_long(argc, argv, "d:p:t:", options, &idx);
+
+		char * arg = optarg ? optarg : "";
+		// Set by strtod/strtoul to the first character they did not consume.
+		char * end;
+		switch(opt) {
+			case -1:
+				goto run;
+			// Numeric Arguments
+			case 'd':
+				duration = strtod(arg, &end);
+				if(end == arg || *end != '\0') {	// end == arg: nothing was parsed (e.g. empty argument)
+					fprintf(stderr, "Duration must be a valid double, was %s\n", arg);
+					goto usage;
+				}
+				break;
+			case 't':
+				nthreads = strtoul(arg, &end, 10);
+				if(*end != '\0' || nthreads < 1) {
+					fprintf(stderr, "Number of threads must be a positive integer, was %s\n", arg);
+					goto usage;
+				}
+				break;
+			case 'p':
+				nprocs = strtoul(arg, &end, 10);
+				if(*end != '\0' || nprocs < 1) {
+					fprintf(stderr, "Number of processors must be a positive integer, was %s\n", arg);
+					goto usage;
+				}
+				break;
+			// Other cases
+			default: /* ? */
+				fprintf( stderr, "Unknown option '%c'\n", opt);
+			usage:
+				fprintf( stderr, "Usage: %s [options]\n", argv[0]);
+				fprintf( stderr, "\n" );
+				fprintf( stderr, "  -d, --duration=DURATION  Duration of the experiment, in seconds\n" );
+				fprintf( stderr, "  -t, --nthreads=NTHREADS  Number of yielding threads\n" );
+				fprintf( stderr, "  -p, --nprocs=NPROCS      Number of processors\n" );
+				exit(1);
+		}
+	}
+	run:
+	// Run the benchmark with the parsed settings and print the results.
+	{
+		printf("Running %d threads on %d processors for %lf seconds\n", nthreads, nprocs, duration);
+
+		Time start, end;
+		BenchCluster cl;
+		#if !defined(__CFA_NO_STATISTICS__)
+			print_stats_at_exit( cl.self );
+		#endif
+		{
+			BenchProc procs[nprocs];
+			{
+				Yielder threads[nthreads];
+				printf("Starting\n");
+				start = getTime();
+				run = true;
+				// Wake every Yielder; each one parks at the start of its main.
+				for(i; nthreads) {
+					unpark( threads[i] __cfaabi_dbg_ctx2 );
+				}
+				do {
+					sleep(500`ms);
+					end = getTime();
+				} while( (end - start) < duration`s );
+				// Lower the flag so the yield loops end, then take the final timestamp.
+				run = false;
+				end = getTime();
+				printf("Done\n");
+			}	// NOTE(review): presumably leaving this scope joins the Yielders before the totals are read — confirm
+		}
+
+		printf("Took %'ld ms\n", (end - start)`ms);
+		printf("Total yields      : %'15llu\n", global_counter);
+		printf("yields per second : %'18.2lf\n", ((double)global_counter) / (end - start)`s);
+		printf("ns per yields     : %'18.2lf\n", ((double)(end - start)`ns) / global_counter);
+	}
+}
