Index: benchmark/benchcltr.hfa
===================================================================
--- benchmark/benchcltr.hfa	(revision 564148f8cd1b3c426655d3055974efb5e3824df3)
+++ benchmark/benchcltr.hfa	(revision 9791ab583604047f0117e3c8a46c7ab7914c7ba7)
@@ -10,6 +10,6 @@
 };
 
-void ?{}( BenchCluster & this ) {
-      (this.self){ "Benchmark Cluster" };
+void ?{}( BenchCluster & this, int flags ) {
+      (this.self){ "Benchmark Cluster", flags };
 
       assert( the_benchmark_cluster == 0p );
@@ -26,3 +26,15 @@
 }
 
-
+void wait(double duration, Time & start, Time & end, bool is_tty) { // Sleep in 500ms steps until 'duration' seconds have elapsed since 'start'; the last sampled time is left in 'end'.
+      for() {
+            sleep(500`ms); // the clock is re-sampled once per 500ms sleep
+            end = getTime();
+            if( (end - start) >= duration`s ) { // time budget reached: leave the loop without printing
+                  break;
+            }
+            if(is_tty) { // progress is printed only when the caller's is_tty flag is set
+                  printf("\r%.4lf", (double)(end - start)`s); // leading '\r' and no newline: the elapsed time is rewritten on the same line
+                  fflush(stdout); // explicit flush after each progress update
+            }
+      }
+}
Index: benchmark/io/readv.cfa
===================================================================
--- benchmark/io/readv.cfa	(revision 564148f8cd1b3c426655d3055974efb5e3824df3)
+++ benchmark/io/readv.cfa	(revision 9791ab583604047f0117e3c8a46c7ab7914c7ba7)
@@ -17,4 +17,6 @@
 #include <time.hfa>
 
+#include "../benchcltr.hfa"
+
 extern bool traceHeapOn();
 extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
@@ -26,21 +28,12 @@
 unsigned long int buflen = 50;
 
-cluster * the_cluster;
-
-thread Reader {};
+thread __attribute__((aligned(128))) Reader {}; // Reader thread type, aligned to 128 bytes. NOTE(review): presumably to avoid false sharing between threads -- confirm
 void ?{}( Reader & this ) {
-	((thread&)this){ "Reader Thread", *the_cluster };
-}
-
-struct my_processor {
-	processor p;
-};
-
-void ?{}( my_processor & this ) {
-	(this.p){ "I/O Processor", *the_cluster };
+	((thread&)this){ "Reader Thread", *the_benchmark_cluster }; // construct the thread base, named "Reader Thread", on the cluster pointed to by the_benchmark_cluster
 }
 
 void main( Reader & ) {
-	while(!__atomic_load_n(&run, __ATOMIC_RELAXED)) yield();
+	park( __cfaabi_dbg_ctx );
+	/* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
 
 	char data[buflen];
@@ -153,24 +146,26 @@
 	{
 		Time start, end;
-		cluster cl = { "IO Cluster", flags };
-		the_cluster = &cl;
+		BenchCluster cl = { flags };
 		#if !defined(__CFA_NO_STATISTICS__)
-			print_stats_at_exit( cl );
+			print_stats_at_exit( cl.self );
 		#endif
 		{
-			my_processor procs[nprocs];
+			BenchProc procs[nprocs];
 			{
 				Reader threads[nthreads];
 
 				printf("Starting\n");
+				bool is_tty = isatty(STDOUT_FILENO);
 				start = getTime();
 				run = true;
-				do {
-					sleep(500`ms);
-					end = getTime();
-				} while( (end - start) < duration`s );
+
+				for(i; nthreads) {
+					unpark( threads[i] __cfaabi_dbg_ctx2 );
+				}
+				wait(duration, start, end, is_tty);
+
 				run = false;
 				end = getTime();
-				printf("Done\n");
+				printf("\nDone\n");
 			}
 		}
Index: benchmark/readyQ/yield.cfa
===================================================================
--- benchmark/readyQ/yield.cfa	(revision 564148f8cd1b3c426655d3055974efb5e3824df3)
+++ benchmark/readyQ/yield.cfa	(revision 9791ab583604047f0117e3c8a46c7ab7914c7ba7)
@@ -105,5 +105,5 @@
 
 		Time start, end;
-		BenchCluster cl;
+		BenchCluster cl = { 0 };
 		#if !defined(__CFA_NO_STATISTICS__)
 			print_stats_at_exit( cl.self );
@@ -113,6 +113,7 @@
 			{
 				Yielder threads[nthreads];
+				printf("Starting\n");
+
 				bool is_tty = isatty(STDOUT_FILENO);
-				printf("Starting\n");
 				start = getTime();
 				run = true;
@@ -121,19 +122,9 @@
 					unpark( threads[i] __cfaabi_dbg_ctx2 );
 				}
-				for() {
-					sleep(500`ms);
-					end = getTime();
-					if( (end - start) >= duration`s ) {
-						break;
-					}
-					if(is_tty) {
-						printf("\r%.4lf", (double)(end - start)`s);
-						fflush(stdout);
-					}
-				}
+				wait(duration, start, end, is_tty);
 
 				run = false;
 				end = getTime();
-				printf("Done\n");
+				printf("\nDone\n");
 			}
 		}
