Index: benchmark/benchcltr.hfa
===================================================================
--- benchmark/benchcltr.hfa	(revision 566fde0b28f01b803e3c2de9c36338aaa80bdeff)
+++ benchmark/benchcltr.hfa	(revision cb8560391e7b57b539b8deb6a5eb90a63d7a5bcf)
@@ -4,15 +4,66 @@
 #include <kernel.hfa>
 #include <thread.hfa>
+#include <stats.hfa>
 
+#define BENCH_OPT_SHORT "d:p:t:SP" /* getopt short-option string for the options shared by the benchmarks; d, p and t take an argument, S and P are flags */
+#define BENCH_OPT_LONG /* getopt_long table entries for the shared options; every entry ends with a comma so the expansion site can append its own entries */ \
+	{"duration",     required_argument, 0, 'd'}, \
+	{"nthreads",     required_argument, 0, 't'}, \
+	{"nprocs",       required_argument, 0, 'p'}, \
+	{"nostats",      no_argument      , 0, 'S'}, \
+	{"procstat",     no_argument      , 0, 'P'},
+
+#define BENCH_DECL /* declares, with their defaults, the local variables that BENCH_OPT_CASE assigns */ \
+	double duration = 5; /* experiment length; the usage text documents it as seconds */ \
+	int nprocs = 1; /* size of the BenchProc array created by the benchmarks */ \
+	int nthreads = 1; /* size of the user-thread array created by the benchmarks */
+
+#define BENCH_OPT_CASE /* switch cases for the shared options; the expansion site must provide arg, end, a usage label and the BENCH_DECL variables */ \
+	case 'd': \
+		duration = strtod(arg, &end); \
+		if(*end != '\0') { /* rejects trailing characters; NOTE(review): an empty argument parses as 0 and is accepted */ \
+			fprintf(stderr, "Duration must be a valid double, was %s\n", arg); \
+			goto usage; \
+		} \
+		break; \
+	case 't': \
+		nthreads = strtoul(arg, &end, 10); /* NOTE(review): BENCH_DECL declares nthreads as int, so the unsigned long result is narrowed */ \
+		if(*end != '\0' || nthreads < 1) { \
+			fprintf(stderr, "Number of threads must be a positive integer, was %s\n", arg); \
+			goto usage; \
+		} \
+		break; \
+	case 'p': \
+		nprocs = strtoul(arg, &end, 10); /* NOTE(review): same unsigned long to int narrowing as for nthreads */ \
+		if(*end != '\0' || nprocs < 1) { \
+			fprintf(stderr, "Number of processors must be a positive integer, was %s\n", arg); \
+			goto usage; \
+		} \
+		break; \
+	case 'S': /* --nostats: sets the file-scope flag read by the BenchCluster constructor */ \
+		silent = true; \
+		break; \
+	case 'P': /* --procstat: sets the file-scope flag read by the BenchProc constructor */ \
+		procstats = true; \
+		break;
+
+bool silent = false; // set by -S/--nostats; when true the BenchCluster constructor does not request cluster stats
+bool procstats = false; // set by -P/--procstat; when true each BenchProc constructor requests stats for its processor
 struct cluster * the_benchmark_cluster = 0p;
 struct BenchCluster {
-      cluster self;
+	cluster self; // the wrapped cluster; initialised by the BenchCluster constructor
 };
 
-void ?{}( BenchCluster & this, int flags ) {
-      (this.self){ "Benchmark Cluster", flags };
+void ?{}( BenchCluster & this, int flags, int stats ) { // constructor: builds the cluster, registers it globally and optionally requests stats
+	(this.self){ "Benchmark Cluster", flags }; // construct the wrapped cluster with the caller's flags
 
-      assert( the_benchmark_cluster == 0p );
-      the_benchmark_cluster = &this.self;
+	assert( the_benchmark_cluster == 0p ); // no benchmark cluster may already be registered
+	the_benchmark_cluster = &this.self; // registered globally; the BenchProc constructor attaches processors to it
+
+	#if !defined(__CFA_NO_STATISTICS__)
+		if( !silent ) { // skipped when -S/--nostats was given
+			print_stats_at_exit( this.self, stats ); // stats is the caller-chosen set of statistics flags
+		}
+	#endif
 }
 
@@ -22,20 +73,36 @@
 
 void ?{}( BenchProc & this ) {
-      assert( the_benchmark_cluster != 0p );
+	assert( the_benchmark_cluster != 0p ); // a BenchCluster must have been constructed before any BenchProc
 	(this.self){ "Benchmark Processor", *the_benchmark_cluster };
+
+	#if !defined(__CFA_NO_STATISTICS__)
+		if( procstats ) { // only when -P/--procstat was given
+			print_stats_at_exit( this.self, the_benchmark_cluster->print_stats ); // NOTE(review): reuses the cluster's print_stats value; presumably that is only populated when cluster stats were requested (not with -S) - confirm
+		}
+	#endif
 }
 
 void wait(double duration, Time & start, Time & end, bool is_tty) {
-      for() {
-            sleep(100`ms);
-            end = getTimeNsec();
-            Duration delta = end - start;
-            if(is_tty) {
-                  printf("\r%.1f", delta`ds);
-                  fflush(stdout);
-            }
-            if( delta >= duration`s ) {
-                  break;
-            }
-      }
+	for() { // poll until at least the requested duration has elapsed since start
+		sleep(100`ms);
+		end = getTimeNsec(); // end is refreshed on every iteration, so the caller receives the last sampled time
+		Duration delta = end - start;
+		if(is_tty) { // progress display only when the caller reports a terminal
+			printf("\r%.1f", delta`ds); // the carriage return rewrites the same output line
+			fflush(stdout); // no newline is printed, so flush explicitly
+		}
+		if( delta >= duration`s ) {
+			break;
+		}
+	}
 }
+
+void bench_usage( char * argv [] ) { // prints to stderr the usage header and the options shared by the benchmarks; argv[0] is used as the program name
+	fprintf( stderr, "Usage: %s : [options]\n", argv[0] );
+	fprintf( stderr, "\n" );
+	fprintf( stderr, "  -d, --duration=DURATION  Duration of the experiment, in seconds\n" );
+	fprintf( stderr, "  -t, --nthreads=NTHREADS  Number of user threads\n" );
+	fprintf( stderr, "  -p, --nprocs=NPROCS      Number of kernel threads\n" );
+	fprintf( stderr, "  -S, --nostats            Don't print cluster stats\n" );
+	fprintf( stderr, "  -P, --procstat           Print processor stats\n" ); // newline-terminated so option lines a caller prints afterwards start on their own line
+}
Index: benchmark/io/readv.cfa
===================================================================
--- benchmark/io/readv.cfa	(revision 566fde0b28f01b803e3c2de9c36338aaa80bdeff)
+++ benchmark/io/readv.cfa	(revision cb8560391e7b57b539b8deb6a5eb90a63d7a5bcf)
@@ -50,9 +50,5 @@
 
 int main(int argc, char * argv[]) {
-	double duration   = 5.0;
-	unsigned long int nthreads = 2;
-	unsigned long int nprocs   = 1;
-	bool silent = false;
-	bool procstats = false;
+	BENCH_DECL
 	unsigned flags = 0;
 	unsigned sublen = 16;
@@ -61,9 +57,5 @@
 	for(;;) {
 		static struct option options[] = {
-			{"duration",     required_argument, 0, 'd'},
-			{"nthreads",     required_argument, 0, 't'},
-			{"nprocs",       required_argument, 0, 'p'},
-			{"nostats",      no_argument      , 0, 'S'},
-			{"procstat",     no_argument      , 0, 'P'},
+			BENCH_OPT_LONG
 			{"bufsize",      required_argument, 0, 'b'},
 			{"userthread",   no_argument      , 0, 'u'},
@@ -74,5 +66,5 @@
 
 		int idx = 0;
-		int opt = getopt_long(argc, argv, "d:t:p:SPb:usl:", options, &idx);
+		int opt = getopt_long(argc, argv, BENCH_OPT_SHORT "b:usl:", options, &idx);
 
 		const char * arg = optarg ? optarg : "";
@@ -82,32 +74,5 @@
 			case -1:
 				break arg_loop;
-			// Numeric Arguments
-			case 'd':
-				duration = strtod(arg, &end);
-				if(*end != '\0') {
-					fprintf(stderr, "Duration must be a valid double, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			case 't':
-				nthreads = strtoul(arg, &end, 10);
-				if(*end != '\0' || nthreads < 1) {
-					fprintf(stderr, "Number of threads must be a positive integer, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			case 'p':
-				nprocs = strtoul(arg, &end, 10);
-				if(*end != '\0' || nprocs < 1) {
-					fprintf(stderr, "Number of processors must be a positive integer, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			case 'S':
-				silent = true;
-				break;
-			case 'P':
-				procstats = true;
-				break;
+			BENCH_OPT_CASE
 			case 'b':
 				buflen = strtoul(arg, &end, 10);
@@ -131,15 +96,8 @@
 				flags |= (sublen << CFA_CLUSTER_IO_BUFFLEN_OFFSET);
 				break;
-			// Other cases
 			default: /* ? */
 				fprintf(stderr, "%d\n", opt);
 			usage:
-				fprintf( stderr, "Usage: %s : [options]\n", argv[0] );
-				fprintf( stderr, "\n" );
-				fprintf( stderr, "  -d, --duration=DURATION  Duration of the experiment, in seconds\n" );
-				fprintf( stderr, "  -t, --nthreads=NTHREADS  Number of user threads\n" );
-				fprintf( stderr, "  -p, --nprocs=NPROCS      Number of kernel threads\n" );
-				fprintf( stderr, "  -S, --nostats            Don't print cluster stats\n" );
-				fprintf( stderr, "  -P, --procstat           Print processor stats" );
+				bench_usage( argv );
 				fprintf( stderr, "  -b, --buflen=SIZE        Number of bytes to read per request\n" );
 				fprintf( stderr, "  -u, --userthread         If set, cluster uses user-thread to poll I/O\n" );
@@ -155,23 +113,11 @@
 	}
 
-	printf("Running %lu threads, reading %lu bytes each, over %lu processors for %lf seconds\n", nthreads, buflen, nprocs, duration);
+	printf("Running %d threads, reading %lu bytes each, over %d processors for %f seconds\n", nthreads, buflen, nprocs, duration);
 
 	{
 		Time start, end;
-		BenchCluster cl = { flags };
-		#if !defined(__CFA_NO_STATISTICS__)
-			if( !silent ) {
-				print_stats_at_exit( cl.self, CFA_STATS_READY_Q | CFA_STATS_IO );
-			}
-		#endif
+		BenchCluster cl = { flags, CFA_STATS_READY_Q | CFA_STATS_IO };
 		{
 			BenchProc procs[nprocs];
-			#if !defined(__CFA_NO_STATISTICS__)
-				if( procstats ) {
-					for(i; nprocs) {
-						print_stats_at_exit( procs[i].self, CFA_STATS_READY_Q | CFA_STATS_IO );
-					}
-				}
-			#endif
 			{
 				Reader threads[nthreads];
Index: benchmark/readyQ/yield.cfa
===================================================================
--- benchmark/readyQ/yield.cfa	(revision 566fde0b28f01b803e3c2de9c36338aaa80bdeff)
+++ benchmark/readyQ/yield.cfa	(revision cb8560391e7b57b539b8deb6a5eb90a63d7a5bcf)
@@ -43,68 +43,25 @@
 
 int main(int argc, char * argv[]) {
-	double duration = 5;
-	int nprocs = 1;
-	int nthreads = 1;
-	bool silent = false;
-	bool procstats = false;
+	BENCH_DECL
 
 	for(;;) {
 		static struct option options[] = {
-			{"duration",  required_argument, 0, 'd'},
-			{"nprocs",    required_argument, 0, 'p'},
-			{"nthreads",  required_argument, 0, 't'},
-			{"nostats",   no_argument      , 0, 'S'},
-			{"procstat",  no_argument      , 0, 'P'},
+			BENCH_OPT_LONG
 			{0, 0, 0, 0}
 		};
 
 		int idx = 0;
-		int opt = getopt_long(argc, argv, "d:p:t:SP", options, &idx);
+		int opt = getopt_long(argc, argv, BENCH_OPT_SHORT, options, &idx);
 
-		char * arg = optarg ? optarg : "";
-		size_t len = 0;
+		const char * arg = optarg ? optarg : "";
 		char * end;
 		switch(opt) {
 			case -1:
 				goto run;
-			// Numeric Arguments
-			case 'd':
-				duration = strtod(arg, &end);
-				if(*end != '\0') {
-					fprintf(stderr, "Duration must be a valid double, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			case 't':
-				nthreads = strtoul(arg, &end, 10);
-				if(*end != '\0' || nthreads < 1) {
-					fprintf(stderr, "Number of threads must be a positive integer, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			case 'p':
-				nprocs = strtoul(arg, &end, 10);
-				if(*end != '\0' || nprocs < 1) {
-					fprintf(stderr, "Number of processors must be a positive integer, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			case 'S':
-				silent = true;
-				break;
-			case 'P':
-				procstats = true;
-				break;
-			// Other cases
+			BENCH_OPT_CASE
 			default: /* ? */
 				fprintf( stderr, "Unkown option '%c'\n", opt);
 			usage:
-				fprintf( stderr, "Usage: %s [options]\n", argv[0]);
-				fprintf( stderr, "\n" );
-				fprintf( stderr, "  -d, --duration=DURATION  Duration of the experiment, in seconds\n" );
-				fprintf( stderr, "  -t, --nthreads=NTHREADS  Number of kernel threads\n" );
-				fprintf( stderr, "  -q, --nqueues=NQUEUES    Number of queues per threads\n" );
-				fprintf( stderr, "  -S, --nostats            Don't print cluster stats\n" );
-				fprintf( stderr, "  -P, --procstat           Print processor stats" );
+				bench_usage( argv );
 				exit(1);
 		}
@@ -113,22 +70,10 @@
 
 	{
-		printf("Running %d threads on %d processors for %lf seconds\n", nthreads, nprocs, duration);
+		printf("Running %d threads on %d processors for %f seconds\n", nthreads, nprocs, duration);
 
 		Time start, end;
-		BenchCluster cl = { 0 };
-		#if !defined(__CFA_NO_STATISTICS__)
-			if( !silent ) {
-				print_stats_at_exit( cl.self, CFA_STATS_READY_Q );
-			}
-		#endif
+		BenchCluster cl = { 0, CFA_STATS_READY_Q };
 		{
 			BenchProc procs[nprocs];
-			#if !defined(__CFA_NO_STATISTICS__)
-				if( procstats ) {
-					for(i; nprocs) {
-						print_stats_at_exit( procs[i].self, CFA_STATS_READY_Q );
-					}
-				}
-			#endif
 			{
 				Yielder threads[nthreads];
