Index: benchmark/benchcltr.hfa
===================================================================
--- benchmark/benchcltr.hfa	(revision 6bd2cfb14fd5626001c1d2b3b915b6ea8cc6da54)
+++ benchmark/benchcltr.hfa	(revision 01c625651c455d1883e6338c9211e466f3d32d46)
@@ -1,8 +1,16 @@
 #pragma once
+#include <assert.h>
+#include <stdint.h>
 
-#include <assert.h>
-#include <kernel.hfa>
-#include <thread.hfa>
-#include <stats.hfa>
+#ifdef __cforall
+	#include <kernel.hfa>
+	#include <thread.hfa>
+	#include <stats.hfa>
+#else
+#include <time.h>										// timespec
+#include <sys/time.h>									// timeval
+
+enum { TIMEGRAN = 1000000000LL };					// nanosecond granularity, except for timeval
+#endif
 
 #define BENCH_OPT_SHORT "d:p:t:SPV"
@@ -55,4 +63,6 @@
 bool procstats = false;
 bool viewhalts = false;
+
+#ifdef __cforall
 struct cluster * the_benchmark_cluster = 0p;
 struct BenchCluster {
@@ -60,6 +70,6 @@
 };
 
-void ?{}( BenchCluster & this, int flags, int stats ) {
-	(this.self){ "Benchmark Cluster", flags };
+void ?{}( BenchCluster & this, int num_io, const io_context_params & io_params, int stats ) {
+	(this.self){ "Benchmark Cluster", num_io, io_params };
 
 	assert( the_benchmark_cluster == 0p );
@@ -105,4 +115,32 @@
 	}
 }
+#else
+uint64_t getTimeNsec() {	// current CLOCK_REALTIME reading folded into one nanosecond count
+	timespec now;
+	clock_gettime( CLOCK_REALTIME, &now );
+	return now.tv_nsec + (int64_t)now.tv_sec * TIMEGRAN;	// sub-second part plus whole seconds scaled by TIMEGRAN
+}
+
+uint64_t to_miliseconds( uint64_t durtn ) { return durtn / (TIMEGRAN / 1000LL); }	// TIMEGRAN-unit (nanosecond) count -> whole milliseconds; integer division truncates
+double to_fseconds(uint64_t durtn ) { return durtn / (double)TIMEGRAN; }	// TIMEGRAN-unit (nanosecond) count -> seconds as a double
+uint64_t from_fseconds(double sec) { return sec * TIMEGRAN; }	// seconds as a double -> TIMEGRAN-unit count; the fractional part is dropped by the uint64_t conversion
+
+
+void wait_duration(double duration, uint64_t & start, uint64_t & end, bool is_tty) {
+	// Sleep in 100ms steps, refreshing `end` and the printed elapsed time,
+	// until at least `duration` seconds separate `start` and `end`.
+	uint64_t elapsed;
+	do {
+		usleep(100000);
+		end = getTimeNsec();
+		elapsed = end - start;
+
+		// The is_tty guard is disabled, so progress is always printed.
+		printf(" %.1f\r", to_fseconds(elapsed));
+		fflush(stdout);
+	} while( elapsed < from_fseconds(duration) );
+}
+#endif
+
 
 void bench_usage( char * argv [] ) {
Index: benchmark/io/readv.cfa
===================================================================
--- benchmark/io/readv.cfa	(revision 6bd2cfb14fd5626001c1d2b3b915b6ea8cc6da54)
+++ benchmark/io/readv.cfa	(revision 01c625651c455d1883e6338c9211e466f3d32d46)
@@ -40,5 +40,11 @@
 int do_read(int fd, struct iovec * iov) {
 	// extern ssize_t cfa_preadv2(int, const struct iovec *, int, off_t, int, int = 0, Duration = -1`s, io_cancellation * = 0p, io_context * = 0p);
-	int sflags = 0;
+	int sflags = 0
+	#if defined(CFA_HAVE_IOSQE_ASYNC)
+		| CFA_IO_ASYNC
+	#else
+	#warning no CFA_IO_ASYNC support
+	#endif
+	;
 	if(fixed_file) {
 		sflags |= CFA_IO_FIXED_FD1;
@@ -130,5 +136,4 @@
 				bench_usage( argv );
 				fprintf( stderr, "  -b, --buflen=SIZE        Number of bytes to read per request\n" );
-				fprintf( stderr, "  -u, --userthread         If set, cluster uses user-thread to poll I/O\n" );
 				fprintf( stderr, "  -s, --submitthread       If set, cluster uses polling thread to submit I/O\n" );
 				fprintf( stderr, "  -e, --eagersubmit        If set, cluster submits I/O eagerly but still aggregates submits\n" );
@@ -139,4 +144,7 @@
 		}
 	}
+
+	if(params.poll_submit  ) fixed_file = true;
+	if(params.poll_complete) file_flags |= O_DIRECT;
 
 	int lfd = open(__FILE__, file_flags);
Index: benchmark/readyQ/yield.cfa
===================================================================
--- benchmark/readyQ/yield.cfa	(revision 6bd2cfb14fd5626001c1d2b3b915b6ea8cc6da54)
+++ benchmark/readyQ/yield.cfa	(revision 01c625651c455d1883e6338c9211e466f3d32d46)
@@ -44,4 +44,6 @@
 int main(int argc, char * argv[]) {
 	BENCH_DECL
+	unsigned num_io = 1;
+	io_context_params params;
 
 	for(;;) {
@@ -73,5 +75,5 @@
 
 		Time start, end;
-		BenchCluster cl = { 0, CFA_STATS_READY_Q };
+		BenchCluster cl = { num_io, params, CFA_STATS_READY_Q };
 		{
 			BenchProc procs[nprocs];
