Index: benchmark/io/batch-readv.c
===================================================================
--- benchmark/io/batch-readv.c	(revision 7b91c0e992ed493cc46d297ec3fe313c381a8dbc)
+++ 	(revision )
@@ -1,189 +1,0 @@
-// Program to test the optimial batchsize in a single threaded process
-extern "C" {
-	#include <getopt.h>
-	#include <locale.h>
-	#include <time.h>										// timespec
-	#include <sys/time.h>									// timeval
-}
-
-enum { TIMEGRAN = 1000000000LL };					// nanosecond granularity, except for timeval
-
-#include <omp.h>
-
-#include "io_uring.h"
-
-
-int myfd;
-
-long long unsigned submits   = 0;
-long long unsigned completes = 0;
-
-void submit_and_drain(struct iovec * iov, int n) {
-	for(int i = 0; i < n; i++) {
-		struct io_uring_sqe * sqe =  &self.io.submit_q.sqes[ 0 ];
-
-		sqe->opcode = IORING_OP_READV;
-		#if !defined(IOSQE_ASYNC)
-			sqe->flags = 0;
-		#else
-			sqe->flags = IOSQE_ASYNC;
-		#endif
-		sqe->ioprio = 0;
-		sqe->fd = myfd;
-		sqe->off = 0;
-		sqe->addr = (__u64)iov;
-		sqe->len = 1;
-		sqe->rw_flags = 0;
-		sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
-	}
-
-	volatile uint32_t * tail = self.io.submit_q.tail;
-	__atomic_fetch_add(tail, n, __ATOMIC_SEQ_CST);
-
-	int ret = syscall( __NR_io_uring_enter, self.io.fd, n, n, IORING_ENTER_GETEVENTS, nullptr, 0);
-	if( ret < 0 ) {
-		switch((int)errno) {
-		case EAGAIN:
-		case EINTR:
-		default:
-			fprintf(stderr, "KERNEL ERROR: IO_URING WAIT - %s\n", strerror(errno) );
-			abort();
-		}
-	}
-
-	submits += ret;
-
-	uint32_t chead = *self.io.completion_q.head;
-	uint32_t ctail = *self.io.completion_q.tail;
-	const uint32_t mask = *self.io.completion_q.mask;
-
-	// Memory barrier
-	__atomic_thread_fence( __ATOMIC_SEQ_CST );
-
-	uint32_t count = ctail - chead;
-	__atomic_fetch_add( self.io.completion_q.head, count, __ATOMIC_RELAXED );
-	completes += count;
-}
-
-uint64_t getTimeNsec() {
-	timespec curr;
-	clock_gettime( CLOCK_REALTIME, &curr );
-	return (int64_t)curr.tv_sec * TIMEGRAN + curr.tv_nsec;
-}
-
-uint64_t to_miliseconds( uint64_t durtn ) { return durtn / (TIMEGRAN / 1000LL); }
-double to_fseconds(uint64_t durtn ) { return durtn / (double)TIMEGRAN; }
-uint64_t from_fseconds(double sec) { return sec * TIMEGRAN; }
-
-int main(int argc, char * argv[]) {
-	int buflen = 50;
-	int batch  = 1;
-	double duration = 5;
-
-	setlocale(LC_ALL, "");
-
-	for(;;) {
-		static struct option options[] = {
-			{"duration",     required_argument, 0, 'd'},
-			{"batchsize",   required_argument, 0, 'b'},
-			{"buflen",      required_argument, 0, 'l'},
-			{0, 0, 0, 0}
-		};
-
-		int idx = 0;
-		int opt = getopt_long(argc, argv, "d:l:b:", options, &idx);
-
-		const char * arg = optarg ? optarg : "";
-		char * end;
-		switch(opt) {
-			// Exit Case
-			case -1:
-				goto arg_loop;
-			case 'd': \
-				duration = strtod(arg, &end); \
-				if(*end != '\0') { \
-					fprintf(stderr, "Duration must be a valid double, was %s\n", arg); \
-					goto usage; \
-				} \
-				break;
-			case 'l':
-				buflen = strtoul(arg, &end, 10);
-				if(*end != '\0' && buflen < 10) {
-					fprintf(stderr, "Buffer size must be at least 10, was %s\n", arg);
-					goto usage;
-				}
-			case 'b':
-				batch = strtoul(arg, &end, 10);
-				if(*end != '\0' && batch < 0) {
-					fprintf(stderr, "Batch size must be at least 1, was %s\n", arg);
-					goto usage;
-				}
-				break;
-			default: /* ? */
-				fprintf(stderr, "%d\n", opt);
-			usage:
-				fprintf( stderr, "  -l, --buflen=SIZE        Number of bytes to read per request\n" );
-				fprintf( stderr, "  -b, --batchsize=COUNT    Number of request to batch together\n" );
-				exit(EXIT_FAILURE);
-		}
-	}
-	arg_loop:
-
-	myfd = open(__FILE__, 0);
-
-	init_uring(2048);
-
-	// Allocate the sqe
-	uint32_t idx = 0;
-
-	// Return the sqe
-	struct io_uring_sqe * sqe =  &self.io.submit_q.sqes[ idx & (*self.io.submit_q.mask)];
-
-	char data[buflen];
-	struct iovec iov = { data, (size_t)buflen };
-
-	sqe->opcode = IORING_OP_READV;
-	#if !defined(IOSQE_ASYNC)
-		sqe->flags = 0;
-	#else
-		sqe->flags = IOSQE_ASYNC;
-	#endif
-	sqe->ioprio = 0;
-	sqe->fd = myfd;
-	sqe->off = 0;
-	sqe->addr = (__u64)&iov;
-	sqe->len = 1;
-	sqe->rw_flags = 0;
-	sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
-
-	// Append to the list of ready entries
-	for(unsigned i = 0; i < *self.io.submit_q.num; i++) {
-		self.io.submit_q.array[ i ] = 0;
-	}
-
-	printf("Running for %f second, reading %d bytes in batches of %d\n", duration, buflen, batch);
-	uint64_t start = getTimeNsec();
-	uint64_t end   = getTimeNsec();
-	uint64_t prev  = getTimeNsec();
-	for(;;) {
-		submit_and_drain(&iov, batch);
-		end = getTimeNsec();
-		uint64_t delta = end - start;
-		if( to_fseconds(end - prev) > 0.1 ) {
-			printf(" %.1f\r", to_fseconds(delta));
-			fflush(stdout);
-			prev = end;
-		}
-		if( delta >= from_fseconds(duration) ) {
-			break;
-		}
-	}
-
-	printf("Took %'ld ms\n", to_miliseconds(end - start));
-	printf("Submitted        %'llu\n", submits);
-	printf("Completed        %'llu\n", completes);
-	printf("Submitted / sec  %'.f\n", submits   / to_fseconds(end - start));
-	printf("Completed / sec  %'.f\n", completes / to_fseconds(end - start));
-	printf("ns per Submitted %'.f\n", 1000000000.0 * to_fseconds(end - start) / (submits   / batch) );
-	printf("ns per Completed %'.f\n", 1000000000.0 * to_fseconds(end - start) / (completes / batch) );
-}
Index: example/io/batch-readv.c
===================================================================
--- example/io/batch-readv.c	(revision 7d01186d31d05cc33be5a19d1380981483dab022)
+++ example/io/batch-readv.c	(revision 7d01186d31d05cc33be5a19d1380981483dab022)
@@ -0,0 +1,189 @@
+// Program to test the optimal batch size in a single-threaded process
+extern "C" {
+	#include <getopt.h>
+	#include <locale.h>
+	#include <time.h>										// timespec
+	#include <sys/time.h>									// timeval
+}
+
+enum { TIMEGRAN = 1000000000LL };					// nanosecond granularity, except for timeval
+
+#include <omp.h>
+
+#include "io_uring.h"
+
+
+int myfd;											// descriptor every READV request reads from; assigned in main()
+
+long long unsigned submits   = 0;					// running sum of the io_uring_enter return values
+long long unsigned completes = 0;					// running count of completion-queue entries consumed
+
+// Writes a READV request for `iov` into SQE slot 0 (once per batched request), advances the submission tail
+// by `n`, calls io_uring_enter to submit `n` and wait for `n` completions, then consumes all pending completions.
+void submit_and_drain(struct iovec * iov, int n) {
+	for(int i = 0; i < n; i++) {
+		struct io_uring_sqe * sqe =  &self.io.submit_q.sqes[ 0 ];	// every request reuses slot 0
+
+		sqe->opcode = IORING_OP_READV;
+		#if !defined(IOSQE_ASYNC)
+			sqe->flags = 0;
+		#else
+			sqe->flags = IOSQE_ASYNC;
+		#endif
+		sqe->ioprio = 0;
+		sqe->fd = myfd;
+		sqe->off = 0;
+		sqe->addr = (__u64)iov;
+		sqe->len = 1;										// one iovec entry per request
+		sqe->rw_flags = 0;
+		sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
+	}
+
+	volatile uint32_t * tail = self.io.submit_q.tail;
+	__atomic_fetch_add(tail, n, __ATOMIC_SEQ_CST);			// advance the submission-queue tail by n
+
+	int ret = syscall( __NR_io_uring_enter, self.io.fd, n, n, IORING_ENTER_GETEVENTS, nullptr, 0);
+	if( ret < 0 ) {
+		switch((int)errno) {
+		case EAGAIN:
+		case EINTR:
+		default:											// every errno, EAGAIN and EINTR included, is fatal here
+			fprintf(stderr, "KERNEL ERROR: IO_URING WAIT - %s\n", strerror(errno) );
+			abort();
+		}
+	}
+
+	submits += ret;
+
+	uint32_t chead = *self.io.completion_q.head;
+	uint32_t ctail = *self.io.completion_q.tail;
+
+	__atomic_thread_fence( __ATOMIC_SEQ_CST );				// memory barrier
+
+	uint32_t count = ctail - chead;							// unsigned difference stays correct across index wrap-around
+	__atomic_fetch_add( self.io.completion_q.head, count, __ATOMIC_RELAXED );	// consume them; CQE results are not inspected
+	completes += count;
+}
+
+uint64_t getTimeNsec() {									// timestamp in nanoseconds; only differences are meaningful
+	timespec curr;
+	clock_gettime( CLOCK_MONOTONIC, &curr );				// monotonic: intervals are unaffected by wall-clock adjustments
+	return (int64_t)curr.tv_sec * TIMEGRAN + curr.tv_nsec;
+}
+
+uint64_t to_miliseconds( uint64_t durtn ) { const uint64_t nsec_per_msec = TIMEGRAN / 1000LL; return durtn / nsec_per_msec; }	// nanoseconds -> whole milliseconds
+double to_fseconds(uint64_t durtn ) { return (double)durtn / TIMEGRAN; }	// nanoseconds -> fractional seconds
+uint64_t from_fseconds(double sec) { return (uint64_t)(sec * TIMEGRAN); }	// fractional seconds -> nanoseconds (fraction discarded)
+
+// Benchmark driver: parses -d/--duration, -l/--buflen and -b/--batchsize, sets up one READV request on this
+// source file, then loops on submit_and_drain() for the requested duration and prints counts and rates.
+int main(int argc, char * argv[]) {
+	int buflen = 50;
+	int batch  = 1;
+	double duration = 5;
+
+	setlocale(LC_ALL, "");									// the report below uses the locale-dependent %' grouping flag
+
+	for(;;) {
+		static struct option options[] = {
+			{"duration",     required_argument, 0, 'd'},
+			{"batchsize",   required_argument, 0, 'b'},
+			{"buflen",      required_argument, 0, 'l'},
+			{0, 0, 0, 0}
+		};
+		int idx = 0;
+		int opt = getopt_long(argc, argv, "d:l:b:", options, &idx);
+		const char * arg = optarg ? optarg : "";
+		char * end;
+		switch(opt) {
+			case -1:										// no more options
+				goto arg_loop;
+			case 'd':
+				duration = strtod(arg, &end);
+				if(end == arg || *end != '\0') {			// nothing parsed, or trailing junk
+					fprintf(stderr, "Duration must be a valid double, was %s\n", arg);
+					goto usage;
+				}
+				break;
+			case 'l':
+				buflen = strtoul(arg, &end, 10);
+				if(*end != '\0' || buflen < 10) {			// reject trailing junk OR a too-small value
+					fprintf(stderr, "Buffer size must be at least 10, was %s\n", arg);
+					goto usage;
+				}
+				break;										// must not fall through into the batch-size case
+			case 'b':
+				batch = strtoul(arg, &end, 10);
+				if(*end != '\0' || batch < 1) {				// batch is later used as a divisor, so 0 is invalid
+					fprintf(stderr, "Batch size must be at least 1, was %s\n", arg);
+					goto usage;
+				}
+				break;
+			default: /* ? */
+				fprintf(stderr, "%d\n", opt);
+			usage:
+				fprintf( stderr, "  -d, --duration=SECONDS   Number of seconds to run the benchmark\n" );
+				fprintf( stderr, "  -l, --buflen=SIZE        Number of bytes to read per request\n" );
+				fprintf( stderr, "  -b, --batchsize=COUNT    Number of request to batch together\n" );
+				exit(EXIT_FAILURE);
+		}
+	}
+	arg_loop:
+	myfd = open(__FILE__, 0);								// __FILE__ is a compile-time path and may not resolve at run time
+	if( myfd < 0 ) {
+		fprintf(stderr, "Could not open %s: %s\n", __FILE__, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	init_uring(2048);
+
+	struct io_uring_sqe * sqe =  &self.io.submit_q.sqes[ 0 ];	// the one SQE slot used for every request
+
+	char data[buflen];
+	struct iovec iov = { data, (size_t)buflen };
+
+	sqe->opcode = IORING_OP_READV;
+	#if !defined(IOSQE_ASYNC)
+		sqe->flags = 0;
+	#else
+		sqe->flags = IOSQE_ASYNC;
+	#endif
+	sqe->ioprio = 0;
+	sqe->fd = myfd;
+	sqe->off = 0;
+	sqe->addr = (__u64)&iov;
+	sqe->len = 1;
+	sqe->rw_flags = 0;
+	sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
+
+	// Point every entry of the submission index array at SQE 0
+	for(unsigned i = 0; i < *self.io.submit_q.num; i++) {
+		self.io.submit_q.array[ i ] = 0;
+	}
+
+	printf("Running for %f second, reading %d bytes in batches of %d\n", duration, buflen, batch);
+	uint64_t start = getTimeNsec();
+	uint64_t end   = getTimeNsec();
+	uint64_t prev  = getTimeNsec();
+	for(;;) {
+		submit_and_drain(&iov, batch);
+		end = getTimeNsec();
+		uint64_t delta = end - start;
+		if( to_fseconds(end - prev) > 0.1 ) {				// refresh the progress display at most every 0.1 s
+			printf(" %.1f\r", to_fseconds(delta));
+			fflush(stdout);
+			prev = end;
+		}
+		if( delta >= from_fseconds(duration) ) {
+			break;
+		}
+	}
+
+	printf("Took %'llu ms\n", (unsigned long long)to_miliseconds(end - start));
+	printf("Submitted        %'llu\n", submits);
+	printf("Completed        %'llu\n", completes);
+	printf("Submitted / sec  %'.f\n", submits   / to_fseconds(end - start));
+	printf("Completed / sec  %'.f\n", completes / to_fseconds(end - start));
+	printf("ns per Submitted %'.f\n", 1000000000.0 * to_fseconds(end - start) / (submits   / batch) );
+	printf("ns per Completed %'.f\n", 1000000000.0 * to_fseconds(end - start) / (completes / batch) );
+}
