Index: example/io/batch-readv.c
===================================================================
--- example/io/batch-readv.c	(revision 4706098cc45d914487666cefb2aec99d746d41e5)
+++ example/io/batch-readv.c	(revision 4706098cc45d914487666cefb2aec99d746d41e5)
@@ -0,0 +1,189 @@
+// Program to test the optimal batch size in a single-threaded process
+extern "C" {
+	#include <getopt.h>
+	#include <locale.h>
+	#include <time.h>										// timespec
+	#include <sys/time.h>									// timeval
+}
+
+enum { TIMEGRAN = 1000000000LL };					// ticks per second: nanosecond granularity, except for timeval
+
+#include <omp.h>
+
+#include "io_uring.h"
+
+
+int myfd;													// descriptor placed in every read request; assigned by open() in main
+
+long long unsigned submits   = 0;							// running sum of the io_uring_enter return values
+long long unsigned completes = 0;							// running sum of the completion entries drained
+
+void submit_and_drain(struct iovec * iov, int n) {		// queue n readv requests on iov, wait for them, then empty the completion ring
+	// The first sqe is rewritten once per request of the batch
+	for(int left = n; left > 0; left--) {
+		struct io_uring_sqe * entry = &self.io.submit_q.sqes[ 0 ];
+		entry->opcode = IORING_OP_READV;
+		#if defined(IOSQE_ASYNC)
+			entry->flags = IOSQE_ASYNC;
+		#else
+			entry->flags = 0;
+		#endif
+		entry->ioprio = 0;
+		entry->fd = myfd;
+		entry->off = 0;
+		entry->addr = (__u64)iov;
+		entry->len = 1;										// a single iovec per request
+		entry->rw_flags = 0;
+		entry->__pad2[2] = 0;
+		entry->__pad2[1] = 0;
+		entry->__pad2[0] = 0;
+	}
+
+	// Advance the submission tail by the size of the batch
+	volatile uint32_t * sq_tail = self.io.submit_q.tail;
+	__atomic_fetch_add(sq_tail, n, __ATOMIC_SEQ_CST);
+
+	// Both the submit count and the minimum completion count are n
+	int ret = syscall( __NR_io_uring_enter, self.io.fd, n, n, IORING_ENTER_GETEVENTS, nullptr, 0);
+	if( ret < 0 ) {
+		// Every errno value, EAGAIN and EINTR included, is reported and is fatal
+		fprintf(stderr, "KERNEL ERROR: IO_URING WAIT - %s\n", strerror(errno) );
+		abort();
+	}
+	submits += ret;
+
+	// Snapshot the completion ring indices before the barrier
+	uint32_t cq_head = *self.io.completion_q.head;
+	uint32_t cq_tail = *self.io.completion_q.tail;
+	const uint32_t cq_mask = *self.io.completion_q.mask;
+
+	__atomic_thread_fence( __ATOMIC_SEQ_CST );				// Memory barrier
+
+	// Skip over everything pending; the individual results are never inspected
+	uint32_t drained = cq_tail - cq_head;
+	__atomic_fetch_add( self.io.completion_q.head, drained, __ATOMIC_RELAXED );
+	completes += drained;
+}
+
+uint64_t getTimeNsec() {									// current clock reading in nanoseconds; only differences between readings are meaningful
+	struct timespec curr;
+	clock_gettime( CLOCK_MONOTONIC, &curr );				// monotonic, so measured intervals are not disturbed by wall-clock adjustments
+	return (int64_t)curr.tv_sec * TIMEGRAN + curr.tv_nsec;
+}
+
+uint64_t to_miliseconds( uint64_t durtn ) { const uint64_t ns_per_ms = TIMEGRAN / 1000LL; return durtn / ns_per_ms; }	// nanoseconds -> whole milliseconds (truncating)
+double to_fseconds(uint64_t durtn ) { const double ns_per_sec = TIMEGRAN; return durtn / ns_per_sec; }				// nanoseconds -> fractional seconds
+uint64_t from_fseconds(double sec) { const double nsec = sec * TIMEGRAN; return (uint64_t)nsec; }					// fractional seconds -> nanoseconds (truncating)
+
+int main(int argc, char * argv[]) {						// parse the options, prepare the ring, run the timed loop, print the report
+	int buflen = 50;										// bytes per read request (-l)
+	int batch  = 1;											// requests per submit_and_drain call (-b)
+	double duration = 5;									// seconds to run (-d)
+
+	setlocale(LC_ALL, "");									// use the environment's locale for the %' grouping in the report
+
+	for(;;) {
+		static struct option options[] = {
+			{"duration",     required_argument, 0, 'd'},
+			{"batchsize",   required_argument, 0, 'b'},
+			{"buflen",      required_argument, 0, 'l'},
+			{0, 0, 0, 0}
+		};
+
+		int idx = 0;
+		int opt = getopt_long(argc, argv, "d:l:b:", options, &idx);
+
+		const char * arg = optarg ? optarg : "";
+		char * end;
+		switch(opt) {
+			// Exit Case
+			case -1:
+				goto arg_loop;
+			case 'd':
+				duration = strtod(arg, &end);
+				if(end == arg || *end != '\0' || !(duration >= 0)) {	// empty input, trailing junk, negative values and NaN are all rejected
+					fprintf(stderr, "Duration must be a valid double, was %s\n", arg);
+					goto usage;
+				}
+				break;
+			case 'l':
+				buflen = strtoul(arg, &end, 10);
+				if(end == arg || *end != '\0' || buflen < 10) {		// a parse failure or a too-small value is an error on its own
+					fprintf(stderr, "Buffer size must be at least 10, was %s\n", arg);
+					goto usage;
+				}
+				break;										// without this, -l would also overwrite batch
+			case 'b':
+				batch = strtoul(arg, &end, 10);
+				if(end == arg || *end != '\0' || batch < 1) {		// batch is a divisor in the report below, so 0 is rejected too
+					fprintf(stderr, "Batch size must be at least 1, was %s\n", arg);
+					goto usage;
+				}
+				break;
+			default: /* ? */
+				fprintf(stderr, "%d\n", opt);
+			usage:
+				fprintf( stderr, "  -d, --duration=SECONDS   Number of seconds to run\n" );
+				fprintf( stderr, "  -l, --buflen=SIZE        Number of bytes to read per request\n" );
+				fprintf( stderr, "  -b, --batchsize=COUNT    Number of request to batch together\n" );
+				exit(EXIT_FAILURE);
+		}
+	}
+	arg_loop:
+
+	myfd = open(__FILE__, O_RDONLY);						// the requests read from the path this file was compiled from
+	if( myfd < 0 ) {
+		fprintf(stderr, "Could not open %s - %s\n", __FILE__, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	init_uring(2048);
+
+	// Prepare the first sqe; it is the only one the submission array refers to
+	struct io_uring_sqe * sqe =  &self.io.submit_q.sqes[ 0 ];
+	char data[buflen];
+	struct iovec iov = { data, (size_t)buflen };
+
+	sqe->opcode = IORING_OP_READV;
+	#if !defined(IOSQE_ASYNC)
+		sqe->flags = 0;
+	#else
+		sqe->flags = IOSQE_ASYNC;
+	#endif
+	sqe->ioprio = 0;
+	sqe->fd = myfd;
+	sqe->off = 0;
+	sqe->addr = (__u64)&iov;
+	sqe->len = 1;
+	sqe->rw_flags = 0;
+	sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
+
+	// Point every slot of the submission array at sqe 0
+	for(unsigned i = 0; i < *self.io.submit_q.num; i++) {
+		self.io.submit_q.array[ i ] = 0;
+	}
+
+	printf("Running for %f second, reading %d bytes in batches of %d\n", duration, buflen, batch);
+	uint64_t start = getTimeNsec();
+	uint64_t end   = getTimeNsec();
+	uint64_t prev  = getTimeNsec();
+	for(;;) {
+		submit_and_drain(&iov, batch);
+		end = getTimeNsec();
+		uint64_t delta = end - start;
+		if( to_fseconds(end - prev) > 0.1 ) {				// refresh the progress display at most every 0.1 s
+			printf(" %.1f\r", to_fseconds(delta));
+			fflush(stdout);
+			prev = end;
+		}
+		if( delta >= from_fseconds(duration) ) { break; }
+	}
+
+	printf("Took %'llu ms\n", (unsigned long long)to_miliseconds(end - start));
+	printf("Submitted        %'llu\n", submits);
+	printf("Completed        %'llu\n", completes);
+	printf("Submitted / sec  %'.f\n", submits   / to_fseconds(end - start));
+	printf("Completed / sec  %'.f\n", completes / to_fseconds(end - start));
+	printf("ns per Submitted %'.f\n", 1000000000.0 * to_fseconds(end - start) / (submits   / batch) );
+	printf("ns per Completed %'.f\n", 1000000000.0 * to_fseconds(end - start) / (completes / batch) );
+}
