Index: Jenkins/FullBuild
===================================================================
--- Jenkins/FullBuild	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ Jenkins/FullBuild	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -21,11 +21,11 @@
 					gcc_7_x86_old: { trigger_build( 'gcc-7',   'x86', false ) },
 					gcc_6_x86_old: { trigger_build( 'gcc-6',   'x86', false ) },
-					gcc_9_x64_old: { trigger_build( 'gcc-9',   'x64', true  ) },
-					gcc_8_x64_old: { trigger_build( 'gcc-8',   'x64', true  ) },
-					gcc_7_x64_old: { trigger_build( 'gcc-7',   'x64', true  ) },
-					gcc_6_x64_old: { trigger_build( 'gcc-6',   'x64', true  ) },
-					gcc_5_x64_old: { trigger_build( 'gcc-5',   'x64', true  ) },
-					clang_x64_old: { trigger_build( 'clang',   'x64', true  ) },
-					clang_x64_new: { trigger_build( 'clang',   'x64', false ) },
+					gcc_9_x64_new: { trigger_build( 'gcc-9',   'x64', true  ) },
+					gcc_8_x64_new: { trigger_build( 'gcc-8',   'x64', true  ) },
+					gcc_7_x64_new: { trigger_build( 'gcc-7',   'x64', true  ) },
+					gcc_6_x64_new: { trigger_build( 'gcc-6',   'x64', true  ) },
+					gcc_5_x64_new: { trigger_build( 'gcc-5',   'x64', true  ) },
+					clang_x64_new: { trigger_build( 'clang',   'x64', true  ) },
+					clang_x64_old: { trigger_build( 'clang',   'x64', false ) },
 				)
 			}
@@ -66,4 +66,9 @@
 
 def trigger_build(String cc, String arch, boolean new_ast) {
+	// Delay each build by a random amount (up to 5 minutes) to avoid hitting the SC server too hard
+	sleep(time: 5 * Math.random(), unit:"MINUTES")
+
+	// Run the build
+	// Don't propagate, it doesn't play nice with our email setup
 	def result = build job: 'Cforall/master', 		\
 		parameters: [						\
Index: benchmark/io/http/http_ring.cpp
===================================================================
--- benchmark/io/http/http_ring.cpp	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ benchmark/io/http/http_ring.cpp	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -20,4 +20,5 @@
 		socklen_t *addrlen;
 		int flags;
+		unsigned cnt;
 	} acpt;
 
@@ -67,4 +68,8 @@
 thread_local stats_block_t stats;
 stats_block_t global_stats;
+
+thread_local struct __attribute__((aligned(128))) {
+	size_t to_submit = 0;
+} local;
 
 // Get an array of current connections
@@ -192,4 +197,5 @@
 	static void submit(struct io_uring * ring, struct io_uring_sqe * sqe, connection * conn) {
 		(void)ring;
+		local.to_submit++;
 		#ifdef USE_ASYNC
 			io_uring_sqe_set_flags(sqe, IOSQE_ASYNC);
@@ -406,5 +412,5 @@
 		switch(state) {
 		case ACCEPTING:
-			connection::accept(ring, opt);
+			// connection::accept(ring, opt);
 			newconn(ring, res);
 			break;
@@ -420,4 +426,8 @@
 
 //=========================================================
+extern "C" {
+	#include <sys/eventfd.h>  // eventfd: used for io_uring completion wakeup and for termination
+}
+
 // Main loop of the WebServer
 // Effectively uses one thread_local copy of everything per kernel thread
@@ -427,4 +437,16 @@
 	struct io_uring * ring = opt.ring;
 
+	int blockfd = eventfd(0, 0);
+	if (blockfd < 0) {
+		fprintf( stderr, "eventfd create error: (%d) %s\n", (int)errno, strerror(errno) );
+		exit(EXIT_FAILURE);
+	}
+
+	int ret = io_uring_register_eventfd(ring, blockfd);
+	if (ret < 0) {
+		fprintf( stderr, "io_uring S&W error: (%d) %s\n", (int)-ret, strerror(-ret) );
+		exit(EXIT_FAILURE);
+	}
+
 	// Track the shutdown using a event_fd
 	char endfd_buf[8];
@@ -433,5 +455,7 @@
 	// Accept our first connection
 	// May not take effect until io_uring_submit_and_wait
-	connection::accept(ring, opt);
+	for(unsigned i = 0; i < opt.acpt.cnt; i++) {
+		connection::accept(ring, opt);
+	}
 
 	int reset = 1;       // Counter to print stats once in a while
@@ -441,5 +465,6 @@
 	while(!done) {
 		// Submit all the answers we have and wait for responses
-		int ret = io_uring_submit_and_wait(ring, 1);
+		int ret = io_uring_submit(ring);
+		local.to_submit = 0;
 
 		// check errors
@@ -452,4 +477,14 @@
 		sqes += ret;
 		call++;
+
+
+		eventfd_t val;
+		ret = eventfd_read(blockfd, &val);
+
+		// check errors
+		if (ret < 0) {
+			fprintf( stderr, "eventfd read error: (%d) %s\n", (int)errno, strerror(errno) );
+			exit(EXIT_FAILURE);
+		}
 
 		struct io_uring_cqe *cqe;
@@ -463,4 +498,6 @@
 				break;
 			}
+
+			if(local.to_submit > 30) break;
 
 			auto req = (class connection *)cqe->user_data;
@@ -509,5 +546,4 @@
 	#include <pthread.h>      // for pthreads
 	#include <signal.h>       // for signal(SIGPIPE, SIG_IGN);
-	#include <sys/eventfd.h>  // use for termination
 	#include <sys/socket.h>   // for sockets in general
 	#include <netinet/in.h>   // for sockaddr_in, AF_INET
@@ -528,4 +564,5 @@
 	unsigned entries = 256;     // number of entries per ring/kernel thread
 	unsigned backlog = 262144;  // backlog argument to listen
+	unsigned preaccept = 1;     // start by accepting X per threads
 	bool attach = false;        // Whether or not to attach all the rings
 	bool sqpoll = false;        // Whether or not to use SQ Polling
@@ -534,5 +571,5 @@
 	// Arguments Parsing
 	int c;
-	while ((c = getopt (argc, argv, "t:p:e:b:aS")) != -1) {
+	while ((c = getopt (argc, argv, "t:p:e:b:c:aS")) != -1) {
 		switch (c)
 		{
@@ -548,4 +585,7 @@
 		case 'b':
 			backlog = atoi(optarg);
+			break;
+		case 'c':
+			preaccept = atoi(optarg);
 			break;
 		case 'a':
@@ -681,4 +721,5 @@
 		thrd_opts[i].acpt.addrlen = (socklen_t*)&addrlen;
 		thrd_opts[i].acpt.flags   = 0;
+		thrd_opts[i].acpt.cnt     = preaccept;
 		thrd_opts[i].endfd        = efd;
 		thrd_opts[i].ring         = &thrd_rings[i].storage;
Index: benchmark/io/http/main.cfa
===================================================================
--- benchmark/io/http/main.cfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ benchmark/io/http/main.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -29,28 +29,4 @@
 
 //=============================================================================================
-// Globals
-//=============================================================================================
-struct ServerProc {
-	processor self;
-};
-
-void ?{}( ServerProc & this ) {
-	/* paranoid */ assert( options.clopts.instance != 0p );
-	(this.self){ "Benchmark Processor", *options.clopts.instance };
-
-	#if !defined(__CFA_NO_STATISTICS__)
-		if( options.clopts.procstats ) {
-			print_stats_at_exit( this.self, options.clopts.instance->print_stats );
-		}
-		if( options.clopts.viewhalts ) {
-			print_halts( this.self );
-		}
-	#endif
-}
-
-extern void init_protocol(void);
-extern void deinit_protocol(void);
-
-//=============================================================================================
 // Stats Printer
 //============================================================================================='
@@ -58,7 +34,9 @@
 thread StatsPrinter {};
 
-void ?{}( StatsPrinter & this ) {
-	((thread&)this){ "Stats Printer Thread" };
-}
+void ?{}( StatsPrinter & this, cluster & cl ) {
+	((thread&)this){ "Stats Printer Thread", cl };
+}
+
+void ^?{}( StatsPrinter & mutex this ) {}
 
 void main(StatsPrinter & this) {
@@ -71,7 +49,64 @@
 		sleep(10`s);
 
-		print_stats_now( *options.clopts.instance, CFA_STATS_READY_Q | CFA_STATS_IO );
-	}
-}
+		print_stats_now( *active_cluster(), CFA_STATS_READY_Q | CFA_STATS_IO );
+	}
+}
+
+//=============================================================================================
+// Globals
+//=============================================================================================
+struct ServerCluster {
+	cluster self;
+	processor    * procs;
+	// io_context   * ctxs;
+	StatsPrinter * prnt;
+
+};
+
+void ?{}( ServerCluster & this ) {
+	(this.self){ "Server Cluster", options.clopts.params };
+
+	this.procs = alloc(options.clopts.nprocs);
+	for(i; options.clopts.nprocs) {
+		(this.procs[i]){ "Benchmark Processor", this.self };
+
+		#if !defined(__CFA_NO_STATISTICS__)
+			if( options.clopts.procstats ) {
+				print_stats_at_exit( *this.procs, this.self.print_stats );
+			}
+			if( options.clopts.viewhalts ) {
+				print_halts( *this.procs );
+			}
+		#endif
+	}
+
+	if(options.stats) {
+		this.prnt = alloc();
+		(*this.prnt){ this.self };
+	} else {
+		this.prnt = 0p;
+	}
+
+	#if !defined(__CFA_NO_STATISTICS__)
+		print_stats_at_exit( this.self, CFA_STATS_READY_Q | CFA_STATS_IO );
+	#endif
+
+	options.clopts.instance[options.clopts.cltr_cnt] = &this.self;
+	options.clopts.cltr_cnt++;
+}
+
+void ^?{}( ServerCluster & this ) {
+	delete(this.prnt);
+
+	for(i; options.clopts.nprocs) {
+		^(this.procs[i]){};
+	}
+	free(this.procs);
+
+	^(this.self){};
+}
+
+extern void init_protocol(void);
+extern void deinit_protocol(void);
 
 //=============================================================================================
@@ -137,11 +172,4 @@
 	// Run Server Cluster
 	{
-		cluster cl = { "Server Cluster", options.clopts.params };
-		#if !defined(__CFA_NO_STATISTICS__)
-			print_stats_at_exit( cl, CFA_STATS_READY_Q | CFA_STATS_IO );
-		#endif
-		options.clopts.instance = &cl;
-
-
 		int pipe_cnt = options.clopts.nworkers * 2;
 		int pipe_off;
@@ -153,11 +181,10 @@
 		}
 
-		if(options.file_cache.path && options.file_cache.fixed_fds) {
-			register_fixed_files(cl, fds, pipe_off);
-		}
+		// if(options.file_cache.path && options.file_cache.fixed_fds) {
+		// 	register_fixed_files(cl, fds, pipe_off);
+		// }
 
 		{
-			ServerProc procs[options.clopts.nprocs];
-			StatsPrinter printer;
+			ServerCluster cl[options.clopts.nclusters];
 
 			init_protocol();
@@ -180,9 +207,18 @@
 					unpark( workers[i] );
 				}
-				sout | options.clopts.nworkers | "workers started on" | options.clopts.nprocs | "processors";
+				sout | options.clopts.nworkers | "workers started on" | options.clopts.nprocs | "processors /" | options.clopts.nclusters | "clusters";
+				for(i; options.clopts.nclusters) {
+					sout | options.clopts.thrd_cnt[i] | nonl;
+				}
+				sout | nl;
 				{
 					char buffer[128];
-					while(int ret = cfa_read(0, buffer, 128, 0, -1`s, 0p, 0p); ret != 0) {
+					for() {
+						int ret = cfa_read(0, buffer, 128, 0);
+						if(ret == 0) break;
 						if(ret < 0) abort( "main read error: (%d) %s\n", (int)errno, strerror(errno) );
+						sout | "User wrote '" | "" | nonl;
+						write(sout, buffer, ret - 1);
+						sout | "'";
 					}
 
@@ -193,5 +229,4 @@
 				for(i; options.clopts.nworkers) {
 					workers[i].done = true;
-					cancel(workers[i].cancel);
 				}
 				sout | "done";
@@ -221,5 +256,5 @@
 			sout | "done";
 
-			sout | "Stopping processors..." | nonl; flush( sout );
+			sout | "Stopping processors/clusters..." | nonl; flush( sout );
 		}
 		sout | "done";
Index: benchmark/io/http/options.cfa
===================================================================
--- benchmark/io/http/options.cfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ benchmark/io/http/options.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -13,4 +13,5 @@
 #include <kernel.hfa>
 #include <parseargs.hfa>
+#include <stdlib.hfa>
 
 #include <stdlib.h>
@@ -19,4 +20,5 @@
 Options options @= {
 	false, // log
+	false, // stats
 
 	{ // file_cache
@@ -36,4 +38,5 @@
 
 	{ // cluster
+		1,     // nclusters;
 		1,     // nprocs;
 		1,     // nworkers;
@@ -46,11 +49,9 @@
 
 void parse_options( int argc, char * argv[] ) {
-	bool subthrd = false;
-	bool eagrsub = false;
-	bool fixedfd = false;
-	bool sqkpoll = false;
-	bool iokpoll = false;
-	unsigned sublen = 16;
+	// bool fixedfd = false;
+	// bool sqkpoll = false;
+	// bool iokpoll = false;
 	unsigned nentries = 16;
+	bool isolate = false;
 
 
@@ -59,5 +60,7 @@
 		{ 'c', "cpus",           "Number of processors to use", options.clopts.nprocs},
 		{ 't', "threads",        "Number of worker threads to use", options.clopts.nworkers},
+		{'\0', "isolate",        "Create one cluster per processor", isolate, parse_settrue},
 		{'\0', "log",            "Enable logs", options.log, parse_settrue},
+		{'\0', "stats",          "Enable statistics", options.stats, parse_settrue},
 		{'\0', "accept-backlog", "Maximum number of pending accepts", options.socket.backlog},
 		{'\0', "request_len",    "Maximum number of bytes in the http request, requests with more data will be answered with Http Code 414", options.socket.buflen},
@@ -65,11 +68,8 @@
 		{'\0', "cache-size",     "Size of the cache to use, if set to small, will uses closes power of 2", options.file_cache.size },
 		{'\0', "list-files",     "List the files in the specified path and exit", options.file_cache.list, parse_settrue },
-		{ 's', "submitthread",   "If set, cluster uses polling thread to submit I/O", subthrd, parse_settrue },
-		{ 'e', "eagersubmit",    "If set, cluster submits I/O eagerly but still aggregates submits", eagrsub, parse_settrue},
-		{ 'f', "fixed-fds",      "If set, files are open eagerly and pre-registered with the cluster", fixedfd, parse_settrue},
-		{ 'k', "kpollsubmit",    "If set, cluster uses IORING_SETUP_SQPOLL, implies -f", sqkpoll, parse_settrue },
-		{ 'i', "kpollcomplete",  "If set, cluster uses IORING_SETUP_IOPOLL", iokpoll, parse_settrue },
-		{'\0', "submitlength",   "Max number of submitions that can be submitted together", sublen },
-		{'\0', "numentries",     "Number of I/O entries", nentries },
+		// { 'f', "fixed-fds",      "If set, files are open eagerly and pre-registered with the cluster", fixedfd, parse_settrue},
+		// { 'k', "kpollsubmit",    "If set, cluster uses IORING_SETUP_SQPOLL, implies -f", sqkpoll, parse_settrue },
+		// { 'i', "kpollcomplete",  "If set, cluster uses IORING_SETUP_IOPOLL", iokpoll, parse_settrue },
+		{'e', "numentries",     "Number of I/O entries", nentries },
 
 	};
@@ -91,24 +91,28 @@
 		nentries = v;
 	}
+	if(isolate) {
+		options.clopts.nclusters = options.clopts.nprocs;
+		options.clopts.nprocs = 1;
+	}
 	options.clopts.params.num_entries = nentries;
-
-	options.clopts.params.poller_submits = subthrd;
-	options.clopts.params.eager_submits  = eagrsub;
-
-	if( fixedfd ) {
-		options.file_cache.fixed_fds = true;
+	options.clopts.instance = alloc(options.clopts.nclusters);
+	options.clopts.thrd_cnt = alloc(options.clopts.nclusters);
+	options.clopts.cltr_cnt = 0;
+	for(i; options.clopts.nclusters) {
+		options.clopts.thrd_cnt[i] = 0;
 	}
 
-	if( sqkpoll ) {
-		options.clopts.params.poll_submit = true;
-		options.file_cache.fixed_fds = true;
-	}
 
-	if( iokpoll ) {
-		options.clopts.params.poll_complete = true;
-		options.file_cache.open_flags |= O_DIRECT;
-	}
+	// if( fixedfd ) {
+	// 	options.file_cache.fixed_fds = true;
+	// }
 
-	options.clopts.params.num_ready = sublen;
+	// if( sqkpoll ) {
+	// 	options.file_cache.fixed_fds = true;
+	// }
+
+	// if( iokpoll ) {
+	// 	options.file_cache.open_flags |= O_DIRECT;
+	// }
 
 	if( left[0] == 0p ) { return; }
Index: benchmark/io/http/options.hfa
===================================================================
--- benchmark/io/http/options.hfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ benchmark/io/http/options.hfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -9,4 +9,5 @@
 struct Options {
 	bool log;
+	bool stats;
 
 	struct {
@@ -26,4 +27,5 @@
 
 	struct {
+		int nclusters;
 		int nprocs;
 		int nworkers;
@@ -31,5 +33,7 @@
 		bool procstats;
 		bool viewhalts;
-		cluster * instance;
+		cluster ** instance;
+		size_t   * thrd_cnt;
+		size_t     cltr_cnt;
 	} clopts;
 };
Index: benchmark/io/http/protocol.cfa
===================================================================
--- benchmark/io/http/protocol.cfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ benchmark/io/http/protocol.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -20,19 +20,18 @@
 #include "options.hfa"
 
-const char * volatile date = 0p;
-
-const char * http_msgs[] = {
-	"HTTP/1.1 200 OK\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: %zu \n\n",
-	"HTTP/1.1 400 Bad Request\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 404 Not Found\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 405 Method Not Allowed\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 408 Request Timeout\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 413 Payload Too Large\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 414 URI Too Long\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-};
+#define PLAINTEXT_1WRITE
+#define PLAINTEXT_NOCOPY
+
+struct https_msg_str {
+	char msg[512];
+	size_t len;
+};
+
+const https_msg_str * volatile http_msgs[KNOWN_CODES] = { 0 };
 
 _Static_assert( KNOWN_CODES == (sizeof(http_msgs ) / sizeof(http_msgs [0])));
 
-const int http_codes[] = {
+const int http_codes[KNOWN_CODES] = {
+	200,
 	200,
 	400,
@@ -53,6 +52,5 @@
 	while(len > 0) {
 		// Call write
-		int ret = cfa_write(fd, it, len, 0, -1`s, 0p, 0p);
-		// int ret = write(fd, it, len);
+		int ret = cfa_send(fd, it, len, 0, CFA_IO_LAZY);
 		if( ret < 0 ) {
 			if( errno == ECONNRESET || errno == EPIPE ) return -ECONNRESET;
@@ -72,20 +70,43 @@
 	/* paranoid */ assert( code < KNOWN_CODES && code != OK200 );
 	int idx = (int)code;
-	return answer( fd, http_msgs[idx], strlen( http_msgs[idx] ) );
+	return answer( fd, http_msgs[idx]->msg, http_msgs[idx]->len );
 }
 
 int answer_header( int fd, size_t size ) {
-	const char * fmt = http_msgs[OK200];
-	int len = 200;
-	char buffer[len];
-	len = snprintf(buffer, len, fmt, date, size);
+	char buffer[512];
+	char * it = buffer;
+	memcpy(it, http_msgs[OK200]->msg, http_msgs[OK200]->len);
+	it += http_msgs[OK200]->len;
+	int len = http_msgs[OK200]->len;
+	len += snprintf(it, 512 - len, "%d \n\n", size);
 	return answer( fd, buffer, len );
 }
 
-int answer_plain( int fd, char buffer[], size_t size ) {
-	int ret = answer_header(fd, size);
+#if defined(PLAINTEXT_NOCOPY)
+int answer_plaintext( int fd ) {
+	return answer(fd, http_msgs[OK200_PlainText]->msg, http_msgs[OK200_PlainText]->len + 1); // +1 because snprintf does not count the null terminator
+}
+#elif defined(PLAINTEXT_1WRITE)
+int answer_plaintext( int fd ) {
+	char text[] = "Hello, World!\n";
+	char buffer[512 + sizeof(text)];
+	char * it = buffer;
+	memcpy(it, http_msgs[OK200]->msg, http_msgs[OK200]->len);
+	it += http_msgs[OK200]->len;
+	int len = http_msgs[OK200]->len;
+	int r = snprintf(it, 512 - len, "%d \n\n", sizeof(text));
+	it += r;
+	len += r;
+	memcpy(it, text, sizeof(text));
+	return answer(fd, buffer, len + sizeof(text));
+}
+#else
+int answer_plaintext( int fd ) {
+	char text[] = "Hello, World!\n";
+	int ret = answer_header(fd, sizeof(text));
 	if( ret < 0 ) return ret;
-	return answer(fd, buffer, size);
-}
+	return answer(fd, text, sizeof(text));
+}
+#endif
 
 int answer_empty( int fd ) {
@@ -94,5 +115,5 @@
 
 
-[HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len, io_cancellation * cancel) {
+[HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len) {
 	char * it = buffer;
 	size_t count = len - 1;
@@ -100,5 +121,5 @@
 	READ:
 	for() {
-		int ret = cfa_read(fd, (void*)it, count, 0, -1`s, cancel, 0p);
+		int ret = cfa_recv(fd, (void*)it, count, 0, CFA_IO_LAZY);
 		// int ret = read(fd, (void*)it, count);
 		if(ret == 0 ) return [OK200, true, 0, 0];
@@ -139,6 +160,5 @@
 	ssize_t ret;
 	SPLICE1: while(count > 0) {
-		ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, sflags, 0, -1`s, 0p, 0p);
-		// ret = splice(ans_fd, &offset, pipe[1], 0p, count, sflags);
+		ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, sflags, CFA_IO_LAZY);
 		if( ret < 0 ) {
 			if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE1;
@@ -152,6 +172,5 @@
 		size_t in_pipe = ret;
 		SPLICE2: while(in_pipe > 0) {
-			ret = cfa_splice(pipe[0], 0p, fd, 0p, in_pipe, sflags, 0, -1`s, 0p, 0p);
-			// ret = splice(pipe[0], 0p, fd, 0p, in_pipe, sflags);
+			ret = cfa_splice(pipe[0], 0p, fd, 0p, in_pipe, sflags, CFA_IO_LAZY);
 			if( ret < 0 ) {
 				if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE2;
@@ -173,6 +192,17 @@
 #include <thread.hfa>
 
+const char * original_http_msgs[] = {
+	"HTTP/1.1 200 OK\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: ",
+	"HTTP/1.1 200 OK\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 15\n\nHello, World!\n",
+	"HTTP/1.1 400 Bad Request\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+	"HTTP/1.1 404 Not Found\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+	"HTTP/1.1 405 Method Not Allowed\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+	"HTTP/1.1 408 Request Timeout\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+	"HTTP/1.1 413 Payload Too Large\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+	"HTTP/1.1 414 URI Too Long\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+};
+
 struct date_buffer {
-	char buff[100];
+	https_msg_str strs[KNOWN_CODES];
 };
 
@@ -183,8 +213,8 @@
 
 void ?{}( DateFormater & this ) {
-	((thread&)this){ "Server Date Thread", *options.clopts.instance };
+	((thread&)this){ "Server Date Thread", *options.clopts.instance[0] };
 	this.idx = 0;
-	memset( this.buffers[0].buff, 0, sizeof(this.buffers[0]) );
-	memset( this.buffers[1].buff, 0, sizeof(this.buffers[1]) );
+	memset( &this.buffers[0], 0, sizeof(this.buffers[0]) );
+	memset( &this.buffers[1], 0, sizeof(this.buffers[1]) );
 }
 
@@ -196,11 +226,22 @@
 		or else {}
 
+
+		char buff[100];
 		Time now = getTimeNsec();
-
-		strftime( this.buffers[this.idx].buff, 100, "%a, %d %b %Y %H:%M:%S %Z", now );
-
-		char * next = this.buffers[this.idx].buff;
-		__atomic_exchange_n((char * volatile *)&date, next, __ATOMIC_SEQ_CST);
+		strftime( buff, 100, "%a, %d %b %Y %H:%M:%S %Z", now );
+		sout | "Updated date to '" | buff | "'";
+
+		for(i; KNOWN_CODES) {
+			size_t len = snprintf( this.buffers[this.idx].strs[i].msg, 512, original_http_msgs[i], buff );
+			this.buffers[this.idx].strs[i].len = len;
+		}
+
+		for(i; KNOWN_CODES) {
+			https_msg_str * next = &this.buffers[this.idx].strs[i];
+			__atomic_exchange_n((https_msg_str * volatile *)&http_msgs[i], next, __ATOMIC_SEQ_CST);
+		}
 		this.idx = (this.idx + 1) % 2;
+
+		sout | "Date thread sleeping";
 
 		sleep(1`s);
Index: benchmark/io/http/protocol.hfa
===================================================================
--- benchmark/io/http/protocol.hfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ benchmark/io/http/protocol.hfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -1,8 +1,7 @@
 #pragma once
-
-struct io_cancellation;
 
 enum HttpCode {
 	OK200 = 0,
+	OK200_PlainText,
 	E400,
 	E404,
@@ -18,8 +17,8 @@
 int answer_error( int fd, HttpCode code );
 int answer_header( int fd, size_t size );
-int answer_plain( int fd, char buffer [], size_t size );
+int answer_plaintext( int fd );
 int answer_empty( int fd );
 
-[HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len, io_cancellation *);
+[HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len);
 
 int sendfile( int pipe[2], int fd, int ans_fd, size_t count );
Index: benchmark/io/http/worker.cfa
===================================================================
--- benchmark/io/http/worker.cfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ benchmark/io/http/worker.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -17,5 +17,7 @@
 //=============================================================================================
 void ?{}( Worker & this ) {
-	((thread&)this){ "Server Worker Thread", *options.clopts.instance };
+	size_t cli = rand() % options.clopts.cltr_cnt;
+	((thread&)this){ "Server Worker Thread", *options.clopts.instance[cli] };
+	options.clopts.thrd_cnt[cli]++;
 	this.pipe[0] = -1;
 	this.pipe[1] = -1;
@@ -35,6 +37,5 @@
 	for() {
 		if( options.log ) sout | "=== Accepting connection ===";
-		int fd = cfa_accept4( this.[sockfd, addr, addrlen, flags], 0, -1`s, &this.cancel, 0p );
-		// int fd = accept4( this.[sockfd, addr, addrlen, flags] );
+		int fd = cfa_accept4( this.[sockfd, addr, addrlen, flags], CFA_IO_LAZY );
 		if(fd < 0) {
 			if( errno == ECONNABORTED ) break;
@@ -42,4 +43,5 @@
 			abort( "accept error: (%d) %s\n", (int)errno, strerror(errno) );
 		}
+		if(this.done) break;
 
 		if( options.log ) sout | "=== New connection" | fd | "" | ", waiting for requests ===";
@@ -55,5 +57,5 @@
 			char buffer[len];
 			if( options.log ) sout | "=== Reading request ===";
-			[code, closed, file, name_size] = http_read(fd, buffer, len, &this.cancel);
+			[code, closed, file, name_size] = http_read(fd, buffer, len);
 
 			// if we are done, break out of the loop
@@ -70,8 +72,5 @@
 				if( options.log ) sout | "=== Request for /plaintext ===";
 
-				char text[] = "Hello, World!\n";
-
-				// Send the header
-				int ret = answer_plain(fd, text, sizeof(text));
+				int ret = answer_plaintext(fd);
 				if( ret == -ECONNRESET ) break REQUEST;
 
Index: benchmark/io/http/worker.hfa
===================================================================
--- benchmark/io/http/worker.hfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ benchmark/io/http/worker.hfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -17,5 +17,4 @@
 	socklen_t * addrlen;
 	int flags;
-	io_cancellation cancel;
 	volatile bool done;
 };
Index: example/io/tty-echo.cfa
===================================================================
--- example/io/tty-echo.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
+++ example/io/tty-echo.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -0,0 +1,63 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// io.cfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Feb 18 20:31:00 2021
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include <fstream.hfa>
+#include <iofwd.hfa>
+#include <time.hfa>
+#include <thread.hfa>
+
+thread EchoThread {};
+void main(EchoThread & this) {
+	char buffer[100];
+	for() {
+		int ret = cfa_read(0, buffer + 1, 97, 0);
+		if(ret < 0) {
+			serr | "Failed to read from fd 0";
+			exit(1);
+		}
+		if(ret == 0) return;
+		buffer[0]     = '\'';
+		buffer[ret]   = '\'';
+		buffer[ret+1] = '\n';
+		ret = cfa_write(1, buffer, ret + 2, 0);
+		if(ret < 0) {
+			serr | "Failed to write from fd 1 (ET)";
+			exit(1);
+		}
+	}
+}
+
+thread HeartBeatThread {};
+void main(HeartBeatThread & this) {
+	for() {
+		waitfor( ^?{} : this) {
+			return;
+		}
+		or else;
+		int ret = cfa_write(1, ".", 1, 0);
+		if(ret < 0) {
+			serr | "Failed to write from fd 1 (ET)";
+			exit(1);
+		}
+		sleep(5`s);
+	}
+}
+
+int main() {
+	HeartBeatThread hb;
+	{
+		EchoThread et;
+	}
+}
Index: libcfa/configure.ac
===================================================================
--- libcfa/configure.ac	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/configure.ac	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -169,7 +169,8 @@
 AH_TEMPLATE([CFA_HAVE_IOSQE_FIXED_FILE],[Defined if io_uring support is present when compiling libcfathread and supports the flag FIXED_FILE.])
 AH_TEMPLATE([CFA_HAVE_IOSQE_IO_DRAIN],[Defined if io_uring support is present when compiling libcfathread and supports the flag IO_DRAIN.])
-AH_TEMPLATE([CFA_HAVE_IOSQE_ASYNC],[Defined if io_uring support is present when compiling libcfathread and supports the flag ASYNC.])
 AH_TEMPLATE([CFA_HAVE_IOSQE_IO_LINK],[Defined if io_uring support is present when compiling libcfathread and supports the flag IO_LINK.])
 AH_TEMPLATE([CFA_HAVE_IOSQE_IO_HARDLINK],[Defined if io_uring support is present when compiling libcfathread and supports the flag IO_HARDLINK.])
+AH_TEMPLATE([CFA_HAVE_IOSQE_ASYNC],[Defined if io_uring support is present when compiling libcfathread and supports the flag ASYNC.])
+AH_TEMPLATE([CFA_HAVE_IOSQE_BUFFER_SELECT],[Defined if io_uring support is present when compiling libcfathread and supports the flag BUFFER_SELECT.])
 AH_TEMPLATE([CFA_HAVE_SPLICE_F_FD_IN_FIXED],[Defined if io_uring support is present when compiling libcfathread and supports the flag SPLICE_F_FD_IN_FIXED.])
 AH_TEMPLATE([CFA_HAVE_IORING_SETUP_ATTACH_WQ],[Defined if io_uring support is present when compiling libcfathread and supports the flag IORING_SETUP_ATTACH_WQ.])
@@ -182,5 +183,5 @@
 
 define(ioring_ops, [IORING_OP_NOP,IORING_OP_READV,IORING_OP_WRITEV,IORING_OP_FSYNC,IORING_OP_READ_FIXED,IORING_OP_WRITE_FIXED,IORING_OP_POLL_ADD,IORING_OP_POLL_REMOVE,IORING_OP_SYNC_FILE_RANGE,IORING_OP_SENDMSG,IORING_OP_RECVMSG,IORING_OP_TIMEOUT,IORING_OP_TIMEOUT_REMOVE,IORING_OP_ACCEPT,IORING_OP_ASYNC_CANCEL,IORING_OP_LINK_TIMEOUT,IORING_OP_CONNECT,IORING_OP_FALLOCATE,IORING_OP_OPENAT,IORING_OP_CLOSE,IORING_OP_FILES_UPDATE,IORING_OP_STATX,IORING_OP_READ,IORING_OP_WRITE,IORING_OP_FADVISE,IORING_OP_MADVISE,IORING_OP_SEND,IORING_OP_RECV,IORING_OP_OPENAT2,IORING_OP_EPOLL_CTL,IORING_OP_SPLICE,IORING_OP_PROVIDE_BUFFERS,IORING_OP_REMOVE_BUFFER,IORING_OP_TEE])
-define(ioring_flags, [IOSQE_FIXED_FILE,IOSQE_IO_DRAIN,IOSQE_ASYNC,IOSQE_IO_LINK,IOSQE_IO_HARDLINK,SPLICE_F_FD_IN_FIXED,IORING_SETUP_ATTACH_WQ])
+define(ioring_flags, [IOSQE_FIXED_FILE,IOSQE_IO_DRAIN,IOSQE_IO_LINK,IOSQE_IO_HARDLINK,IOSQE_ASYNC,IOSQE_BUFFER_SELECT,SPLICE_F_FD_IN_FIXED,IORING_SETUP_ATTACH_WQ])
 
 define(ioring_from_decls, [
Index: libcfa/prelude/defines.hfa.in
===================================================================
--- libcfa/prelude/defines.hfa.in	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/prelude/defines.hfa.in	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -149,4 +149,8 @@
 
 /* Defined if io_uring support is present when compiling libcfathread and
+   supports the flag BUFFER_SELECT. */
+#undef CFA_HAVE_IOSQE_BUFFER_SELECT
+
+/* Defined if io_uring support is present when compiling libcfathread and
    supports the flag FIXED_FILE. */
 #undef CFA_HAVE_IOSQE_FIXED_FILE
Index: libcfa/src/bits/defs.hfa
===================================================================
--- libcfa/src/bits/defs.hfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/bits/defs.hfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -74,2 +74,4 @@
 	#error unsupported architecture
 #endif
+
+#define CFA_IO_LAZY (1_l64u << 32_l64u)
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/io.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -32,4 +32,5 @@
 	extern "C" {
 		#include <sys/syscall.h>
+		#include <sys/eventfd.h>
 
 		#include <linux/io_uring.h>
@@ -79,248 +80,92 @@
 	};
 
-	// returns true of acquired as leader or second leader
-	static inline bool try_lock( __leaderlock_t & this ) {
-		const uintptr_t thrd = 1z | (uintptr_t)active_thread();
-		bool block;
-		disable_interrupts();
-		for() {
-			struct $thread * expected = this.value;
-			if( 1p != expected && 0p != expected ) {
-				/* paranoid */ verify( thrd != (uintptr_t)expected ); // We better not already be the next leader
-				enable_interrupts( __cfaabi_dbg_ctx );
-				return false;
-			}
-			struct $thread * desired;
-			if( 0p == expected ) {
-				// If the lock isn't locked acquire it, no need to block
-				desired = 1p;
-				block = false;
-			}
-			else {
-				// If the lock is already locked try becomming the next leader
-				desired = (struct $thread *)thrd;
-				block = true;
-			}
-			if( __atomic_compare_exchange_n(&this.value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break;
-		}
-		if( block ) {
-			enable_interrupts( __cfaabi_dbg_ctx );
-			park();
-			disable_interrupts();
-		}
-		return true;
-	}
-
-	static inline bool next( __leaderlock_t & this ) {
+	static $io_context * __ioarbiter_allocate( $io_arbiter & mutex this, processor *, __u32 idxs[], __u32 want );
+	static void __ioarbiter_submit( $io_arbiter & mutex this, $io_context * , __u32 idxs[], __u32 have, bool lazy );
+	static void __ioarbiter_flush ( $io_arbiter & mutex this, $io_context * );
+	static inline void __ioarbiter_notify( $io_context & ctx );
+//=============================================================================================
+// I/O Polling
+//=============================================================================================
+	static inline unsigned __flush( struct $io_context & );
+	static inline __u32 __release_sqes( struct $io_context & );
+
+	void __cfa_io_drain( processor * proc ) {
 		/* paranoid */ verify( ! __preemption_enabled() );
-		struct $thread * nextt;
-		for() {
-			struct $thread * expected = this.value;
-			/* paranoid */ verify( (1 & (uintptr_t)expected) == 1 ); // The lock better be locked
-
-			struct $thread * desired;
-			if( 1p == expected ) {
-				// No next leader, just unlock
-				desired = 0p;
-				nextt   = 0p;
-			}
-			else {
-				// There is a next leader, remove but keep locked
-				desired = 1p;
-				nextt   = (struct $thread *)(~1z & (uintptr_t)expected);
-			}
-			if( __atomic_compare_exchange_n(&this.value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break;
-		}
-
-		if(nextt) {
-			unpark( nextt );
-			enable_interrupts( __cfaabi_dbg_ctx );
-			return true;
-		}
-		enable_interrupts( __cfaabi_dbg_ctx );
-		return false;
-	}
-
-//=============================================================================================
-// I/O Syscall
-//=============================================================================================
-	static int __io_uring_enter( struct __io_data & ring, unsigned to_submit, bool get ) {
-		bool need_sys_to_submit = false;
-		bool need_sys_to_complete = false;
-		unsigned flags = 0;
-
-		TO_SUBMIT:
-		if( to_submit > 0 ) {
-			if( !(ring.ring_flags & IORING_SETUP_SQPOLL) ) {
-				need_sys_to_submit = true;
-				break TO_SUBMIT;
-			}
-			if( (*ring.submit_q.flags) & IORING_SQ_NEED_WAKEUP ) {
-				need_sys_to_submit = true;
-				flags |= IORING_ENTER_SQ_WAKEUP;
-			}
-		}
-
-		if( get && !(ring.ring_flags & IORING_SETUP_SQPOLL) ) {
-			flags |= IORING_ENTER_GETEVENTS;
-			if( (ring.ring_flags & IORING_SETUP_IOPOLL) ) {
-				need_sys_to_complete = true;
-			}
-		}
-
-		int ret = 0;
-		if( need_sys_to_submit || need_sys_to_complete ) {
-			__cfadbg_print_safe(io_core, "Kernel I/O : IO_URING enter %d %u %u\n", ring.fd, to_submit, flags);
-			ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, 0, flags, (sigset_t *)0p, _NSIG / 8);
-			__cfadbg_print_safe(io_core, "Kernel I/O : IO_URING %d returned %d\n", ring.fd, ret);
-
-			if( ret < 0 ) {
-				switch((int)errno) {
-				case EAGAIN:
-				case EINTR:
-				case EBUSY:
-					ret = -1;
-					break;
-				default:
-					abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
-				}
-			}
-		}
-
-		// Memory barrier
-		__atomic_thread_fence( __ATOMIC_SEQ_CST );
-		return ret;
-	}
-
-//=============================================================================================
-// I/O Polling
-//=============================================================================================
-	static unsigned __collect_submitions( struct __io_data & ring );
-	static __u32 __release_consumed_submission( struct __io_data & ring );
-	static inline void __clean( volatile struct io_uring_sqe * sqe );
-
-	// Process a single completion message from the io_uring
-	// This is NOT thread-safe
-	static inline void process( volatile struct io_uring_cqe & cqe ) {
-		struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data;
-		__cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future );
-
-		fulfil( *future, cqe.res );
-	}
-
-	static [int, bool] __drain_io( & struct __io_data ring ) {
-		/* paranoid */ verify( ! __preemption_enabled() );
-
-		unsigned to_submit = 0;
-		if( ring.poller_submits ) {
-			// If the poller thread also submits, then we need to aggregate the submissions which are ready
-			to_submit = __collect_submitions( ring );
-		}
-
-		int ret = __io_uring_enter(ring, to_submit, true);
-		if( ret < 0 ) {
-			return [0, true];
-		}
-
-		// update statistics
-		if (to_submit > 0) {
-			__STATS__( true,
-				if( to_submit > 0 ) {
-					io.submit_q.submit_avg.rdy += to_submit;
-					io.submit_q.submit_avg.csm += ret;
-					io.submit_q.submit_avg.cnt += 1;
-				}
-			)
-		}
-
-		__atomic_thread_fence( __ATOMIC_SEQ_CST );
-
-		// Release the consumed SQEs
-		__release_consumed_submission( ring );
+		/* paranoid */ verify( proc );
+		/* paranoid */ verify( proc->io.ctx );
 
 		// Drain the queue
-		unsigned head = *ring.completion_q.head;
-		unsigned tail = *ring.completion_q.tail;
-		const __u32 mask = *ring.completion_q.mask;
-
-		// Nothing was new return 0
-		if (head == tail) {
-			return [0, to_submit > 0];
-		}
+		$io_context * ctx = proc->io.ctx;
+		unsigned head = *ctx->cq.head;
+		unsigned tail = *ctx->cq.tail;
+		const __u32 mask = *ctx->cq.mask;
 
 		__u32 count = tail - head;
-		/* paranoid */ verify( count != 0 );
+		__STATS__( false, io.calls.drain++; io.calls.completed += count; )
+
 		for(i; count) {
 			unsigned idx = (head + i) & mask;
-			volatile struct io_uring_cqe & cqe = ring.completion_q.cqes[idx];
+			volatile struct io_uring_cqe & cqe = ctx->cq.cqes[idx];
 
 			/* paranoid */ verify(&cqe);
 
-			process( cqe );
-		}
+			struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data;
+			__cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future );
+
+			fulfil( *future, cqe.res );
+		}
+
+		__cfadbg_print_safe(io, "Kernel I/O : %u completed\n", count);
 
 		// Mark to the kernel that the cqe has been seen
 		// Ensure that the kernel only sees the new value of the head index after the CQEs have been read.
-		__atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_SEQ_CST );
-
-		return [count, count > 0 || to_submit > 0];
-	}
-
-	void main( $io_ctx_thread & this ) {
-		__ioctx_register( this );
-
-		__cfadbg_print_safe(io_core, "Kernel I/O : IO poller %d (%p) ready\n", this.ring->fd, &this);
-
-		const int reset_cnt = 5;
-		int reset = reset_cnt;
-		// Then loop until we need to start
-		LOOP:
-		while(!__atomic_load_n(&this.done, __ATOMIC_SEQ_CST)) {
-			// Drain the io
-			int count;
-			bool again;
-			disable_interrupts();
-				[count, again] = __drain_io( *this.ring );
-
-				if(!again) reset--;
-
+		__atomic_store_n( ctx->cq.head, head + count, __ATOMIC_SEQ_CST );
+
+		/* paranoid */ verify( ! __preemption_enabled() );
+
+		return;
+	}
+
+	void __cfa_io_flush( processor * proc ) {
+		/* paranoid */ verify( ! __preemption_enabled() );
+		/* paranoid */ verify( proc );
+		/* paranoid */ verify( proc->io.ctx );
+
+		$io_context & ctx = *proc->io.ctx;
+
+		if(!ctx.ext_sq.empty) {
+			__ioarbiter_flush( *ctx.arbiter, &ctx );
+		}
+
+		__STATS__( true, io.calls.flush++; )
+		int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, 0, 0, (sigset_t *)0p, _NSIG / 8);
+		if( ret < 0 ) {
+			switch((int)errno) {
+			case EAGAIN:
+			case EINTR:
+			case EBUSY:
 				// Update statistics
-				__STATS__( true,
-					io.complete_q.completed_avg.val += count;
-					io.complete_q.completed_avg.cnt += 1;
-				)
-			enable_interrupts( __cfaabi_dbg_ctx );
-
-			// If we got something, just yield and check again
-			if(reset > 1) {
-				yield();
-				continue LOOP;
+				__STATS__( false, io.calls.errors.busy ++; )
+				return;
+			default:
+				abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
 			}
-
-			// We alread failed to find completed entries a few time.
-			if(reset == 1) {
-				// Rearm the context so it can block
-				// but don't block right away
-				// we need to retry one last time in case
-				// something completed *just now*
-				__ioctx_prepare_block( this );
-				continue LOOP;
-			}
-
-				__STATS__( false,
-					io.complete_q.blocks += 1;
-				)
-				__cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %d (%p)\n", this.ring->fd, &this);
-
-				// block this thread
-				wait( this.sem );
-
-			// restore counter
-			reset = reset_cnt;
-		}
-
-		__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller %d (%p) stopping\n", this.ring->fd, &this);
-
-		__ioctx_unregister( this );
+		}
+
+		__cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd);
+		__STATS__( true, io.calls.submitted += ret; )
+		/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+		/* paranoid */ verify( ctx.sq.to_submit >= ret );
+
+		ctx.sq.to_submit -= ret;
+
+		/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+
+		// Release the consumed SQEs
+		__release_sqes( ctx );
+
+		/* paranoid */ verify( ! __preemption_enabled() );
+
+		ctx.proc->io.pending = false;
 	}
 
@@ -344,4 +189,38 @@
 //         head and tail must be fully filled and shouldn't ever be touched again.
 //
+	//=============================================================================================
+	// Allocation
+	// for user's convenience fill the sqes from the indexes
+	static inline void __fill(struct io_uring_sqe * out_sqes[], __u32 want, __u32 idxs[], struct $io_context * ctx)  {
+		struct io_uring_sqe * sqes = ctx->sq.sqes;
+		for(i; want) {
+			__cfadbg_print_safe(io, "Kernel I/O : filling loop\n");
+			out_sqes[i] = &sqes[idxs[i]];
+		}
+	}
+
+	// Try to directly allocate from a given context
+	// Not thread-safe
+	static inline bool __alloc(struct $io_context * ctx, __u32 idxs[], __u32 want) {
+		__sub_ring_t & sq = ctx->sq;
+		const __u32 mask  = *sq.mask;
+		__u32 fhead = sq.free_ring.head;    // get the current head of the queue
+		__u32 ftail = sq.free_ring.tail;    // get the current tail of the queue
+
+		// If we don't have enough sqes, fail
+		if((ftail - fhead) < want) { return false; }
+
+		// copy all the indexes we want from the available list
+		for(i; want) {
+			__cfadbg_print_safe(io, "Kernel I/O : allocating loop\n");
+			idxs[i] = sq.free_ring.array[(fhead + i) & mask];
+		}
+
+		// Advance the head to mark the indexes as consumed
+		__atomic_store_n(&sq.free_ring.head, fhead + want, __ATOMIC_RELEASE);
+
+		// return success
+		return true;
+	}
 
 	// Allocate an submit queue entry.
@@ -350,370 +229,118 @@
 	// for convenience, return both the index and the pointer to the sqe
 	// sqe == &sqes[idx]
-	[* volatile struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ) {
-		/* paranoid */ verify( data != 0 );
-
-		// Prepare the data we need
-		__attribute((unused)) int len   = 0;
-		__attribute((unused)) int block = 0;
-		__u32 cnt = *ring.submit_q.num;
-		__u32 mask = *ring.submit_q.mask;
-
-		__u32 off = thread_rand();
-
-		// Loop around looking for an available spot
-		for() {
-			// Look through the list starting at some offset
-			for(i; cnt) {
-				__u64 expected = 3;
-				__u32 idx = (i + off) & mask; // Get an index from a random
-				volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];
-				volatile __u64 * udata = &sqe->user_data;
-
-				// Allocate the entry by CASing the user_data field from 0 to the future address
-				if( *udata == expected &&
-					__atomic_compare_exchange_n( udata, &expected, data, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) )
-				{
-					// update statistics
-					__STATS__( false,
-						io.submit_q.alloc_avg.val   += len;
-						io.submit_q.alloc_avg.block += block;
-						io.submit_q.alloc_avg.cnt   += 1;
-					)
-
-					// debug log
-					__cfadbg_print_safe( io, "Kernel I/O : allocated [%p, %u] for %p (%p)\n", sqe, idx, active_thread(), (void*)data );
-
-					// Success return the data
-					return [sqe, idx];
-				}
-				verify(expected != data);
-
-				// This one was used
-				len ++;
-			}
-
-			block++;
-
-			yield();
-		}
-	}
-
-	static inline __u32 __submit_to_ready_array( struct __io_data & ring, __u32 idx, const __u32 mask ) {
-		/* paranoid */ verify( idx <= mask   );
-		/* paranoid */ verify( idx != -1ul32 );
-
-		// We need to find a spot in the ready array
-		__attribute((unused)) int len   = 0;
-		__attribute((unused)) int block = 0;
-		__u32 ready_mask = ring.submit_q.ready_cnt - 1;
-
-		__u32 off = thread_rand();
-
-		__u32 picked;
-		LOOKING: for() {
-			for(i; ring.submit_q.ready_cnt) {
-				picked = (i + off) & ready_mask;
-				__u32 expected = -1ul32;
-				if( __atomic_compare_exchange_n( &ring.submit_q.ready[picked], &expected, idx, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) {
-					break LOOKING;
-				}
-				verify(expected != idx);
-
-				len ++;
-			}
-
-			block++;
-
-			__u32 released = __release_consumed_submission( ring );
-			if( released == 0 ) {
-				yield();
-			}
-		}
-
-		// update statistics
-		__STATS__( false,
-			io.submit_q.look_avg.val   += len;
-			io.submit_q.look_avg.block += block;
-			io.submit_q.look_avg.cnt   += 1;
-		)
-
-		return picked;
-	}
-
-	void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1))) {
-		__io_data & ring = *ctx->thrd.ring;
-
+	struct $io_context * cfa_io_allocate(struct io_uring_sqe * sqes[], __u32 idxs[], __u32 want) {
+		__cfadbg_print_safe(io, "Kernel I/O : attempting to allocate %u\n", want);
+
+		disable_interrupts();
+		processor * proc = __cfaabi_tls.this_processor;
+		$io_context * ctx = proc->io.ctx;
+		/* paranoid */ verify( __cfaabi_tls.this_processor );
+		/* paranoid */ verify( ctx );
+
+		__cfadbg_print_safe(io, "Kernel I/O : attempting to fast allocation\n");
+
+		// We can proceed to the fast path
+		if( __alloc(ctx, idxs, want) ) {
+			// Allocation was successful
+			__STATS__( true, io.alloc.fast += 1; )
+			enable_interrupts( __cfaabi_dbg_ctx );
+
+			__cfadbg_print_safe(io, "Kernel I/O : fast allocation successful from ring %d\n", ctx->fd);
+
+			__fill( sqes, want, idxs, ctx );
+			return ctx;
+		}
+		// The fast path failed, fallback
+		__STATS__( true, io.alloc.fail += 1; )
+
+		// Fast path failed, fallback on arbitration
+		__STATS__( true, io.alloc.slow += 1; )
+		enable_interrupts( __cfaabi_dbg_ctx );
+
+		$io_arbiter * ioarb = proc->cltr->io.arbiter;
+		/* paranoid */ verify( ioarb );
+
+		__cfadbg_print_safe(io, "Kernel I/O : falling back on arbiter for allocation\n");
+
+		struct $io_context * ret = __ioarbiter_allocate(*ioarb, proc, idxs, want);
+
+		__cfadbg_print_safe(io, "Kernel I/O : slow allocation completed from ring %d\n", ret->fd);
+
+		__fill( sqes, want, idxs,ret );
+		return ret;
+	}
+
+
+	//=============================================================================================
+	// submission
+	static inline void __submit( struct $io_context * ctx, __u32 idxs[], __u32 have, bool lazy) {
+		// We can proceed to the fast path
+		// Get the right objects
+		__sub_ring_t & sq = ctx->sq;
+		const __u32 mask  = *sq.mask;
+		__u32 tail = *sq.kring.tail;
+
+		// Add the sqes to the array
+		for( i; have ) {
+			__cfadbg_print_safe(io, "Kernel I/O : __submit loop\n");
+			sq.kring.array[ (tail + i) & mask ] = idxs[i];
+		}
+
+		// Make the sqes visible to the submitter
+		__atomic_store_n(sq.kring.tail, tail + have, __ATOMIC_RELEASE);
+		sq.to_submit++;
+
+		ctx->proc->io.pending = true;
+		ctx->proc->io.dirty   = true;
+		if(sq.to_submit > 30 || !lazy) {
+			__cfa_io_flush( ctx->proc );
+		}
+	}
+
+	void cfa_io_submit( struct $io_context * inctx, __u32 idxs[], __u32 have, bool lazy ) __attribute__((nonnull (1))) {
+		__cfadbg_print_safe(io, "Kernel I/O : attempting to submit %u (%s)\n", have, lazy ? "lazy" : "eager");
+
+		disable_interrupts();
+		processor * proc = __cfaabi_tls.this_processor;
+		$io_context * ctx = proc->io.ctx;
+		/* paranoid */ verify( __cfaabi_tls.this_processor );
+		/* paranoid */ verify( ctx );
+
+		// Can we proceed to the fast path
+		if( ctx == inctx )		// We have the right instance?
 		{
-			__attribute__((unused)) volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];
-			__cfadbg_print_safe( io,
-				"Kernel I/O : submitting %u (%p) for %p\n"
-				"    data: %p\n"
-				"    opcode: %s\n"
-				"    fd: %d\n"
-				"    flags: %d\n"
-				"    prio: %d\n"
-				"    off: %p\n"
-				"    addr: %p\n"
-				"    len: %d\n"
-				"    other flags: %d\n"
-				"    splice fd: %d\n"
-				"    pad[0]: %llu\n"
-				"    pad[1]: %llu\n"
-				"    pad[2]: %llu\n",
-				idx, sqe,
-				active_thread(),
-				(void*)sqe->user_data,
-				opcodes[sqe->opcode],
-				sqe->fd,
-				sqe->flags,
-				sqe->ioprio,
-				(void*)sqe->off,
-				(void*)sqe->addr,
-				sqe->len,
-				sqe->accept_flags,
-				sqe->splice_fd_in,
-				sqe->__pad2[0],
-				sqe->__pad2[1],
-				sqe->__pad2[2]
-			);
-		}
-
-
-		// Get now the data we definetely need
-		volatile __u32 * const tail = ring.submit_q.tail;
-		const __u32 mask  = *ring.submit_q.mask;
-
-		// There are 2 submission schemes, check which one we are using
-		if( ring.poller_submits ) {
-			// If the poller thread submits, then we just need to add this to the ready array
-			__submit_to_ready_array( ring, idx, mask );
-
-			post( ctx->thrd.sem );
-
-			__cfadbg_print_safe( io, "Kernel I/O : Added %u to ready for %p\n", idx, active_thread() );
-		}
-		else if( ring.eager_submits ) {
-			__attribute__((unused)) __u32 picked = __submit_to_ready_array( ring, idx, mask );
-
-			#if defined(LEADER_LOCK)
-				if( !try_lock(ring.submit_q.submit_lock) ) {
-					__STATS__( false,
-						io.submit_q.helped += 1;
-					)
-					return;
-				}
-				/* paranoid */ verify( ! __preemption_enabled() );
-				__STATS__( true,
-					io.submit_q.leader += 1;
-				)
-			#else
-				for() {
-					yield();
-
-					if( try_lock(ring.submit_q.submit_lock __cfaabi_dbg_ctx2) ) {
-						__STATS__( false,
-							io.submit_q.leader += 1;
-						)
-						break;
-					}
-
-					// If some one else collected our index, we are done
-					#warning ABA problem
-					if( ring.submit_q.ready[picked] != idx ) {
-						__STATS__( false,
-							io.submit_q.helped += 1;
-						)
-						return;
-					}
-
-					__STATS__( false,
-						io.submit_q.busy += 1;
-					)
-				}
-			#endif
-
-			// We got the lock
-			// Collect the submissions
-			unsigned to_submit = __collect_submitions( ring );
-
-			// Actually submit
-			int ret = __io_uring_enter( ring, to_submit, false );
-
-			#if defined(LEADER_LOCK)
-				/* paranoid */ verify( ! __preemption_enabled() );
-				next(ring.submit_q.submit_lock);
-			#else
-				unlock(ring.submit_q.submit_lock);
-			#endif
-			if( ret < 0 ) {
-				return;
-			}
-
-			// Release the consumed SQEs
-			__release_consumed_submission( ring );
-
-			// update statistics
-			__STATS__( false,
-				io.submit_q.submit_avg.rdy += to_submit;
-				io.submit_q.submit_avg.csm += ret;
-				io.submit_q.submit_avg.cnt += 1;
-			)
-
-			__cfadbg_print_safe( io, "Kernel I/O : submitted %u (among %u) for %p\n", idx, ret, active_thread() );
-		}
-		else
-		{
-			// get mutual exclusion
-			#if defined(LEADER_LOCK)
-				while(!try_lock(ring.submit_q.submit_lock));
-			#else
-				lock(ring.submit_q.submit_lock __cfaabi_dbg_ctx2);
-			#endif
-
-			/* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 3ul64,
-			/* paranoid */ 	"index %u already reclaimed\n"
-			/* paranoid */ 	"head %u, prev %u, tail %u\n"
-			/* paranoid */ 	"[-0: %u,-1: %u,-2: %u,-3: %u]\n",
-			/* paranoid */ 	idx,
-			/* paranoid */ 	*ring.submit_q.head, ring.submit_q.prev_head, *tail
-			/* paranoid */ 	,ring.submit_q.array[ ((*ring.submit_q.head) - 0) & (*ring.submit_q.mask) ]
-			/* paranoid */ 	,ring.submit_q.array[ ((*ring.submit_q.head) - 1) & (*ring.submit_q.mask) ]
-			/* paranoid */ 	,ring.submit_q.array[ ((*ring.submit_q.head) - 2) & (*ring.submit_q.mask) ]
-			/* paranoid */ 	,ring.submit_q.array[ ((*ring.submit_q.head) - 3) & (*ring.submit_q.mask) ]
-			/* paranoid */ );
-
-			// Append to the list of ready entries
-
-			/* paranoid */ verify( idx <= mask );
-			ring.submit_q.array[ (*tail) & mask ] = idx;
-			__atomic_fetch_add(tail, 1ul32, __ATOMIC_SEQ_CST);
-
-			// Submit however, many entries need to be submitted
-			int ret = __io_uring_enter( ring, 1, false );
-			if( ret < 0 ) {
-				switch((int)errno) {
-				default:
-					abort( "KERNEL ERROR: IO_URING SUBMIT - %s\n", strerror(errno) );
-				}
-			}
-
-			/* paranoid */ verify(ret == 1);
-
-			// update statistics
-			__STATS__( false,
-				io.submit_q.submit_avg.csm += 1;
-				io.submit_q.submit_avg.cnt += 1;
-			)
-
-			{
-				__attribute__((unused)) volatile __u32 * const head = ring.submit_q.head;
-				__attribute__((unused)) __u32 last_idx = ring.submit_q.array[ ((*head) - 1) & mask ];
-				__attribute__((unused)) volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[last_idx];
-
-				__cfadbg_print_safe( io,
-					"Kernel I/O : last submitted is %u (%p)\n"
-					"    data: %p\n"
-					"    opcode: %s\n"
-					"    fd: %d\n"
-					"    flags: %d\n"
-					"    prio: %d\n"
-					"    off: %p\n"
-					"    addr: %p\n"
-					"    len: %d\n"
-					"    other flags: %d\n"
-					"    splice fd: %d\n"
-					"    pad[0]: %llu\n"
-					"    pad[1]: %llu\n"
-					"    pad[2]: %llu\n",
-					last_idx, sqe,
-					(void*)sqe->user_data,
-					opcodes[sqe->opcode],
-					sqe->fd,
-					sqe->flags,
-					sqe->ioprio,
-					(void*)sqe->off,
-					(void*)sqe->addr,
-					sqe->len,
-					sqe->accept_flags,
-					sqe->splice_fd_in,
-					sqe->__pad2[0],
-					sqe->__pad2[1],
-					sqe->__pad2[2]
-				);
-			}
-
-			__atomic_thread_fence( __ATOMIC_SEQ_CST );
-			// Release the consumed SQEs
-
-			__release_consumed_submission( ring );
-			// ring.submit_q.sqes[idx].user_data = 3ul64;
-
-			#if defined(LEADER_LOCK)
-				next(ring.submit_q.submit_lock);
-			#else
-				unlock(ring.submit_q.submit_lock);
-			#endif
-
-			__cfadbg_print_safe( io, "Kernel I/O : submitted %u for %p\n", idx, active_thread() );
-		}
-	}
-
-	// #define PARTIAL_SUBMIT 32
-
-	// go through the list of submissions in the ready array and moved them into
-	// the ring's submit queue
-	static unsigned __collect_submitions( struct __io_data & ring ) {
-		/* paranoid */ verify( ring.submit_q.ready != 0p );
-		/* paranoid */ verify( ring.submit_q.ready_cnt > 0 );
-
-		unsigned to_submit = 0;
-		__u32 tail = *ring.submit_q.tail;
-		const __u32 mask = *ring.submit_q.mask;
-		#if defined(PARTIAL_SUBMIT)
-			#if defined(LEADER_LOCK)
-				#error PARTIAL_SUBMIT and LEADER_LOCK cannot co-exist
-			#endif
-			const __u32 cnt = ring.submit_q.ready_cnt > PARTIAL_SUBMIT ? PARTIAL_SUBMIT : ring.submit_q.ready_cnt;
-			const __u32 offset = ring.submit_q.prev_ready;
-			ring.submit_q.prev_ready += cnt;
-		#else
-			const __u32 cnt = ring.submit_q.ready_cnt;
-			const __u32 offset = 0;
-		#endif
-
-		// Go through the list of ready submissions
-		for( c; cnt ) {
-			__u32 i = (offset + c) % ring.submit_q.ready_cnt;
-
-			// replace any submission with the sentinel, to consume it.
-			__u32 idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED);
-
-			// If it was already the sentinel, then we are done
-			if( idx == -1ul32 ) continue;
-
-			// If we got a real submission, append it to the list
-			ring.submit_q.array[ (tail + to_submit) & mask ] = idx & mask;
-			to_submit++;
-		}
-
-		// Increment the tail based on how many we are ready to submit
-		__atomic_fetch_add(ring.submit_q.tail, to_submit, __ATOMIC_SEQ_CST);
-
-		return to_submit;
-	}
-
+			__submit(ctx, idxs, have, lazy);
+
+			// Mark the instance as no longer in-use, re-enable interrupts and return
+			__STATS__( true, io.submit.fast += 1; )
+			enable_interrupts( __cfaabi_dbg_ctx );
+
+			__cfadbg_print_safe(io, "Kernel I/O : submitted on fast path\n");
+			return;
+		}
+
+		// Fast path failed, fallback on arbitration
+		__STATS__( true, io.submit.slow += 1; )
+		enable_interrupts( __cfaabi_dbg_ctx );
+
+		__cfadbg_print_safe(io, "Kernel I/O : falling back on arbiter for submission\n");
+
+		__ioarbiter_submit(*inctx->arbiter, inctx, idxs, have, lazy);
+	}
+
+	//=============================================================================================
+	// Flushing
 	// Go through the ring's submit queue and release everything that has already been consumed
 	// by io_uring
-	static __u32 __release_consumed_submission( struct __io_data & ring ) {
-		const __u32 smask = *ring.submit_q.mask;
-
-		// We need to get the lock to copy the old head and new head
-		if( !try_lock(ring.submit_q.release_lock __cfaabi_dbg_ctx2) ) return 0;
+	// This cannot be done by multiple threads
+	static __u32 __release_sqes( struct $io_context & ctx ) {
+		const __u32 mask = *ctx.sq.mask;
+
 		__attribute__((unused))
-		__u32 ctail = *ring.submit_q.tail;        // get the current tail of the queue
-		__u32 chead = *ring.submit_q.head;		// get the current head of the queue
-		__u32 phead = ring.submit_q.prev_head;	// get the head the last time we were here
-		ring.submit_q.prev_head = chead;		// note up to were we processed
-		unlock(ring.submit_q.release_lock);
+		__u32 ctail = *ctx.sq.kring.tail;    // get the current tail of the queue
+		__u32 chead = *ctx.sq.kring.head;	 // get the current head of the queue
+		__u32 phead = ctx.sq.kring.released; // get the head the last time we were here
+
+		__u32 ftail = ctx.sq.free_ring.tail;  // get the current tail of the queue
 
 		// the 3 fields are organized like this diagram
@@ -734,28 +361,104 @@
 		__u32 count = chead - phead;
 
+		if(count == 0) {
+			return 0;
+		}
+
 		// We acquired an previous-head/current-head range
 		// go through the range and release the sqes
 		for( i; count ) {
-			__u32 idx = ring.submit_q.array[ (phead + i) & smask ];
-
-			/* paranoid */ verify( 0 != ring.submit_q.sqes[ idx ].user_data );
-			__clean( &ring.submit_q.sqes[ idx ] );
-		}
+			__cfadbg_print_safe(io, "Kernel I/O : release loop\n");
+			__u32 idx = ctx.sq.kring.array[ (phead + i) & mask ];
+			ctx.sq.free_ring.array[ (ftail + i) & mask ] = idx;
+		}
+
+		ctx.sq.kring.released = chead;		// note up to where we processed
+		__atomic_store_n(&ctx.sq.free_ring.tail, ftail + count, __ATOMIC_SEQ_CST);
+
+		__ioarbiter_notify(ctx);
+
 		return count;
 	}
 
-	void __sqe_clean( volatile struct io_uring_sqe * sqe ) {
-		__clean( sqe );
-	}
-
-	static inline void __clean( volatile struct io_uring_sqe * sqe ) {
-		// If we are in debug mode, thrash the fields to make sure we catch reclamation errors
-		__cfaabi_dbg_debug_do(
-			memset(sqe, 0xde, sizeof(*sqe));
-			sqe->opcode = (sizeof(opcodes) / sizeof(const char *)) - 1;
-		);
-
-		// Mark the entry as unused
-		__atomic_store_n(&sqe->user_data, 3ul64, __ATOMIC_SEQ_CST);
+//=============================================================================================
+// I/O Arbiter
+//=============================================================================================
+	static $io_context * __ioarbiter_allocate( $io_arbiter & mutex this, processor * proc, __u32 idxs[], __u32 want ) {
+		__cfadbg_print_safe(io, "Kernel I/O : arbiter allocating\n");
+
+		__STATS__( false, io.alloc.block += 1; )
+
+		// No one has any resources left, wait for something to finish
+		// Mark as pending
+		__atomic_store_n( &this.pending.flag, true, __ATOMIC_SEQ_CST );
+
+		// Wait for our turn to submit
+		wait( this.pending.blocked, want );
+
+		__attribute((unused)) bool ret =
+		__alloc( this.pending.ctx, idxs, want);
+		/* paranoid */ verify( ret );
+
+		return this.pending.ctx;
+
+	}
+
+	static void __ioarbiter_notify( $io_arbiter & mutex this, $io_context * ctx ) {
+		/* paranoid */ verify( !is_empty(this.pending.blocked) );
+		this.pending.ctx = ctx;
+
+		while( !is_empty(this.pending.blocked) ) {
+			__cfadbg_print_safe(io, "Kernel I/O : notifying\n");
+			__u32 have = ctx->sq.free_ring.tail - ctx->sq.free_ring.head;
+			__u32 want = front( this.pending.blocked );
+
+			if( have > want ) return;
+
+			signal_block( this.pending.blocked );
+		}
+
+		this.pending.flag = false;
+	}
+
+	static void __ioarbiter_notify( $io_context & ctx ) {
+		if(__atomic_load_n( &ctx.arbiter->pending.flag, __ATOMIC_SEQ_CST)) {
+			__ioarbiter_notify( *ctx.arbiter, &ctx );
+		}
+	}
+
+	// Simply append to the pending
+	static void __ioarbiter_submit( $io_arbiter & mutex this, $io_context * ctx, __u32 idxs[], __u32 have, bool lazy ) {
+		__cfadbg_print_safe(io, "Kernel I/O : submitting %u from the arbiter to context %u\n", have, ctx->fd);
+
+		/* paranoid */ verify( &this == ctx->arbiter );
+
+		// Mark as pending
+		__atomic_store_n( &ctx->ext_sq.empty, false, __ATOMIC_SEQ_CST );
+
+		__cfadbg_print_safe(io, "Kernel I/O : waiting to submit %u\n", have);
+
+		// Wait for our turn to submit
+		wait( ctx->ext_sq.blocked );
+
+		// Submit our indexes
+		__submit(ctx, idxs, have, lazy);
+
+		__cfadbg_print_safe(io, "Kernel I/O : %u submitted from arbiter\n", have);
+	}
+
+	static void __ioarbiter_flush( $io_arbiter & mutex this, $io_context * ctx ) {
+		/* paranoid */ verify( &this == ctx->arbiter );
+
+		__STATS__( false, io.flush.external += 1; )
+
+		__cfadbg_print_safe(io, "Kernel I/O : arbiter flushing\n");
+
+		condition & blcked = ctx->ext_sq.blocked;
+		/* paranoid */ verify( ctx->ext_sq.empty == is_empty( blcked ) );
+		while(!is_empty( blcked )) {
+			signal_block( blcked );
+		}
+
+		ctx->ext_sq.empty = true;
 	}
 #endif
Index: libcfa/src/concurrency/io/call.cfa.in
===================================================================
--- libcfa/src/concurrency/io/call.cfa.in	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/io/call.cfa.in	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -54,15 +54,15 @@
 			| IOSQE_IO_DRAIN
 		#endif
+		#if defined(CFA_HAVE_IOSQE_IO_LINK)
+			| IOSQE_IO_LINK
+		#endif
+		#if defined(CFA_HAVE_IOSQE_IO_HARDLINK)
+			| IOSQE_IO_HARDLINK
+		#endif
 		#if defined(CFA_HAVE_IOSQE_ASYNC)
 			| IOSQE_ASYNC
 		#endif
-	;
-
-	static const __u32 LINK_FLAGS = 0
-		#if defined(CFA_HAVE_IOSQE_IO_LINK)
-			| IOSQE_IO_LINK
-		#endif
-		#if defined(CFA_HAVE_IOSQE_IO_HARDLINK)
-			| IOSQE_IO_HARDLINK
+		#if defined(CFA_HAVE_IOSQE_BUFFER_SELECTED)
+			| IOSQE_BUFFER_SELECTED
 		#endif
 	;
@@ -74,16 +74,6 @@
 	;
 
-	extern [* volatile struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data );
-	extern void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1)));
-
-	static inline io_context * __get_io_context( void ) {
-		cluster * cltr = active_cluster();
-
-		/* paranoid */ verifyf( cltr, "No active cluster for io operation\\n");
-		assertf( cltr->io.cnt > 0, "Cluster %p has no default io contexts and no context was specified\\n", cltr );
-
-		/* paranoid */ verifyf( cltr->io.ctxs, "default io contexts for cluster %p are missing\\n", cltr);
-		return &cltr->io.ctxs[ thread_rand() % cltr->io.cnt ];
-	}
+	extern struct $io_context * cfa_io_allocate(struct io_uring_sqe * out_sqes[], __u32 out_idxs[], __u32 want)  __attribute__((nonnull (1,2)));
+	extern void cfa_io_submit( struct $io_context * in_ctx, __u32 in_idxs[], __u32 have, bool lazy ) __attribute__((nonnull (1,2)));
 #endif
 
@@ -98,5 +88,5 @@
 
 extern "C" {
-	#include <sys/types.h>
+	#include <asm/types.h>
 	#include <sys/socket.h>
 	#include <sys/syscall.h>
@@ -195,5 +185,5 @@
 		return ', '.join(args_a)
 
-AsyncTemplate = """inline void async_{name}(io_future_t & future, {params}, int submit_flags, io_cancellation * cancellation, io_context * context) {{
+AsyncTemplate = """inline void async_{name}(io_future_t & future, {params}, __u64 submit_flags) {{
 	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_{op})
 		ssize_t res = {name}({args});
@@ -205,24 +195,11 @@
 		}}
 	#else
-		// we don't support LINK yet
-		if( 0 != (submit_flags & LINK_FLAGS) ) {{
-			errno = ENOTSUP; return -1;
-		}}
-
-		if( !context ) {{
-			context = __get_io_context();
-		}}
-		if(cancellation) {{
-			cancellation->target = (__u64)(uintptr_t)&future;
-		}}
-
 		__u8 sflags = REGULAR_FLAGS & submit_flags;
-		struct __io_data & ring = *context->thrd.ring;
-
 		__u32 idx;
 		struct io_uring_sqe * sqe;
-		[(volatile struct io_uring_sqe *) sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future );
+		struct $io_context * ctx = cfa_io_allocate( &sqe, &idx, 1 );
 
 		sqe->opcode = IORING_OP_{op};
+		sqe->user_data = (__u64)(uintptr_t)&future;
 		sqe->flags = sflags;
 		sqe->ioprio = 0;
@@ -239,16 +216,12 @@
 
 		verify( sqe->user_data == (__u64)(uintptr_t)&future );
-		__submit( context, idx );
+		cfa_io_submit( ctx, &idx, 1, 0 != (submit_flags & CFA_IO_LAZY) );
 	#endif
 }}"""
 
-SyncTemplate = """{ret} cfa_{name}({params}, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {{
-	if( timeout >= 0 ) {{
-		errno = ENOTSUP;
-		return -1;
-	}}
+SyncTemplate = """{ret} cfa_{name}({params}, __u64 submit_flags) {{
 	io_future_t future;
 
-	async_{name}( future, {args}, submit_flags, cancellation, context );
+	async_{name}( future, {args}, submit_flags );
 
 	wait( future );
@@ -415,8 +388,8 @@
 	if c.define:
 		print("""#if defined({define})
-	{ret} cfa_{name}({params}, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+	{ret} cfa_{name}({params}, __u64 submit_flags);
 #endif""".format(define=c.define,ret=c.ret, name=c.name, params=c.params))
 	else:
-		print("{ret} cfa_{name}({params}, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);"
+		print("{ret} cfa_{name}({params}, __u64 submit_flags);"
 		.format(ret=c.ret, name=c.name, params=c.params))
 
@@ -426,8 +399,8 @@
 	if c.define:
 		print("""#if defined({define})
-	void async_{name}(io_future_t & future, {params}, int submit_flags, io_cancellation * cancellation, io_context * context);
+	void async_{name}(io_future_t & future, {params}, __u64 submit_flags);
 #endif""".format(define=c.define,name=c.name, params=c.params))
 	else:
-		print("void async_{name}(io_future_t & future, {params}, int submit_flags, io_cancellation * cancellation, io_context * context);"
+		print("void async_{name}(io_future_t & future, {params}, __u64 submit_flags);"
 		.format(name=c.name, params=c.params))
 print("\n")
@@ -474,37 +447,4 @@
 
 print("""
-//-----------------------------------------------------------------------------
-bool cancel(io_cancellation & this) {
-	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_ASYNC_CANCEL)
-		return false;
-	#else
-		io_future_t future;
-
-		io_context * context = __get_io_context();
-
-		__u8 sflags = 0;
-		struct __io_data & ring = *context->thrd.ring;
-
-		__u32 idx;
-		volatile struct io_uring_sqe * sqe;
-		[sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future );
-
-		sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
-		sqe->opcode = IORING_OP_ASYNC_CANCEL;
-		sqe->flags = sflags;
-		sqe->addr = this.target;
-
-		verify( sqe->user_data == (__u64)(uintptr_t)&future );
-		__submit( context, idx );
-
-		wait(future);
-
-		if( future.result == 0 ) return true; // Entry found
-		if( future.result == -EALREADY) return true; // Entry found but in progress
-		if( future.result == -ENOENT ) return false; // Entry not found
-		return false;
-	#endif
-}
-
 //-----------------------------------------------------------------------------
 // Check if a function is has asynchronous
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -26,22 +26,15 @@
 
 #if !defined(CFA_HAVE_LINUX_IO_URING_H)
-	void __kernel_io_startup() {
-		// Nothing to do without io_uring
-	}
-
-	void __kernel_io_shutdown() {
-		// Nothing to do without io_uring
-	}
-
 	void ?{}(io_context_params & this) {}
 
-	void ?{}(io_context & this, struct cluster & cl) {}
-	void ?{}(io_context & this, struct cluster & cl, const io_context_params & params) {}
-
-	void ^?{}(io_context & this) {}
-	void ^?{}(io_context & this, bool cluster_context) {}
-
-	void register_fixed_files( io_context &, int *, unsigned ) {}
-	void register_fixed_files( cluster    &, int *, unsigned ) {}
+	void  ?{}($io_context & this, struct cluster & cl) {}
+	void ^?{}($io_context & this) {}
+
+	void __cfa_io_start( processor * proc ) {}
+	void __cfa_io_flush( processor * proc ) {}
+	void __cfa_io_stop ( processor * proc ) {}
+
+	$io_arbiter * create(void) { return 0p; }
+	void destroy($io_arbiter *) {}
 
 #else
@@ -68,10 +61,4 @@
 	void ?{}(io_context_params & this) {
 		this.num_entries = 256;
-		this.num_ready = 256;
-		this.submit_aff = -1;
-		this.eager_submits = false;
-		this.poller_submits = false;
-		this.poll_submit = false;
-		this.poll_complete = false;
 	}
 
@@ -106,222 +93,31 @@
 
 //=============================================================================================
-// I/O Startup / Shutdown logic + Master Poller
-//=============================================================================================
-
-	// IO Master poller loop forward
-	static void * iopoll_loop( __attribute__((unused)) void * args );
-
-	static struct {
-		      pthread_t  thrd;    // pthread handle to io poller thread
-		      void *     stack;   // pthread stack for io poller thread
-		      int        epollfd; // file descriptor to the epoll instance
-		volatile     bool run;     // Whether or not to continue
-		volatile     bool stopped; // Whether the poller has finished running
-		volatile uint64_t epoch;   // Epoch used for memory reclamation
-	} iopoll;
-
-	void __kernel_io_startup(void) {
-		__cfadbg_print_safe(io_core, "Kernel : Creating EPOLL instance\n" );
-
-		iopoll.epollfd = epoll_create1(0);
-		if (iopoll.epollfd == -1) {
-			abort( "internal error, epoll_create1\n");
-		}
-
-		__cfadbg_print_safe(io_core, "Kernel : Starting io poller thread\n" );
-
-		iopoll.stack   = __create_pthread( &iopoll.thrd, iopoll_loop, 0p );
-		iopoll.run     = true;
-		iopoll.stopped = false;
-		iopoll.epoch   = 0;
-	}
-
-	void __kernel_io_shutdown(void) {
-		// Notify the io poller thread of the shutdown
-		iopoll.run = false;
-		sigval val = { 1 };
-		pthread_sigqueue( iopoll.thrd, SIGUSR1, val );
-
-		// Wait for the io poller thread to finish
-
-		__destroy_pthread( iopoll.thrd, iopoll.stack, 0p );
-
-		int ret = close(iopoll.epollfd);
-		if (ret == -1) {
-			abort( "internal error, close epoll\n");
-		}
-
-		// Io polling is now fully stopped
-
-		__cfadbg_print_safe(io_core, "Kernel : IO poller stopped\n" );
-	}
-
-	static void * iopoll_loop( __attribute__((unused)) void * args ) {
-		__processor_id_t id;
-		id.full_proc = false;
-		id.id = doregister(&id);
-		__cfaabi_tls.this_proc_id = &id;
-		__cfadbg_print_safe(io_core, "Kernel : IO poller thread starting\n" );
-
-		// Block signals to control when they arrive
-		sigset_t mask;
-		sigfillset(&mask);
-		if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
-		abort( "internal error, pthread_sigmask" );
-		}
-
-		sigdelset( &mask, SIGUSR1 );
-
-		// Create sufficient events
-		struct epoll_event events[10];
-		// Main loop
-		while( iopoll.run ) {
-			__cfadbg_print_safe(io_core, "Kernel I/O - epoll : waiting on io_uring contexts\n");
-
-			// increment the epoch to notify any deleters we are starting a new cycle
-			__atomic_fetch_add(&iopoll.epoch, 1, __ATOMIC_SEQ_CST);
-
-			// Wait for events
-			int nfds = epoll_pwait( iopoll.epollfd, events, 10, -1, &mask );
-
-			__cfadbg_print_safe(io_core, "Kernel I/O - epoll : %d io contexts events, waking up\n", nfds);
-
-			// Check if an error occured
-			if (nfds == -1) {
-				if( errno == EINTR ) continue;
-				abort( "internal error, pthread_sigmask" );
-			}
-
-			for(i; nfds) {
-				$io_ctx_thread * io_ctx = ($io_ctx_thread *)(uintptr_t)events[i].data.u64;
-				/* paranoid */ verify( io_ctx );
-				__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Unparking io poller %d (%p)\n", io_ctx->ring->fd, io_ctx);
-				#if !defined( __CFA_NO_STATISTICS__ )
-					__cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats;
-				#endif
-
-				eventfd_t v;
-				eventfd_read(io_ctx->ring->efd, &v);
-
-				post( io_ctx->sem );
-			}
-		}
-
-		__atomic_store_n(&iopoll.stopped, true, __ATOMIC_SEQ_CST);
-
-		__cfadbg_print_safe(io_core, "Kernel : IO poller thread stopping\n" );
-		unregister(&id);
-		return 0p;
-	}
-
-//=============================================================================================
 // I/O Context Constrution/Destruction
 //=============================================================================================
 
-	void ?{}($io_ctx_thread & this, struct cluster & cl) { (this.self){ "IO Poller", cl }; }
-	void main( $io_ctx_thread & this );
-	static inline $thread * get_thread( $io_ctx_thread & this ) { return &this.self; }
-	void ^?{}( $io_ctx_thread & mutex this ) {}
-
-	static void __io_create ( __io_data & this, const io_context_params & params_in );
-	static void __io_destroy( __io_data & this );
-
-	void ?{}(io_context & this, struct cluster & cl, const io_context_params & params) {
-		(this.thrd){ cl };
-		this.thrd.ring = malloc();
-		__cfadbg_print_safe(io_core, "Kernel I/O : Creating ring for io_context %p\n", &this);
-		__io_create( *this.thrd.ring, params );
-
-		__cfadbg_print_safe(io_core, "Kernel I/O : Starting poller thread for io_context %p\n", &this);
-		this.thrd.done = false;
-		__thrd_start( this.thrd, main );
-
-		__cfadbg_print_safe(io_core, "Kernel I/O : io_context %p ready\n", &this);
-	}
-
-	void ?{}(io_context & this, struct cluster & cl) {
-		io_context_params params;
-		(this){ cl, params };
-	}
-
-	void ^?{}(io_context & this, bool cluster_context) {
-		__cfadbg_print_safe(io_core, "Kernel I/O : tearing down io_context %p\n", &this);
-
-		// Notify the thread of the shutdown
-		__atomic_store_n(&this.thrd.done, true, __ATOMIC_SEQ_CST);
-
-		// If this is an io_context within a cluster, things get trickier
-		$thread & thrd = this.thrd.self;
-		if( cluster_context ) {
-			// We are about to do weird things with the threads
-			// we don't need interrupts to complicate everything
-			disable_interrupts();
-
-			// Get cluster info
-			cluster & cltr = *thrd.curr_cluster;
-			/* paranoid */ verify( cltr.idles.total == 0 || &cltr == mainCluster );
-			/* paranoid */ verify( !ready_mutate_islocked() );
-
-			// We need to adjust the clean-up based on where the thread is
-			if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) {
-				// This is the tricky case
-				// The thread was preempted or ready to run and now it is on the ready queue
-				// but the cluster is shutting down, so there aren't any processors to run the ready queue
-				// the solution is to steal the thread from the ready-queue and pretend it was blocked all along
-
-				ready_schedule_lock();
-					// The thread should on the list
-					/* paranoid */ verify( thrd.link.next != 0p );
-
-					// Remove the thread from the ready queue of this cluster
-					// The thread should be the last on the list
-					__attribute__((unused)) bool removed = remove_head( &cltr, &thrd );
-					/* paranoid */ verify( removed );
-					thrd.link.next = 0p;
-					thrd.link.prev = 0p;
-
-					// Fixup the thread state
-					thrd.state = Blocked;
-					thrd.ticket = TICKET_BLOCKED;
-					thrd.preempted = __NO_PREEMPTION;
-
-				ready_schedule_unlock();
-
-				// Pretend like the thread was blocked all along
-			}
-			// !!! This is not an else if !!!
-			// Ok, now the thread is blocked (whether we cheated to get here or not)
-			if( thrd.state == Blocked ) {
-				// This is the "easy case"
-				// The thread is parked and can easily be moved to active cluster
-				verify( thrd.curr_cluster != active_cluster() || thrd.curr_cluster == mainCluster );
-				thrd.curr_cluster = active_cluster();
-
-				// unpark the fast io_poller
-				unpark( &thrd );
-			}
-			else {
-				// The thread is in a weird state
-				// I don't know what to do here
-				abort("io_context poller thread is in unexpected state, cannot clean-up correctly\n");
-			}
-
-			// The weird thread kidnapping stuff is over, restore interrupts.
-			enable_interrupts( __cfaabi_dbg_ctx );
-		} else {
-			post( this.thrd.sem );
-		}
-
-		^(this.thrd){};
-		__cfadbg_print_safe(io_core, "Kernel I/O : Stopped poller thread for io_context %p\n", &this);
-
-		__io_destroy( *this.thrd.ring );
-		__cfadbg_print_safe(io_core, "Kernel I/O : Destroyed ring for io_context %p\n", &this);
-
-		free(this.thrd.ring);
-	}
-
-	void ^?{}(io_context & this) {
-		^(this){ false };
+
+
+	static void __io_uring_setup ( $io_context & this, const io_context_params & params_in, int procfd );
+	static void __io_uring_teardown( $io_context & this );
+	static void __epoll_register($io_context & ctx);
+	static void __epoll_unregister($io_context & ctx);
+	void __ioarbiter_register( $io_arbiter & mutex, $io_context & ctx );
+	void __ioarbiter_unregister( $io_arbiter & mutex, $io_context & ctx );
+
+	void ?{}($io_context & this, processor * proc, struct cluster & cl) {
+		/* paranoid */ verify( cl.io.arbiter );
+		this.proc = proc;
+		this.arbiter = cl.io.arbiter;
+		this.ext_sq.empty = true;
+		(this.ext_sq.blocked){};
+		__io_uring_setup( this, cl.io.params, proc->idle );
+		__cfadbg_print_safe(io_core, "Kernel I/O : Created ring for io_context %u (%p)\n", this.fd, &this);
+	}
+
+	void ^?{}($io_context & this) {
+		__cfadbg_print_safe(io_core, "Kernel I/O : tearing down io_context %u\n", this.fd);
+
+		__io_uring_teardown( this );
+		__cfadbg_print_safe(io_core, "Kernel I/O : Destroyed ring for io_context %u\n", this.fd);
 	}
 
@@ -329,10 +125,10 @@
 	extern void __enable_interrupts_hard();
 
-	static void __io_create( __io_data & this, const io_context_params & params_in ) {
+	static void __io_uring_setup( $io_context & this, const io_context_params & params_in, int procfd ) {
 		// Step 1 : call to setup
 		struct io_uring_params params;
 		memset(&params, 0, sizeof(params));
-		if( params_in.poll_submit   ) params.flags |= IORING_SETUP_SQPOLL;
-		if( params_in.poll_complete ) params.flags |= IORING_SETUP_IOPOLL;
+		// if( params_in.poll_submit   ) params.flags |= IORING_SETUP_SQPOLL;
+		// if( params_in.poll_complete ) params.flags |= IORING_SETUP_IOPOLL;
 
 		__u32 nentries = params_in.num_entries != 0 ? params_in.num_entries : 256;
@@ -340,7 +136,4 @@
 			abort("ERROR: I/O setup 'num_entries' must be a power of 2\n");
 		}
-		if( params_in.poller_submits && params_in.eager_submits ) {
-			abort("ERROR: I/O setup 'poller_submits' and 'eager_submits' cannot be used together\n");
-		}
 
 		int fd = syscall(__NR_io_uring_setup, nentries, &params );
@@ -350,7 +143,6 @@
 
 		// Step 2 : mmap result
-		memset( &this, 0, sizeof(struct __io_data) );
-		struct __submition_data  & sq = this.submit_q;
-		struct __completion_data & cq = this.completion_q;
+		struct __sub_ring_t & sq = this.sq;
+		struct __cmp_ring_t & cq = this.cq;
 
 		// calculate the right ring size
@@ -401,37 +193,22 @@
 		// Get the pointers from the kernel to fill the structure
 		// submit queue
-		sq.head    = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.head);
-		sq.tail    = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);
-		sq.mask    = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);
-		sq.num     = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);
-		sq.flags   = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);
-		sq.dropped = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
-		sq.array   = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
-		sq.prev_head = *sq.head;
-
-		{
-			const __u32 num = *sq.num;
-			for( i; num ) {
-				__sqe_clean( &sq.sqes[i] );
-			}
-		}
-
-		(sq.submit_lock){};
-		(sq.release_lock){};
-
-		if( params_in.poller_submits || params_in.eager_submits ) {
-			/* paranoid */ verify( is_pow2( params_in.num_ready ) || (params_in.num_ready < 8) );
-			sq.ready_cnt = max( params_in.num_ready, 8 );
-			sq.ready = alloc( sq.ready_cnt, 64`align );
-			for(i; sq.ready_cnt) {
-				sq.ready[i] = -1ul32;
-			}
-			sq.prev_ready = 0;
-		}
-		else {
-			sq.ready_cnt = 0;
-			sq.ready = 0p;
-			sq.prev_ready = 0;
-		}
+		sq.kring.head  = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.head);
+		sq.kring.tail  = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);
+		sq.kring.array = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
+		sq.mask        = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);
+		sq.num         = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);
+		sq.flags       = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);
+		sq.dropped     = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
+
+		sq.kring.released = 0;
+
+		sq.free_ring.head = 0;
+		sq.free_ring.tail = *sq.num;
+		sq.free_ring.array = alloc( *sq.num, 128`align );
+		for(i; (__u32)*sq.num) {
+			sq.free_ring.array[i] = i;
+		}
+
+		sq.to_submit = 0;
 
 		// completion queue
@@ -446,12 +223,9 @@
 		// io_uring_register is so f*cking slow on some machine that it
 		// will never succeed if preemption isn't hard blocked
+		__cfadbg_print_safe(io_core, "Kernel I/O : registering %d for completion with ring %d\n", procfd, fd);
+
 		__disable_interrupts_hard();
 
-		int efd = eventfd(0, 0);
-		if (efd < 0) {
-			abort("KERNEL ERROR: IO_URING EVENTFD - %s\n", strerror(errno));
-		}
-
-		int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &efd, 1);
+		int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &procfd, 1);
 		if (ret < 0) {
 			abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno));
@@ -459,4 +233,6 @@
 
 		__enable_interrupts_hard();
+
+		__cfadbg_print_safe(io_core, "Kernel I/O : registered %d for completion with ring %d\n", procfd, fd);
 
 		// some paranoid checks
@@ -468,19 +244,16 @@
 		/* paranoid */ verifyf( (*sq.mask) == ((*sq.num) - 1ul32), "IO_URING Expected mask to be %u (%u entries), was %u", (*sq.num) - 1ul32, *sq.num, *sq.mask );
 		/* paranoid */ verifyf( (*sq.num) >= nentries, "IO_URING Expected %u entries, got %u", nentries, *sq.num );
-		/* paranoid */ verifyf( (*sq.head) == 0, "IO_URING Expected head to be 0, got %u", *sq.head );
-		/* paranoid */ verifyf( (*sq.tail) == 0, "IO_URING Expected tail to be 0, got %u", *sq.tail );
+		/* paranoid */ verifyf( (*sq.kring.head) == 0, "IO_URING Expected head to be 0, got %u", *sq.kring.head );
+		/* paranoid */ verifyf( (*sq.kring.tail) == 0, "IO_URING Expected tail to be 0, got %u", *sq.kring.tail );
 
 		// Update the global ring info
-		this.ring_flags = params.flags;
+		this.ring_flags = 0;
 		this.fd         = fd;
-		this.efd        = efd;
-		this.eager_submits  = params_in.eager_submits;
-		this.poller_submits = params_in.poller_submits;
-	}
-
-	static void __io_destroy( __io_data & this ) {
+	}
+
+	static void __io_uring_teardown( $io_context & this ) {
 		// Shutdown the io rings
-		struct __submition_data  & sq = this.submit_q;
-		struct __completion_data & cq = this.completion_q;
+		struct __sub_ring_t & sq = this.sq;
+		struct __cmp_ring_t & cq = this.cq;
 
 		// unmap the submit queue entries
@@ -497,7 +270,15 @@
 		// close the file descriptor
 		close(this.fd);
-		close(this.efd);
-
-		free( this.submit_q.ready ); // Maybe null, doesn't matter
+
+		free( this.sq.free_ring.array ); // Maybe null, doesn't matter
+	}
+
+	void __cfa_io_start( processor * proc ) {
+		proc->io.ctx = alloc();
+		(*proc->io.ctx){proc, *proc->cltr};
+	}
+	void __cfa_io_stop ( processor * proc ) {
+		^(*proc->io.ctx){};
+		free(proc->io.ctx);
 	}
 
@@ -505,58 +286,66 @@
 // I/O Context Sleep
 //=============================================================================================
-	static inline void __ioctx_epoll_ctl($io_ctx_thread & ctx, int op, const char * error) {
-		struct epoll_event ev;
-		ev.events = EPOLLIN | EPOLLONESHOT;
-		ev.data.u64 = (__u64)&ctx;
-		int ret = epoll_ctl(iopoll.epollfd, op, ctx.ring->efd, &ev);
-		if (ret < 0) {
-			abort( "KERNEL ERROR: EPOLL %s - (%d) %s\n", error, (int)errno, strerror(errno) );
-		}
-	}
-
-	void __ioctx_register($io_ctx_thread & ctx) {
-		__ioctx_epoll_ctl(ctx, EPOLL_CTL_ADD, "ADD");
-	}
-
-	void __ioctx_prepare_block($io_ctx_thread & ctx) {
-		__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %d (%p)\n", ctx.ring->fd, &ctx);
-		__ioctx_epoll_ctl(ctx, EPOLL_CTL_MOD, "REARM");
-	}
-
-	void __ioctx_unregister($io_ctx_thread & ctx) {
-		// Read the current epoch so we know when to stop
-		size_t curr = __atomic_load_n(&iopoll.epoch, __ATOMIC_SEQ_CST);
-
-		// Remove the fd from the iopoller
-		__ioctx_epoll_ctl(ctx, EPOLL_CTL_DEL, "REMOVE");
-
-		// Notify the io poller thread of the shutdown
-		iopoll.run = false;
-		sigval val = { 1 };
-		pthread_sigqueue( iopoll.thrd, SIGUSR1, val );
-
-		// Make sure all this is done
-		__atomic_thread_fence(__ATOMIC_SEQ_CST);
-
-		// Wait for the next epoch
-		while(curr == iopoll.epoch && !iopoll.stopped) Pause();
-	}
+	// static inline void __epoll_ctl($io_context & ctx, int op, const char * error) {
+	// 	struct epoll_event ev;
+	// 	ev.events = EPOLLIN | EPOLLONESHOT;
+	// 	ev.data.u64 = (__u64)&ctx;
+	// 	int ret = epoll_ctl(iopoll.epollfd, op, ctx.efd, &ev);
+	// 	if (ret < 0) {
+	// 		abort( "KERNEL ERROR: EPOLL %s - (%d) %s\n", error, (int)errno, strerror(errno) );
+	// 	}
+	// }
+
+	// static void __epoll_register($io_context & ctx) {
+	// 	__epoll_ctl(ctx, EPOLL_CTL_ADD, "ADD");
+	// }
+
+	// static void __epoll_unregister($io_context & ctx) {
+	// 	// Read the current epoch so we know when to stop
+	// 	size_t curr = __atomic_load_n(&iopoll.epoch, __ATOMIC_SEQ_CST);
+
+	// 	// Remove the fd from the iopoller
+	// 	__epoll_ctl(ctx, EPOLL_CTL_DEL, "REMOVE");
+
+	// 	// Notify the io poller thread of the shutdown
+	// 	iopoll.run = false;
+	// 	sigval val = { 1 };
+	// 	pthread_sigqueue( iopoll.thrd, SIGUSR1, val );
+
+	// 	// Make sure all this is done
+	// 	__atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+	// 	// Wait for the next epoch
+	// 	while(curr == iopoll.epoch && !iopoll.stopped) Pause();
+	// }
+
+	// void __ioctx_prepare_block($io_context & ctx) {
+	// 	__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %d (%p)\n", ctx.fd, &ctx);
+	// 	__epoll_ctl(ctx, EPOLL_CTL_MOD, "REARM");
+	// }
+
 
 //=============================================================================================
 // I/O Context Misc Setup
 //=============================================================================================
-	void register_fixed_files( io_context & ctx, int * files, unsigned count ) {
-		int ret = syscall( __NR_io_uring_register, ctx.thrd.ring->fd, IORING_REGISTER_FILES, files, count );
-		if( ret < 0 ) {
-			abort( "KERNEL ERROR: IO_URING REGISTER - (%d) %s\n", (int)errno, strerror(errno) );
-		}
-
-		__cfadbg_print_safe( io_core, "Kernel I/O : Performed io_register for %p, returned %d\n", active_thread(), ret );
-	}
-
-	void register_fixed_files( cluster & cltr, int * files, unsigned count ) {
-		for(i; cltr.io.cnt) {
-			register_fixed_files( cltr.io.ctxs[i], files, count );
-		}
-	}
+	void ?{}( $io_arbiter & this ) {
+		this.pending.flag = false;
+	}
+
+	void ^?{}( $io_arbiter & mutex this ) {
+		// /* paranoid */ verify( empty(this.assigned) );
+		// /* paranoid */ verify( empty(this.available) );
+		/* paranoid */ verify( is_empty(this.pending.blocked) );
+	}
+
+	$io_arbiter * create(void) {
+		return new();
+	}
+	void destroy($io_arbiter * arbiter) {
+		delete(arbiter);
+	}
+
+//=============================================================================================
+// I/O Context Misc Setup
+//=============================================================================================
+
 #endif
Index: libcfa/src/concurrency/io/types.hfa
===================================================================
--- libcfa/src/concurrency/io/types.hfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/io/types.hfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -25,22 +25,34 @@
 
 #if defined(CFA_HAVE_LINUX_IO_URING_H)
-	#define LEADER_LOCK
-	struct __leaderlock_t {
-		struct $thread * volatile value;	// ($thread) next_leader | (bool:1) is_locked
-	};
+	#include "bits/sequence.hfa"
+	#include "monitor.hfa"
 
-	static inline void ?{}( __leaderlock_t & this ) { this.value = 0p; }
+	struct processor;
+	monitor $io_arbiter;
 
 	//-----------------------------------------------------------------------
 	// Ring Data structure
-      struct __submition_data {
-		// Head and tail of the ring (associated with array)
-		volatile __u32 * head;
-		volatile __u32 * tail;
-		volatile __u32 prev_head;
+      struct __sub_ring_t {
+		struct {
+			// Head and tail of the ring (associated with array)
+			volatile __u32 * head;	 // one passed last index consumed by the kernel
+			volatile __u32 * tail;   // one passed last index visible to the kernel
+			volatile __u32 released; // one passed last index released back to the free list
 
-		// The actual kernel ring which uses head/tail
-		// indexes into the sqes arrays
-		__u32 * array;
+			// The actual kernel ring which uses head/tail
+			// indexes into the sqes arrays
+			__u32 * array;
+		} kring;
+
+		struct {
+			volatile __u32 head;
+			volatile __u32 tail;
+			// The ring which contains free allocations
+			// indexes into the sqes arrays
+			__u32 * array;
+		} free_ring;
+
+		// number of sqes to submit on next system call.
+		__u32 to_submit;
 
 		// number of entries and mask to go with it
@@ -48,24 +60,13 @@
 		const __u32 * mask;
 
-		// Submission flags (Not sure what for)
+		// Kernel-written SQ ring flags (e.g. IORING_SQ_NEED_WAKEUP under SQPOLL)
 		__u32 * flags;
 
-		// number of sqes not submitted (whatever that means)
+		// number of sqes not submitted
+		// From documentation : [dropped] is incremented for each invalid submission queue entry encountered in the ring buffer.
 		__u32 * dropped;
 
-		// Like head/tail but not seen by the kernel
-		volatile __u32 * ready;
-		__u32 ready_cnt;
-		__u32 prev_ready;
-
-		#if defined(LEADER_LOCK)
-			__leaderlock_t submit_lock;
-		#else
-			__spinlock_t submit_lock;
-		#endif
-		__spinlock_t  release_lock;
-
 		// A buffer of sqes (not the actual ring)
-		volatile struct io_uring_sqe * sqes;
+		struct io_uring_sqe * sqes;
 
 		// The location and size of the mmaped area
@@ -74,5 +75,5 @@
 	};
 
-	struct __completion_data {
+	struct __cmp_ring_t {
 		// Head and tail of the ring
 		volatile __u32 * head;
@@ -83,5 +84,5 @@
 		const __u32 * num;
 
-		// number of cqes not submitted (whatever that means)
+		// incremented by the kernel each time a completion is dropped because the CQ ring is full
 		__u32 * overflow;
 
@@ -94,12 +95,25 @@
 	};
 
-	struct __io_data {
-		struct __submition_data submit_q;
-		struct __completion_data completion_q;
+	struct __attribute__((aligned(128))) $io_context {
+		$io_arbiter * arbiter;
+		processor * proc;
+
+		struct {
+			volatile bool empty;
+			condition blocked;
+		} ext_sq;
+
+		struct __sub_ring_t sq;
+		struct __cmp_ring_t cq;
 		__u32 ring_flags;
 		int fd;
-		int efd;
-		bool eager_submits:1;
-		bool poller_submits:1;
+	};
+
+	monitor __attribute__((aligned(128))) $io_arbiter {
+		struct {
+			condition blocked;
+			$io_context * ctx;
+			volatile bool flag;
+		} pending;
 	};
 
@@ -133,9 +147,5 @@
 	#endif
 
-	struct $io_ctx_thread;
-	void __ioctx_register($io_ctx_thread & ctx);
-	void __ioctx_unregister($io_ctx_thread & ctx);
-	void __ioctx_prepare_block($io_ctx_thread & ctx);
-	void __sqe_clean( volatile struct io_uring_sqe * sqe );
+	// void __ioctx_prepare_block($io_context & ctx);
 #endif
 
Index: libcfa/src/concurrency/iofwd.hfa
===================================================================
--- libcfa/src/concurrency/iofwd.hfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/iofwd.hfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -18,5 +18,5 @@
 #include <unistd.h>
 extern "C" {
-	#include <sys/types.h>
+	#include <asm/types.h>
 	#if CFA_HAVE_LINUX_IO_URING_H
 		#include <linux/io_uring.h>
@@ -48,6 +48,5 @@
 struct cluster;
 struct io_future_t;
-struct io_context;
-struct io_cancellation;
+struct $io_context;
 
 struct iovec;
@@ -55,70 +54,76 @@
 struct sockaddr;
 struct statx;
+struct epoll_event;
+
+//----------
+// underlying calls
+extern struct $io_context * cfa_io_allocate(struct io_uring_sqe * out_sqes[], __u32 out_idxs[], __u32 want)  __attribute__((nonnull (1,2)));
+extern void cfa_io_submit( struct $io_context * in_ctx, __u32 in_idxs[], __u32 have, bool lazy ) __attribute__((nonnull (1,2)));
 
 //----------
 // synchronous calls
 #if defined(CFA_HAVE_PREADV2)
-	extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+	extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, __u64 submit_flags);
 #endif
 #if defined(CFA_HAVE_PWRITEV2)
-	extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+	extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, __u64 submit_flags);
 #endif
-extern int cfa_fsync(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern int cfa_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern int cfa_sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern  ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern int cfa_fallocate(int fd, int mode, off_t offset, off_t len, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern int cfa_posix_fadvise(int fd, off_t offset, off_t len, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern int cfa_madvise(void *addr, size_t length, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_fsync(int fd, __u64 submit_flags);
+extern int cfa_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event, __u64 submit_flags);
+extern int cfa_sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags, __u64 submit_flags);
+extern  ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, __u64 submit_flags);
+extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, __u64 submit_flags);
+extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, __u64 submit_flags);
+extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, __u64 submit_flags);
+extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, __u64 submit_flags);
+extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, __u64 submit_flags);
+extern int cfa_fallocate(int fd, int mode, off_t offset, off_t len, __u64 submit_flags);
+extern int cfa_posix_fadvise(int fd, off_t offset, off_t len, int advice, __u64 submit_flags);
+extern int cfa_madvise(void *addr, size_t length, int advice, __u64 submit_flags);
+extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, __u64 submit_flags);
 #if defined(CFA_HAVE_OPENAT2)
-	extern int cfa_openat2(int dirfd, const char *pathname, struct open_how * how, size_t size, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+	extern int cfa_openat2(int dirfd, const char *pathname, struct open_how * how, size_t size, __u64 submit_flags);
 #endif
-extern int cfa_close(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_close(int fd, __u64 submit_flags);
 #if defined(CFA_HAVE_STATX)
-	extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+	extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, __u64 submit_flags);
 #endif
-extern ssize_t cfa_read(int fd, void * buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern ssize_t cfa_write(int fd, void * buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
-extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_read(int fd, void * buf, size_t count, __u64 submit_flags);
+extern ssize_t cfa_write(int fd, void * buf, size_t count, __u64 submit_flags);
+extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, __u64 submit_flags);
+extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, __u64 submit_flags);
 
 //----------
 // asynchronous calls
 #if defined(CFA_HAVE_PREADV2)
-	extern void async_preadv2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+	extern void async_preadv2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, __u64 submit_flags);
 #endif
 #if defined(CFA_HAVE_PWRITEV2)
-	extern void async_pwritev2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+	extern void async_pwritev2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, __u64 submit_flags);
 #endif
-extern void async_fsync(io_future_t & future, int fd, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_epoll_ctl(io_future_t & future, int epfd, int op, int fd, struct epoll_event *event, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_sync_file_range(io_future_t & future, int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_sendmsg(io_future_t & future, int sockfd, const struct msghdr *msg, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_recvmsg(io_future_t & future, int sockfd, struct msghdr *msg, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_send(io_future_t & future, int sockfd, const void *buf, size_t len, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_recv(io_future_t & future, int sockfd, void *buf, size_t len, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_accept4(io_future_t & future, int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_connect(io_future_t & future, int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_fallocate(io_future_t & future, int fd, int mode, off_t offset, off_t len, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_posix_fadvise(io_future_t & future, int fd, off_t offset, off_t len, int advice, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_madvise(io_future_t & future, void *addr, size_t length, int advice, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_openat(io_future_t & future, int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_fsync(io_future_t & future, int fd, __u64 submit_flags);
+extern void async_epoll_ctl(io_future_t & future, int epfd, int op, int fd, struct epoll_event *event, __u64 submit_flags);
+extern void async_sync_file_range(io_future_t & future, int fd, off64_t offset, off64_t nbytes, unsigned int flags, __u64 submit_flags);
+extern void async_sendmsg(io_future_t & future, int sockfd, const struct msghdr *msg, int flags, __u64 submit_flags);
+extern void async_recvmsg(io_future_t & future, int sockfd, struct msghdr *msg, int flags, __u64 submit_flags);
+extern void async_send(io_future_t & future, int sockfd, const void *buf, size_t len, int flags, __u64 submit_flags);
+extern void async_recv(io_future_t & future, int sockfd, void *buf, size_t len, int flags, __u64 submit_flags);
+extern void async_accept4(io_future_t & future, int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, __u64 submit_flags);
+extern void async_connect(io_future_t & future, int sockfd, const struct sockaddr *addr, socklen_t addrlen, __u64 submit_flags);
+extern void async_fallocate(io_future_t & future, int fd, int mode, off_t offset, off_t len, __u64 submit_flags);
+extern void async_posix_fadvise(io_future_t & future, int fd, off_t offset, off_t len, int advice, __u64 submit_flags);
+extern void async_madvise(io_future_t & future, void *addr, size_t length, int advice, __u64 submit_flags);
+extern void async_openat(io_future_t & future, int dirfd, const char *pathname, int flags, mode_t mode, __u64 submit_flags);
 #if defined(CFA_HAVE_OPENAT2)
-	extern void async_openat2(io_future_t & future, int dirfd, const char *pathname, struct open_how * how, size_t size, int submit_flags, io_cancellation * cancellation, io_context * context);
+	extern void async_openat2(io_future_t & future, int dirfd, const char *pathname, struct open_how * how, size_t size, __u64 submit_flags);
 #endif
-extern void async_close(io_future_t & future, int fd, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_close(io_future_t & future, int fd, __u64 submit_flags);
 #if defined(CFA_HAVE_STATX)
-	extern void async_statx(io_future_t & future, int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags, io_cancellation * cancellation, io_context * context);
+	extern void async_statx(io_future_t & future, int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, __u64 submit_flags);
 #endif
-void async_read(io_future_t & future, int fd, void * buf, size_t count, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_write(io_future_t & future, int fd, void * buf, size_t count, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_splice(io_future_t & future, int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
-extern void async_tee(io_future_t & future, int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+void async_read(io_future_t & future, int fd, void * buf, size_t count, __u64 submit_flags);
+extern void async_write(io_future_t & future, int fd, void * buf, size_t count, __u64 submit_flags);
+extern void async_splice(io_future_t & future, int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, __u64 submit_flags);
+extern void async_tee(io_future_t & future, int fd_in, int fd_out, size_t len, unsigned int flags, __u64 submit_flags);
 
 
@@ -126,6 +131,2 @@
 // Check if a function is blocks a only the user thread
 bool has_user_level_blocking( fptr_t func );
-
-//-----------------------------------------------------------------------------
-void register_fixed_files( io_context & ctx , int * files, unsigned count );
-void register_fixed_files( cluster    & cltr, int * files, unsigned count );
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/kernel.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -22,4 +22,7 @@
 #include <signal.h>
 #include <unistd.h>
+extern "C" {
+	#include <sys/eventfd.h>
+}
 
 //CFA Includes
@@ -109,5 +112,4 @@
 static void __run_thread(processor * this, $thread * dst);
 static void __wake_one(cluster * cltr);
-static void wait(__bin_sem_t & this);
 
 static void push  (__cluster_idles & idles, processor & proc);
@@ -115,4 +117,12 @@
 static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );
 
+extern void __cfa_io_start( processor * );
+extern void __cfa_io_drain( processor * );
+extern void __cfa_io_flush( processor * );
+extern void __cfa_io_stop ( processor * );
+static inline void __maybe_io_drain( processor * );
+
+extern void __disable_interrupts_hard();
+extern void __enable_interrupts_hard();
 
 //=============================================================================================
@@ -130,4 +140,6 @@
 	verify(this);
 
+	__cfa_io_start( this );
+
 	__cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
 	#if !defined(__CFA_NO_STATISTICS__)
@@ -151,8 +163,12 @@
 		MAIN_LOOP:
 		for() {
+			// Check if there is pending io
+			__maybe_io_drain( this );
+
 			// Try to get the next thread
 			readyThread = __next_thread( this->cltr );
 
 			if( !readyThread ) {
+				__cfa_io_flush( this );
 				readyThread = __next_thread_slow( this->cltr );
 			}
@@ -190,5 +206,10 @@
 				#endif
 
-				wait( this->idle );
+				__cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle);
+
+				__disable_interrupts_hard();
+				eventfd_t val;
+				eventfd_read( this->idle, &val );
+				__enable_interrupts_hard();
 
 				#if !defined(__CFA_NO_STATISTICS__)
@@ -206,4 +227,7 @@
 
 			/* paranoid */ verify( readyThread );
+
+			// Reset io dirty bit
+			this->io.dirty = false;
 
 			// We found a thread run it
@@ -220,4 +244,8 @@
 				}
 			#endif
+
+			if(this->io.pending && !this->io.dirty) {
+				__cfa_io_flush( this );
+			}
 		}
 
@@ -225,5 +253,8 @@
 	}
 
+	__cfa_io_stop( this );
+
 	post( this->terminated );
+
 
 	if(this == mainProcessor) {
@@ -248,4 +279,6 @@
 	/* paranoid */ verifyf( thrd_dst->link.next == 0p, "Expected null got %p", thrd_dst->link.next );
 	__builtin_prefetch( thrd_dst->context.SP );
+
+	__cfadbg_print_safe(runtime_core, "Kernel : core %p running thread %p (%s)\n", this, thrd_dst, thrd_dst->self_cor.name);
 
 	$coroutine * proc_cor = get_coroutine(this->runner);
@@ -330,4 +363,6 @@
 	// Just before returning to the processor, set the processor coroutine to active
 	proc_cor->state = Active;
+
+	__cfadbg_print_safe(runtime_core, "Kernel : core %p finished running thread %p\n", this, thrd_dst);
 
 	/* paranoid */ verify( ! __preemption_enabled() );
@@ -549,35 +584,4 @@
 // Kernel Idle Sleep
 //=============================================================================================
-extern "C" {
-	char * strerror(int);
-}
-#define CHECKED(x) { int err = x; if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); }
-
-static void wait(__bin_sem_t & this) with( this ) {
-	verify(__cfaabi_dbg_in_kernel());
-	CHECKED( pthread_mutex_lock(&lock) );
-		while(val < 1) {
-			pthread_cond_wait(&cond, &lock);
-		}
-		val -= 1;
-	CHECKED( pthread_mutex_unlock(&lock) );
-}
-
-static bool post(__bin_sem_t & this) with( this ) {
-	bool needs_signal = false;
-
-	CHECKED( pthread_mutex_lock(&lock) );
-		if(val < 1) {
-			val += 1;
-			pthread_cond_signal(&cond);
-			needs_signal = true;
-		}
-	CHECKED( pthread_mutex_unlock(&lock) );
-
-	return needs_signal;
-}
-
-#undef CHECKED
-
 // Wake a thread from the front if there are any
 static void __wake_one(cluster * this) {
@@ -595,5 +599,7 @@
 
 	// We found a processor, wake it up
-	post( p->idle );
+	eventfd_t val;
+	val = 1;
+	eventfd_write( p->idle, val );
 
 	#if !defined(__CFA_NO_STATISTICS__)
@@ -613,5 +619,7 @@
 	disable_interrupts();
 		/* paranoid */ verify( ! __preemption_enabled() );
-		post( this->idle );
+		eventfd_t val;
+		val = 1;
+		eventfd_write( this->idle, val );
 	enable_interrupts( __cfaabi_dbg_ctx );
 }
@@ -696,4 +704,20 @@
 // Kernel Utilities
 //=============================================================================================
+#if defined(CFA_HAVE_LINUX_IO_URING_H)
+#include "io/types.hfa"
+#endif
+
+static inline void __maybe_io_drain( processor * proc ) {
+	#if defined(CFA_HAVE_LINUX_IO_URING_H)
+		__cfadbg_print_safe(runtime_core, "Kernel : core %p checking io for ring %d\n", proc, proc->io.ctx->fd);
+
+		// Check if we should drain the queue
+		$io_context * ctx = proc->io.ctx;
+		unsigned head = *ctx->cq.head;
+		unsigned tail = *ctx->cq.tail;
+		if(head != tail) __cfa_io_drain( proc );
+	#endif
+}
+
 //-----------------------------------------------------------------------------
 // Debug
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/kernel.hfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -28,15 +28,19 @@
 }
 
-//-----------------------------------------------------------------------------
-// Underlying Locks
 #ifdef __CFA_WITH_VERIFY__
 	extern bool __cfaabi_dbg_in_kernel();
 #endif
 
-struct __bin_sem_t {
-	pthread_mutex_t 	lock;
-	pthread_cond_t  	cond;
-	int     		val;
-};
+//-----------------------------------------------------------------------------
+// I/O
+struct cluster;
+struct $io_context;
+struct $io_arbiter;
+
+struct io_context_params {
+	int num_entries;
+};
+
+void  ?{}(io_context_params & this);
 
 //-----------------------------------------------------------------------------
@@ -78,4 +82,10 @@
 	pthread_t kernel_thread;
 
+	struct {
+		$io_context * ctx;
+		bool pending;
+		bool dirty;
+	} io;
+
 	// Preemption data
 	// Node which is added in the discrete event simulaiton
@@ -86,5 +96,5 @@
 
 	// Idle lock (kernel semaphore)
-	__bin_sem_t idle;
+	int idle;
 
 	// Termination synchronisation (user semaphore)
@@ -116,47 +126,4 @@
 
 DLISTED_MGD_IMPL_OUT(processor)
-
-//-----------------------------------------------------------------------------
-// I/O
-struct __io_data;
-
-// IO poller user-thread
-// Not using the "thread" keyword because we want to control
-// more carefully when to start/stop it
-struct $io_ctx_thread {
-	struct __io_data * ring;
-	single_sem sem;
-	volatile bool done;
-	$thread self;
-};
-
-
-struct io_context {
-	$io_ctx_thread thrd;
-};
-
-struct io_context_params {
-	int num_entries;
-	int num_ready;
-	int submit_aff;
-	bool eager_submits:1;
-	bool poller_submits:1;
-	bool poll_submit:1;
-	bool poll_complete:1;
-};
-
-void  ?{}(io_context_params & this);
-
-void  ?{}(io_context & this, struct cluster & cl);
-void  ?{}(io_context & this, struct cluster & cl, const io_context_params & params);
-void ^?{}(io_context & this);
-
-struct io_cancellation {
-	__u64 target;
-};
-
-static inline void  ?{}(io_cancellation & this) { this.target = -1u; }
-static inline void ^?{}(io_cancellation &) {}
-bool cancel(io_cancellation & this);
 
 //-----------------------------------------------------------------------------
@@ -244,6 +211,6 @@
 
 	struct {
-		io_context * ctxs;
-		unsigned cnt;
+		$io_arbiter * arbiter;
+		io_context_params params;
 	} io;
 
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -22,4 +22,5 @@
 extern "C" {
       #include <limits.h>       // PTHREAD_STACK_MIN
+	#include <sys/eventfd.h>  // eventfd
       #include <sys/mman.h>     // mprotect
       #include <sys/resource.h> // getrlimit
@@ -80,6 +81,4 @@
 static void ?{}(processorCtx_t & this) {}
 static void ?{}(processorCtx_t & this, processor * proc, current_stack_info_t * info);
-static void ?{}(__bin_sem_t & this);
-static void ^?{}(__bin_sem_t & this);
 
 #if defined(__CFA_WITH_VERIFY__)
@@ -91,6 +90,4 @@
 extern void __kernel_alarm_startup(void);
 extern void __kernel_alarm_shutdown(void);
-extern void __kernel_io_startup (void);
-extern void __kernel_io_shutdown(void);
 
 //-----------------------------------------------------------------------------
@@ -104,5 +101,4 @@
 KERNEL_STORAGE($thread,	             mainThread);
 KERNEL_STORAGE(__stack_t,            mainThreadCtx);
-KERNEL_STORAGE(io_context,           mainPollerThread);
 KERNEL_STORAGE(__scheduler_RWLock_t, __scheduler_lock);
 #if !defined(__CFA_NO_STATISTICS__)
@@ -200,5 +196,4 @@
 
 	void ?{}(processor & this) with( this ) {
-		( this.idle ){};
 		( this.terminated ){};
 		( this.runner ){};
@@ -228,7 +223,4 @@
 	__kernel_alarm_startup();
 
-	// Start IO
-	__kernel_io_startup();
-
 	// Add the main thread to the ready queue
 	// once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
@@ -243,14 +235,4 @@
 	// THE SYSTEM IS NOW COMPLETELY RUNNING
 
-
-	// SKULLDUGGERY: The constructor for the mainCluster will call alloc with a dimension of 0
-	// malloc *can* return a non-null value, we should free it if that is the case
-	free( mainCluster->io.ctxs );
-
-	// Now that the system is up, finish creating systems that need threading
-	mainCluster->io.ctxs = (io_context *)&storage_mainPollerThread;
-	mainCluster->io.cnt  = 1;
-	(*mainCluster->io.ctxs){ *mainCluster };
-
 	__cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n");
 
@@ -262,9 +244,4 @@
 
 static void __kernel_shutdown(void) {
-	//Before we start shutting things down, wait for systems that need threading to shutdown
-	^(*mainCluster->io.ctxs){};
-	mainCluster->io.cnt  = 0;
-	mainCluster->io.ctxs = 0p;
-
 	/* paranoid */ verify( __preemption_enabled() );
 	disable_interrupts();
@@ -284,7 +261,4 @@
 	// Disable preemption
 	__kernel_alarm_shutdown();
-
-	// Stop IO
-	__kernel_io_shutdown();
 
 	// Destroy the main processor and its context in reverse order of construction
@@ -486,4 +460,13 @@
 	pending_preemption = false;
 
+	this.io.ctx = 0p;
+	this.io.pending = false;
+	this.io.dirty   = false;
+
+	this.idle = eventfd(0, 0);
+	if (idle < 0) {
+		abort("KERNEL ERROR: PROCESSOR EVENTFD - %s\n", strerror(errno));
+	}
+
 	#if !defined(__CFA_NO_STATISTICS__)
 		print_stats = 0;
@@ -526,8 +509,9 @@
 	// Finally we don't need the read_lock any more
 	unregister((__processor_id_t*)&this);
+
+	close(this.idle);
 }
 
 void ?{}(processor & this, const char name[], cluster & _cltr) {
-	( this.idle ){};
 	( this.terminated ){};
 	( this.runner ){};
@@ -584,4 +568,7 @@
 	threads{ __get };
 
+	io.arbiter = create();
+	io.params = io_params;
+
 	doregister(this);
 
@@ -596,18 +583,8 @@
 	ready_mutate_unlock( last_size );
 	enable_interrupts_noPoll(); // Don't poll, could be in main cluster
-
-
-	this.io.cnt  = num_io;
-	this.io.ctxs = aalloc(num_io);
-	for(i; this.io.cnt) {
-		(this.io.ctxs[i]){ this, io_params };
-	}
 }
 
 void ^?{}(cluster & this) {
-	for(i; this.io.cnt) {
-		^(this.io.ctxs[i]){ true };
-	}
-	free(this.io.ctxs);
+	destroy(this.io.arbiter);
 
 	// Lock the RWlock so no-one pushes/pops while we are changing the queue
@@ -738,27 +715,4 @@
 }
 
-extern "C" {
-	char * strerror(int);
-}
-#define CHECKED(x) { int err = x; if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); }
-
-static void ?{}(__bin_sem_t & this) with( this ) {
-	// Create the mutex with error checking
-	pthread_mutexattr_t mattr;
-	pthread_mutexattr_init( &mattr );
-	pthread_mutexattr_settype( &mattr, PTHREAD_MUTEX_ERRORCHECK_NP);
-	pthread_mutex_init(&lock, &mattr);
-
-	pthread_cond_init (&cond, (const pthread_condattr_t *)0p);  // workaround trac#208: cast should not be required
-	val = 0;
-}
-
-static void ^?{}(__bin_sem_t & this) with( this ) {
-	CHECKED( pthread_mutex_destroy(&lock) );
-	CHECKED( pthread_cond_destroy (&cond) );
-}
-
-#undef CHECKED
-
 #if defined(__CFA_WITH_VERIFY__)
 static bool verify_fwd_bck_rng(void) {
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -77,5 +77,6 @@
 //-----------------------------------------------------------------------------
 // I/O
-void ^?{}(io_context & this, bool );
+$io_arbiter * create(void);
+void destroy($io_arbiter *);
 
 //=======================================================================
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/preemption.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -585,6 +585,6 @@
 
 	// Setup proper signal handlers
-	__cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // __cfactx_switch handler
-	__cfaabi_sigaction( SIGALRM, sigHandler_alarm    , SA_SIGINFO | SA_RESTART ); // debug handler
+	__cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO ); // __cfactx_switch handler
+	__cfaabi_sigaction( SIGALRM, sigHandler_alarm    , SA_SIGINFO ); // debug handler
 
 	signal_block( SIGALRM );
Index: libcfa/src/concurrency/stats.cfa
===================================================================
--- libcfa/src/concurrency/stats.cfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/stats.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -25,19 +25,18 @@
 
 		#if defined(CFA_HAVE_LINUX_IO_URING_H)
-			stats->io.submit_q.submit_avg.rdy = 0;
-			stats->io.submit_q.submit_avg.csm = 0;
-			stats->io.submit_q.submit_avg.cnt = 0;
-			stats->io.submit_q.look_avg.val   = 0;
-			stats->io.submit_q.look_avg.cnt   = 0;
-			stats->io.submit_q.look_avg.block = 0;
-			stats->io.submit_q.alloc_avg.val   = 0;
-			stats->io.submit_q.alloc_avg.cnt   = 0;
-			stats->io.submit_q.alloc_avg.block = 0;
-			stats->io.submit_q.helped = 0;
-			stats->io.submit_q.leader = 0;
-			stats->io.submit_q.busy   = 0;
-			stats->io.complete_q.completed_avg.val = 0;
-			stats->io.complete_q.completed_avg.cnt = 0;
-			stats->io.complete_q.blocks = 0;
+			stats->io.alloc.fast        = 0;
+			stats->io.alloc.slow        = 0;
+			stats->io.alloc.fail        = 0;
+			stats->io.alloc.revoke      = 0;
+			stats->io.alloc.block       = 0;
+			stats->io.submit.fast       = 0;
+			stats->io.submit.slow       = 0;
+			stats->io.flush.external    = 0;
+			stats->io.calls.flush       = 0;
+			stats->io.calls.submitted   = 0;
+			stats->io.calls.drain       = 0;
+			stats->io.calls.completed   = 0;
+			stats->io.calls.errors.busy = 0;
+			stats->io.poller.sleeps     = 0;
 		#endif
 	}
@@ -60,20 +59,18 @@
 
 		#if defined(CFA_HAVE_LINUX_IO_URING_H)
-			__atomic_fetch_add( &cltr->io.submit_q.submit_avg.rdy     , proc->io.submit_q.submit_avg.rdy     , __ATOMIC_SEQ_CST ); proc->io.submit_q.submit_avg.rdy      = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.submit_avg.csm     , proc->io.submit_q.submit_avg.csm     , __ATOMIC_SEQ_CST ); proc->io.submit_q.submit_avg.csm      = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.submit_avg.avl     , proc->io.submit_q.submit_avg.avl     , __ATOMIC_SEQ_CST ); proc->io.submit_q.submit_avg.avl      = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.submit_avg.cnt     , proc->io.submit_q.submit_avg.cnt     , __ATOMIC_SEQ_CST ); proc->io.submit_q.submit_avg.cnt      = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.look_avg.val       , proc->io.submit_q.look_avg.val       , __ATOMIC_SEQ_CST ); proc->io.submit_q.look_avg.val        = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.look_avg.cnt       , proc->io.submit_q.look_avg.cnt       , __ATOMIC_SEQ_CST ); proc->io.submit_q.look_avg.cnt        = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.look_avg.block     , proc->io.submit_q.look_avg.block     , __ATOMIC_SEQ_CST ); proc->io.submit_q.look_avg.block      = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.alloc_avg.val      , proc->io.submit_q.alloc_avg.val      , __ATOMIC_SEQ_CST ); proc->io.submit_q.alloc_avg.val       = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.alloc_avg.cnt      , proc->io.submit_q.alloc_avg.cnt      , __ATOMIC_SEQ_CST ); proc->io.submit_q.alloc_avg.cnt       = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.alloc_avg.block    , proc->io.submit_q.alloc_avg.block    , __ATOMIC_SEQ_CST ); proc->io.submit_q.alloc_avg.block     = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.helped             , proc->io.submit_q.helped             , __ATOMIC_SEQ_CST ); proc->io.submit_q.helped              = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.leader             , proc->io.submit_q.leader             , __ATOMIC_SEQ_CST ); proc->io.submit_q.leader              = 0;
-			__atomic_fetch_add( &cltr->io.submit_q.busy               , proc->io.submit_q.busy               , __ATOMIC_SEQ_CST ); proc->io.submit_q.busy                = 0;
-			__atomic_fetch_add( &cltr->io.complete_q.completed_avg.val, proc->io.complete_q.completed_avg.val, __ATOMIC_SEQ_CST ); proc->io.complete_q.completed_avg.val = 0;
-			__atomic_fetch_add( &cltr->io.complete_q.completed_avg.cnt, proc->io.complete_q.completed_avg.cnt, __ATOMIC_SEQ_CST ); proc->io.complete_q.completed_avg.cnt = 0;
-			__atomic_fetch_add( &cltr->io.complete_q.blocks           , proc->io.complete_q.blocks           , __ATOMIC_SEQ_CST ); proc->io.complete_q.blocks            = 0;
+			__atomic_fetch_add( &cltr->io.alloc.fast       , proc->io.alloc.fast       , __ATOMIC_SEQ_CST ); proc->io.alloc.fast        = 0;
+			__atomic_fetch_add( &cltr->io.alloc.slow       , proc->io.alloc.slow       , __ATOMIC_SEQ_CST ); proc->io.alloc.slow        = 0;
+			__atomic_fetch_add( &cltr->io.alloc.fail       , proc->io.alloc.fail       , __ATOMIC_SEQ_CST ); proc->io.alloc.fail        = 0;
+			__atomic_fetch_add( &cltr->io.alloc.revoke     , proc->io.alloc.revoke     , __ATOMIC_SEQ_CST ); proc->io.alloc.revoke      = 0;
+			__atomic_fetch_add( &cltr->io.alloc.block      , proc->io.alloc.block      , __ATOMIC_SEQ_CST ); proc->io.alloc.block       = 0;
+			__atomic_fetch_add( &cltr->io.submit.fast      , proc->io.submit.fast      , __ATOMIC_SEQ_CST ); proc->io.submit.fast       = 0;
+			__atomic_fetch_add( &cltr->io.submit.slow      , proc->io.submit.slow      , __ATOMIC_SEQ_CST ); proc->io.submit.slow       = 0;
+			__atomic_fetch_add( &cltr->io.flush.external   , proc->io.flush.external   , __ATOMIC_SEQ_CST ); proc->io.flush.external    = 0;
+			__atomic_fetch_add( &cltr->io.calls.flush      , proc->io.calls.flush      , __ATOMIC_SEQ_CST ); proc->io.calls.flush       = 0;
+			__atomic_fetch_add( &cltr->io.calls.submitted  , proc->io.calls.submitted  , __ATOMIC_SEQ_CST ); proc->io.calls.submitted   = 0;
+			__atomic_fetch_add( &cltr->io.calls.drain      , proc->io.calls.drain      , __ATOMIC_SEQ_CST ); proc->io.calls.drain       = 0;
+			__atomic_fetch_add( &cltr->io.calls.completed  , proc->io.calls.completed  , __ATOMIC_SEQ_CST ); proc->io.calls.completed   = 0;
+			__atomic_fetch_add( &cltr->io.calls.errors.busy, proc->io.calls.errors.busy, __ATOMIC_SEQ_CST ); proc->io.calls.errors.busy = 0;
+			__atomic_fetch_add( &cltr->io.poller.sleeps    , proc->io.poller.sleeps    , __ATOMIC_SEQ_CST ); proc->io.poller.sleeps     = 0;
 		#endif
 	}
@@ -82,12 +79,6 @@
 
 		if( flags & CFA_STATS_READY_Q ) {
-			double push_sur = (100.0 * ((double)ready.pick.push.success) / ready.pick.push.attempt);
-			double pop_sur  = (100.0 * ((double)ready.pick.pop .success) / ready.pick.pop .attempt);
-
 			double push_len = ((double)ready.pick.push.attempt) / ready.pick.push.success;
 			double pop_len  = ((double)ready.pick.pop .attempt) / ready.pick.pop .success;
-
-			double lpush_sur = (100.0 * ((double)ready.pick.push.lsuccess) / ready.pick.push.local);
-			double lpop_sur  = (100.0 * ((double)ready.pick.pop .lsuccess) / ready.pick.pop .local);
 
 			double lpush_len = ((double)ready.pick.push.local) / ready.pick.push.lsuccess;
@@ -96,25 +87,15 @@
 			__cfaabi_bits_print_safe( STDOUT_FILENO,
 				"----- %s \"%s\" (%p) - Ready Q Stats -----\n"
-				"- total threads run      : %'15" PRIu64 "\n"
-				"- total threads scheduled: %'15" PRIu64 "\n"
-				"- push average probe len : %'18.2lf, %'18.2lf%% (%'15" PRIu64 " attempts)\n"
-				"- pop  average probe len : %'18.2lf, %'18.2lf%% (%'15" PRIu64 " attempts)\n"
-				"- local push avg prb len : %'18.2lf, %'18.2lf%% (%'15" PRIu64 " attempts)\n"
-				"- local pop  avg prb len : %'18.2lf, %'18.2lf%% (%'15" PRIu64 " attempts)\n"
-				"- thread migrations      : %'15" PRIu64 "\n"
-				"- Idle Sleep -\n"
-				"-- halts                 : %'15" PRIu64 "\n"
-				"-- cancelled halts       : %'15" PRIu64 "\n"
-				"-- schedule wake         : %'15" PRIu64 "\n"
-				"-- wake on exit          : %'15" PRIu64 "\n"
+				"- total threads  : %'15" PRIu64 "run, %'15" PRIu64 "schd (%'" PRIu64 "mig )\n"
+				"- push avg probe : %'3.2lf, %'3.2lfl (%'15" PRIu64 " attempts, %'15" PRIu64 " locals)\n"
+				"- pop  avg probe : %'3.2lf, %'3.2lfl (%'15" PRIu64 " attempts, %'15" PRIu64 " locals)\n"
+				"- Idle Sleep     : %'15" PRIu64 "h, %'15" PRIu64 "c, %'15" PRIu64 "w, %'15" PRIu64 "e\n"
 				"\n"
 				, type, name, id
 				, ready.pick.pop.success
 				, ready.pick.push.success
-				, push_len, push_sur, ready.pick.push.attempt
-				, pop_len , pop_sur , ready.pick.pop .attempt
-				, lpush_len, lpush_sur, ready.pick.push.local
-				, lpop_len , lpop_sur , ready.pick.pop .local
 				, ready.threads.migration
+				, push_len, lpush_len, ready.pick.push.attempt, ready.pick.push.local
+				, pop_len , lpop_len , ready.pick.pop .attempt, ready.pick.pop .local
 				, ready.sleep.halts, ready.sleep.cancels, ready.sleep.wakes, ready.sleep.exits
 			);
@@ -123,50 +104,33 @@
 		#if defined(CFA_HAVE_LINUX_IO_URING_H)
 			if( flags & CFA_STATS_IO ) {
-				double avgrdy = ((double)io.submit_q.submit_avg.rdy) / io.submit_q.submit_avg.cnt;
-				double avgcsm = ((double)io.submit_q.submit_avg.csm) / io.submit_q.submit_avg.cnt;
+				uint64_t total_allocs = io.alloc.fast + io.alloc.slow;
+				double avgfasta = ((double)io.alloc.fast) / total_allocs;
 
-				double lavgv = 0;
-				double lavgb = 0;
-				if(io.submit_q.look_avg.cnt != 0) {
-					lavgv = ((double)io.submit_q.look_avg.val  ) / io.submit_q.look_avg.cnt;
-					lavgb = ((double)io.submit_q.look_avg.block) / io.submit_q.look_avg.cnt;
-				}
+				uint64_t total_submits = io.submit.fast + io.submit.slow;
+				double avgfasts = ((double)io.submit.fast) / total_submits;
 
-				double aavgv = 0;
-				double aavgb = 0;
-				if(io.submit_q.alloc_avg.cnt != 0) {
-					aavgv = ((double)io.submit_q.alloc_avg.val  ) / io.submit_q.alloc_avg.cnt;
-					aavgb = ((double)io.submit_q.alloc_avg.block) / io.submit_q.alloc_avg.cnt;
-				}
+				double avgsubs = ((double)io.calls.submitted) / io.calls.flush;
+				double avgcomp = ((double)io.calls.completed) / io.calls.drain;
 
 				__cfaabi_bits_print_safe( STDOUT_FILENO,
 					"----- %s \"%s\" (%p) - I/O Stats -----\n"
-					"- total submit calls     : %'15" PRIu64 "\n"
-					"- avg ready entries      : %'18.2lf\n"
-					"- avg submitted entries  : %'18.2lf\n"
-					"- total helped entries   : %'15" PRIu64 "\n"
-					"- total leader entries   : %'15" PRIu64 "\n"
-					"- total busy submit      : %'15" PRIu64 "\n"
-					"- total ready search     : %'15" PRIu64 "\n"
-					"- avg ready search len   : %'18.2lf\n"
-					"- avg ready search block : %'18.2lf\n"
-					"- total alloc search     : %'15" PRIu64 "\n"
-					"- avg alloc search len   : %'18.2lf\n"
-					"- avg alloc search block : %'18.2lf\n"
-					"- total wait calls       : %'15" PRIu64 "\n"
-					"- avg completion/wait    : %'18.2lf\n"
-					"- total completion blocks: %'15" PRIu64 "\n"
+					"- total allocations : %'" PRIu64 "f, %'" PRIu64 "s (%'2.2lff) \n"
+					"-     failures      : %'" PRIu64 "oom, %'" PRIu64 "rvk, %'" PRIu64 "blk\n"
+					"- total submits     : %'" PRIu64 "f, %'" PRIu64 "s (%'2.2lf) \n"
+					"- flush external    : %'" PRIu64 "\n"
+					"- io_uring_enter    : %'" PRIu64 " (%'" PRIu64 ", %'" PRIu64 " EBUSY)\n"
+					"-     submits       : %'" PRIu64 " (%'.2lf) \n"
+					"-     completes     : %'" PRIu64 " (%'.2lf) \n"
+					"- poller sleeping   : %'" PRIu64 "\n"
 					"\n"
 					, type,  name, id
-					, io.submit_q.submit_avg.cnt
-					, avgrdy, avgcsm
-					, io.submit_q.helped, io.submit_q.leader, io.submit_q.busy
-					, io.submit_q.look_avg.cnt
-					, lavgv, lavgb
-					, io.submit_q.alloc_avg.cnt
-					, aavgv, aavgb
-					, io.complete_q.completed_avg.cnt
-					, ((double)io.complete_q.completed_avg.val) / io.complete_q.completed_avg.cnt
-					, io.complete_q.blocks
+					, io.alloc.fast, io.alloc.slow, avgfasta
+					, io.alloc.fail, io.alloc.revoke, io.alloc.block
+					, io.submit.fast, io.submit.slow, avgfasts
+					, io.flush.external
+					, io.calls.flush, io.calls.drain, io.calls.errors.busy
+					, io.calls.submitted, avgsubs
+					, io.calls.completed, avgcomp
+					, io.poller.sleeps
 				);
 			}
Index: libcfa/src/concurrency/stats.hfa
===================================================================
--- libcfa/src/concurrency/stats.hfa	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ libcfa/src/concurrency/stats.hfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -66,31 +66,29 @@
 		struct __attribute__((aligned(64))) __stats_io_t{
 			struct {
+				volatile uint64_t fast;
+				volatile uint64_t slow;
+				volatile uint64_t fail;
+				volatile uint64_t revoke;
+				volatile uint64_t block;
+			} alloc;
+			struct {
+				volatile uint64_t fast;
+				volatile uint64_t slow;
+			} submit;
+			struct {
+				volatile uint64_t external;
+			} flush;
+			struct {
+				volatile uint64_t drain;
+				volatile uint64_t completed;
+				volatile uint64_t flush;
+				volatile uint64_t submitted;
 				struct {
-					volatile uint64_t rdy;
-					volatile uint64_t csm;
-					volatile uint64_t avl;
-					volatile uint64_t cnt;
-				} submit_avg;
-				struct {
-					volatile uint64_t val;
-					volatile uint64_t cnt;
-					volatile uint64_t block;
-				} look_avg;
-				struct {
-					volatile uint64_t val;
-					volatile uint64_t cnt;
-					volatile uint64_t block;
-				} alloc_avg;
-				volatile uint64_t helped;
-				volatile uint64_t leader;
-				volatile uint64_t busy;
-			} submit_q;
+					volatile uint64_t busy;
+				} errors;
+			} calls;
 			struct {
-				struct {
-					volatile uint64_t val;
-					volatile uint64_t cnt;
-				} completed_avg;
-				volatile uint64_t blocks;
-			} complete_q;
+				volatile uint64_t sleeps;
+			} poller;
 		};
 	#endif
Index: tests/Makefile.am
===================================================================
--- tests/Makefile.am	(revision 182256b4aa148c77a64e9731dedbd8a503973f2f)
+++ tests/Makefile.am	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -44,6 +44,5 @@
 	-Wall \
 	-Wno-unused-function \
-	-quiet @CFA_FLAGS@ \
-	-DIN_DIR="${abs_srcdir}/.in/"
+	-quiet @CFA_FLAGS@
 
 AM_CFAFLAGS = -XCFA --deterministic-out
@@ -76,4 +75,5 @@
 	long_tests.hfa \
 	.in/io.data \
+	io/.in/io.data \
 	avltree/avl.h \
 	avltree/avl-private.h \
@@ -142,5 +142,5 @@
 # don't use distcc to do the linking because distcc doesn't do linking
 % : %.cfa $(CFACCBIN)
-	$(CFACOMPILETEST) -c -o $(abspath ${@}).o
+	$(CFACOMPILETEST) -c -o $(abspath ${@}).o -DIN_DIR="$(abspath $(dir ${<}))/.in/"
 	$(CFACCLINK) ${@}.o -o $(abspath ${@})
 	rm $(abspath ${@}).o
Index: tests/io/.expect/many_read.txt
===================================================================
--- tests/io/.expect/many_read.txt	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
+++ tests/io/.expect/many_read.txt	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -0,0 +1,5 @@
+File has size: 72 bytes
+This is the content of the file to read, it's pretty self explanatory!
+
+
+Yup, it all matches.
Index: tests/io/.in/io.data
===================================================================
--- tests/io/.in/io.data	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
+++ tests/io/.in/io.data	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -0,0 +1,2 @@
+This is the content of the file to read, it's pretty self explanatory!
+
Index: tests/io/many_read.cfa
===================================================================
--- tests/io/many_read.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
+++ tests/io/many_read.cfa	(revision 266ecf1eace6e01cf462c6e47ac0538f28f02825)
@@ -0,0 +1,125 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// many_read.cfa -- Make sure that multiple concurrent reads don't mess up.
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Feb 18 15:26:05 2021
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+extern "C" {
+	#include <sys/types.h>
+	#include <sys/stat.h>
+	#include <fcntl.h>
+	#include <unistd.h>
+}
+
+#include <string.h>
+
+#include <fstream.hfa>
+#include <iofwd.hfa>
+
+#include <thread.hfa>
+#include <kernel.hfa>
+
+#define xstr(s) str(s)
+#define str(s) #s
+
+const char * file = xstr(IN_DIR) "io.data";
+int fd;
+off_t size;
+
+const char * content;
+
+thread reader {};
+void ?{}(reader & this) {}
+void ^?{}(reader & mutex this) {}
+void main(reader & this);
+
+int main() {
+	fd = open(file, 0);
+	if(fd < 0) {
+		serr | "Failed to open file: " | file;
+		exit(1);
+	}
+
+	struct stat sb;
+	int ret = fstat(fd, &sb);
+	if(ret < 0) {
+		serr | "Failed to stat file: " | file;
+		exit(1);
+	}
+
+	size = sb.st_size;
+	sout | "File has size: " | size | "bytes";
+
+	char data[size+1];
+	{
+		size_t left = size;
+		char * it = data;
+
+		while(left) {
+			ret = read(fd, it, left);
+			if(ret < 0) {
+				serr | "Failed to read file: " | file | " (the first time)";
+				exit(1);
+			}
+			it += ret;
+			left -= ret;
+		}
+	}
+	data[size] = 0;
+	content = data;
+	sout | content;
+
+	{
+		processor proc[2];
+		{
+			reader readers[17];
+		}
+	}
+
+	sout | "Yup, it all matches.";
+}
+
+#ifdef CFA_HAVE_PREADV2
+	extern "C" {
+		#include <sys/uio.h>
+	}
+#endif
+
+void main(reader & this) {
+	for(50) {
+		char data[size];
+		char * it = data;
+		size_t left = size;
+		off_t offset = 0;
+
+		#ifdef CFA_HAVE_PREADV2
+			while(left) {
+				struct iovec vec;
+				vec.iov_base = (void*)it;
+				vec.iov_len = left;
+				int ret = cfa_preadv2(fd, &vec, 1, offset, 0, 0);
+				if(ret < 0) {
+					serr | "Failed to read file: " | file | " (NOT the first time)";
+					exit(1);
+				}
+				it += ret;
+				left -= ret;
+				offset += ret;
+			}
+
+			if(strncmp(content, data, size) != 0) {
+				serr | "Subsequent read of file '" | file | "' return different content";
+				exit(1);
+			}
+		#endif
+	}
+}
