Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 77fde9d52d93437c80f050335facd87a71c4b34e)
+++ libcfa/src/concurrency/io.cfa	(revision 426f60c4047b31cdf0e37c53aec7722f2f9b4c8e)
@@ -32,4 +32,5 @@
 	extern "C" {
 		#include <sys/epoll.h>
+		#include <sys/eventfd.h>
 		#include <sys/syscall.h>
 
@@ -41,4 +42,42 @@
 	#include "kernel/fwd.hfa"
 	#include "io/types.hfa"
+
+	static const char * opcodes[] = { // printable opcode names for the debug logs, looked up as opcodes[sqe->opcode]
+		"OP_NOP",
+		"OP_READV",
+		"OP_WRITEV",
+		"OP_FSYNC",
+		"OP_READ_FIXED",
+		"OP_WRITE_FIXED",
+		"OP_POLL_ADD",
+		"OP_POLL_REMOVE",
+		"OP_SYNC_FILE_RANGE",
+		"OP_SENDMSG",
+		"OP_RECVMSG",
+		"OP_TIMEOUT",
+		"OP_TIMEOUT_REMOVE",
+		"OP_ACCEPT",
+		"OP_ASYNC_CANCEL",
+		"OP_LINK_TIMEOUT",
+		"OP_CONNECT",
+		"OP_FALLOCATE",
+		"OP_OPENAT",
+		"OP_CLOSE",
+		"OP_FILES_UPDATE",
+		"OP_STATX",
+		"OP_READ",
+		"OP_WRITE",
+		"OP_FADVISE",
+		"OP_MADVISE",
+		"OP_SEND",
+		"OP_RECV",
+		"OP_OPENAT2",
+		"OP_EPOLL_CTL",
+		"OP_SPLICE",
+		"OP_PROVIDE_BUFFERS",
+		"OP_REMOVE_BUFFERS",
+		"OP_TEE",
+		"INVALID_OP" // last slot (index 34); NOTE(review): presumably the index IORING_OP_LAST maps to - confirm against <linux/io_uring.h>
+	};
 
 	// returns true of acquired as leader or second leader
@@ -159,5 +198,7 @@
 	static __u32 __release_consumed_submission( struct __io_data & ring );
 
-	static inline void process(struct io_uring_cqe & cqe ) {
+	// Process a single completion message from the io_uring
+	// This is NOT thread-safe
+	static inline void process( volatile struct io_uring_cqe & cqe ) {
 		struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data;
 		__cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future );
@@ -166,6 +207,4 @@
 	}
 
-	// Process a single completion message from the io_uring
-	// This is NOT thread-safe
 	static [int, bool] __drain_io( & struct __io_data ring ) {
 		/* paranoid */ verify( ! __preemption_enabled() );
@@ -193,4 +232,6 @@
 		}
 
+		__atomic_thread_fence( __ATOMIC_SEQ_CST );
+
 		// Release the consumed SQEs
 		__release_consumed_submission( ring );
@@ -210,5 +251,5 @@
 		for(i; count) {
 			unsigned idx = (head + i) & mask;
-			struct io_uring_cqe & cqe = ring.completion_q.cqes[idx];
+			volatile struct io_uring_cqe & cqe = ring.completion_q.cqes[idx];
 
 			/* paranoid */ verify(&cqe);
@@ -219,6 +260,5 @@
 		// Mark to the kernel that the cqe has been seen
 		// Ensure that the kernel only sees the new value of the head index after the CQEs have been read.
-		__atomic_thread_fence( __ATOMIC_SEQ_CST );
-		__atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED );
+		__atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_SEQ_CST );
 
 		return [count, count > 0 || to_submit > 0];
@@ -229,5 +269,5 @@
 		__ioctx_register( this, ev );
 
-		__cfadbg_print_safe(io_core, "Kernel I/O : IO poller %p for ring %p ready\n", &this, &this.ring);
+		__cfadbg_print_safe(io_core, "Kernel I/O : IO poller %d (%p) ready\n", this.ring->fd, &this);
 
 		const int reset_cnt = 5;
@@ -257,5 +297,5 @@
 			}
 
-			// We alread failed to find events a few time.
+			// We already failed to find completed entries a few times.
 			if(reset == 1) {
 				// Rearm the context so it can block
@@ -270,14 +310,17 @@
 					io.complete_q.blocks += 1;
 				)
-				__cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %p\n", &this.self);
+				__cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %d (%p)\n", this.ring->fd, &this);
 
 				// block this thread
 				wait( this.sem );
 
+				eventfd_t v;
+				eventfd_read(this.ring->efd, &v);
+
 			// restore counter
 			reset = reset_cnt;
 		}
 
-		__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring);
+		__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller %d (%p) stopping\n", this.ring->fd, &this);
 	}
 
@@ -302,5 +345,10 @@
 //
 
-	[* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ) {
+	// Allocate a submit queue entry.
+	// The kernel cannot see these entries until they are submitted, but other threads must be
+	// able to see which entries can be used and which are already in use by another thread.
+	// For convenience, return both the index and the pointer to the sqe:
+	// sqe == &sqes[idx]
+	[* volatile struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ) {
 		/* paranoid */ verify( data != 0 );
 
@@ -317,9 +365,10 @@
 			// Look through the list starting at some offset
 			for(i; cnt) {
-				__u64 expected = 0;
-				__u32 idx = (i + off) & mask;
-				struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];
+				__u64 expected = 3;
+				__u32 idx = (i + off) & mask; // Get an index from a random
+				volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];
 				volatile __u64 * udata = &sqe->user_data;
 
+				// Allocate the entry by CASing the user_data field from the free marker (3) to the future address
 				if( *udata == expected &&
 					__atomic_compare_exchange_n( udata, &expected, data, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) )
@@ -332,15 +381,31 @@
 					)
 
+					// debug log
 					__cfadbg_print_safe( io, "Kernel I/O : allocated [%p, %u] for %p (%p)\n", sqe, idx, active_thread(), (void*)data );
 
 					// Success return the data
+					sqe->opcode = 0;
+					sqe->flags = 0;
+					sqe->ioprio = 0;
+					sqe->fd = 0;
+					sqe->off = 0;
+					sqe->addr = 0;
+					sqe->len = 0;
+					sqe->accept_flags = 0;
+					sqe->__pad2[0] = 0;
+					sqe->__pad2[1] = 0;
+					sqe->__pad2[2] = 0;
 					return [sqe, idx];
 				}
 				verify(expected != data);
 
+				// This one was used
 				len ++;
 			}
 
 			block++;
+
+			abort( "Kernel I/O : all submit queue entries used, yielding\n" );
+
 			yield();
 		}
@@ -390,7 +455,42 @@
 
 	void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1))) {
-		__cfadbg_print_safe( io, "Kernel I/O : submitting %u for %p\n", idx, active_thread() );
-
 		__io_data & ring = *ctx->thrd.ring;
+
+		{
+			__attribute__((unused)) volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];
+			__cfadbg_print_safe( io,
+				"Kernel I/O : submitting %u (%p) for %p\n"
+				"    data: %p\n"
+				"    opcode: %s\n"
+				"    fd: %d\n"
+				"    flags: %d\n"
+				"    prio: %d\n"
+				"    off: %p\n"
+				"    addr: %p\n"
+				"    len: %d\n"
+				"    other flags: %d\n"
+				"    splice fd: %d\n"
+				"    pad[0]: %llu\n"
+				"    pad[1]: %llu\n"
+				"    pad[2]: %llu\n",
+				idx, sqe,
+				active_thread(),
+				(void*)sqe->user_data,
+				opcodes[sqe->opcode],
+				sqe->fd,
+				sqe->flags,
+				sqe->ioprio,
+				sqe->off,
+				sqe->addr,
+				sqe->len,
+				sqe->accept_flags,
+				sqe->splice_fd_in,
+				sqe->__pad2[0],
+				sqe->__pad2[1],
+				sqe->__pad2[2]
+			);
+		}
+
+
 		// Get now the data we definetely need
 		volatile __u32 * const tail = ring.submit_q.tail;
@@ -475,5 +575,6 @@
 			__cfadbg_print_safe( io, "Kernel I/O : submitted %u (among %u) for %p\n", idx, ret, active_thread() );
 		}
-		else {
+		else
+		{
 			// get mutual exclusion
 			#if defined(LEADER_LOCK)
@@ -483,5 +584,5 @@
 			#endif
 
-			/* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 0,
+			/* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 3ul64,
 			/* paranoid */ 	"index %u already reclaimed\n"
 			/* paranoid */ 	"head %u, prev %u, tail %u\n"
@@ -510,4 +611,6 @@
 			}
 
+			/* paranoid */ verify(ret == 1);
+
 			// update statistics
 			__STATS__( false,
@@ -516,6 +619,45 @@
 			)
 
+			{
+				__attribute__((unused)) volatile __u32 * const head = ring.submit_q.head;
+				__attribute__((unused)) __u32 last_idx = ring.submit_q.array[ ((*head) - 1) & mask ];
+				__attribute__((unused)) volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[last_idx];
+
+				__cfadbg_print_safe( io,
+					"Kernel I/O : last submitted is %u (%p)\n"
+					"    data: %p\n"
+					"    opcode: %s\n"
+					"    fd: %d\n"
+					"    flags: %d\n"
+					"    prio: %d\n"
+					"    off: %p\n"
+					"    addr: %p\n"
+					"    len: %d\n"
+					"    other flags: %d\n"
+					"    splice fd: %d\n"
+					"    pad[0]: %llu\n"
+					"    pad[1]: %llu\n"
+					"    pad[2]: %llu\n",
+					last_idx, sqe,
+					(void*)sqe->user_data,
+					opcodes[sqe->opcode],
+					sqe->fd,
+					sqe->flags,
+					sqe->ioprio,
+					sqe->off,
+					sqe->addr,
+					sqe->len,
+					sqe->accept_flags,
+					sqe->splice_fd_in,
+					sqe->__pad2[0],
+					sqe->__pad2[1],
+					sqe->__pad2[2]
+				);
+			}
+
+			__atomic_thread_fence( __ATOMIC_SEQ_CST );
 			// Release the consumed SQEs
 			__release_consumed_submission( ring );
+			// ring.submit_q.sqes[idx].user_data = 3ul64;
 
 			#if defined(LEADER_LOCK)
@@ -525,9 +667,12 @@
 			#endif
 
-			__cfadbg_print_safe( io, "Kernel I/O : Performed io_submit for %p, returned %d\n", active_thread(), ret );
+			__cfadbg_print_safe( io, "Kernel I/O : submitted %u for %p\n", idx, active_thread() );
 		}
 	}
 
 	// #define PARTIAL_SUBMIT 32
+
+	// go through the list of submissions in the ready array and move them into
+	// the ring's submit queue
 	static unsigned __collect_submitions( struct __io_data & ring ) {
 		/* paranoid */ verify( ring.submit_q.ready != 0p );
@@ -570,17 +715,42 @@
 	}
 
+	// Go through the ring's submit queue and release everything that has already been consumed
+	// by io_uring
 	static __u32 __release_consumed_submission( struct __io_data & ring ) {
 		const __u32 smask = *ring.submit_q.mask;
 
+		// We need to get the lock to copy the old head and new head
 		if( !try_lock(ring.submit_q.release_lock __cfaabi_dbg_ctx2) ) return 0;
-		__u32 chead = *ring.submit_q.head;
-		__u32 phead = ring.submit_q.prev_head;
-		ring.submit_q.prev_head = chead;
+		__attribute__((unused))
+		__u32 ctail = *ring.submit_q.tail;        // get the current tail of the queue
+		__u32 chead = *ring.submit_q.head;		// get the current head of the queue
+		__u32 phead = ring.submit_q.prev_head;	// get the head the last time we were here
+		ring.submit_q.prev_head = chead;		// note up to were we processed
 		unlock(ring.submit_q.release_lock);
 
+		// the 3 fields are organized like this diagram
+		// except it's a ring
+		// ---+--------+--------+----
+		// ---+--------+--------+----
+		//    ^        ^        ^
+		// phead    chead    ctail
+
+		// make sure ctail doesn't wrap around and reach phead
+		/* paranoid */ verify(
+			   (ctail >= chead && chead >= phead)
+			|| (chead >= phead && phead >= ctail)
+			|| (phead >= ctail && ctail >= chead)
+		);
+
+		// find the range we need to clear
 		__u32 count = chead - phead;
+
+		// We acquired a previous-head/current-head range
+		// go through the range and release the sqes
 		for( i; count ) {
 			__u32 idx = ring.submit_q.array[ (phead + i) & smask ];
-			ring.submit_q.sqes[ idx ].user_data = 0;
+
+			/* paranoid */ verify( 0 != ring.submit_q.sqes[ idx ].user_data );
+			ring.submit_q.sqes[ idx ].user_data = 3ul64;
 		}
 		return count;
Index: libcfa/src/concurrency/io/call.cfa.in
===================================================================
--- libcfa/src/concurrency/io/call.cfa.in	(revision 77fde9d52d93437c80f050335facd87a71c4b34e)
+++ libcfa/src/concurrency/io/call.cfa.in	(revision 426f60c4047b31cdf0e37c53aec7722f2f9b4c8e)
@@ -74,5 +74,5 @@
 	;
 
-	extern [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data );
+	extern [* volatile struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data );
 	extern void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1)));
 
@@ -221,5 +221,5 @@
 
 		__u32 idx;
-		struct io_uring_sqe * sqe;
+		volatile struct io_uring_sqe * sqe;
 		[sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future );
 
@@ -314,4 +314,5 @@
 	Call('ACCEPT', 'int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags)', {
 		'fd': 'sockfd',
+		'ioprio': '0',
 		'addr': '(__u64)addr',
 		'addr2': '(__u64)addrlen',
@@ -373,4 +374,5 @@
 	Call('READ', 'ssize_t read(int fd, void * buf, size_t count)', {
 		'fd': 'fd',
+		'off': '0',
 		'addr': '(__u64)buf',
 		'len': 'count'
@@ -379,4 +381,5 @@
 	Call('WRITE', 'ssize_t write(int fd, void * buf, size_t count)', {
 		'fd': 'fd',
+		'off': '0',
 		'addr': '(__u64)buf',
 		'len': 'count'
@@ -477,5 +480,5 @@
 
 		__u32 idx;
-		struct io_uring_sqe * sqe;
+		volatile struct io_uring_sqe * sqe;
 		[sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future );
 
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 77fde9d52d93437c80f050335facd87a71c4b34e)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 426f60c4047b31cdf0e37c53aec7722f2f9b4c8e)
@@ -52,4 +52,5 @@
 		#include <pthread.h>
 		#include <sys/epoll.h>
+		#include <sys/eventfd.h>
 		#include <sys/mman.h>
 		#include <sys/syscall.h>
@@ -185,5 +186,5 @@
 				$io_ctx_thread * io_ctx = ($io_ctx_thread *)(uintptr_t)events[i].data.u64;
 				/* paranoid */ verify( io_ctx );
-				__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Unparking io poller %p\n", io_ctx);
+				__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Unparking io poller %d (%p)\n", io_ctx->ring->fd, io_ctx);
 				#if !defined( __CFA_NO_STATISTICS__ )
 					__cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats;
@@ -309,4 +310,7 @@
 	}
 
+	extern void signal_unblock( int sig );
+	extern void signal_block  ( int sig );
+
 	static void __io_create( __io_data & this, const io_context_params & params_in ) {
 		// Step 1 : call to setup
@@ -377,5 +381,9 @@
 			abort("KERNEL ERROR: IO_URING MMAP3 - %s\n", strerror(errno));
 		}
-
+		memset(sq.sqes, 0xde, size);
+
+		verify( 0 != (params.features & IORING_FEAT_NODROP) );
+
+		// Step 3 : Initialize the data structure
 		// Get the pointers from the kernel to fill the structure
 		// submit queue
@@ -392,5 +400,6 @@
 			const __u32 num = *sq.num;
 			for( i; num ) {
-				sq.sqes[i].user_data = 0ul64;
+				sq.sqes[i].opcode = IORING_OP_LAST;
+				sq.sqes[i].user_data = 3ul64;
 			}
 		}
@@ -422,4 +431,19 @@
 		cq.cqes = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes);
 
+		signal_block( SIGUSR1 );
+
+		// Step 4 : eventfd
+		int efd = eventfd(0, 0);
+		if (efd < 0) {
+			abort("KERNEL ERROR: IO_URING EVENTFD - %s\n", strerror(errno));
+		}
+
+		int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &efd, 1);
+		if (ret < 0) {
+			abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno));
+		}
+
+		signal_unblock( SIGUSR1 );
+
 		// some paranoid checks
 		/* paranoid */ verifyf( (*cq.mask) == ((*cq.num) - 1ul32), "IO_URING Expected mask to be %u (%u entries), was %u", (*cq.num) - 1ul32, *cq.num, *cq.mask  );
@@ -436,4 +460,5 @@
 		this.ring_flags = params.flags;
 		this.fd         = fd;
+		this.efd        = efd;
 		this.eager_submits  = params_in.eager_submits;
 		this.poller_submits = params_in.poller_submits;
@@ -458,4 +483,5 @@
 		// close the file descriptor
 		close(this.fd);
+		close(this.efd);
 
 		free( this.submit_q.ready ); // Maybe null, doesn't matter
@@ -467,7 +493,7 @@
 
 	void __ioctx_register($io_ctx_thread & ctx, struct epoll_event & ev) {
-		ev.events = EPOLLIN | EPOLLONESHOT;
+		ev.events = EPOLLIN | EPOLLET | EPOLLONESHOT;
 		ev.data.u64 = (__u64)&ctx;
-		int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_ADD, ctx.ring->fd, &ev);
+		int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_ADD, ctx.ring->efd, &ev);
 		if (ret < 0) {
 			abort( "KERNEL ERROR: EPOLL ADD - (%d) %s\n", (int)errno, strerror(errno) );
@@ -476,6 +502,6 @@
 
 	void __ioctx_prepare_block($io_ctx_thread & ctx, struct epoll_event & ev) {
-		__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %p\n", &ctx);
-		int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_MOD, ctx.ring->fd, &ev);
+		__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %d (%p)\n", ctx.ring->fd, &ctx);
+		int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_MOD, ctx.ring->efd, &ev);
 		if (ret < 0) {
 			abort( "KERNEL ERROR: EPOLL REARM - (%d) %s\n", (int)errno, strerror(errno) );
Index: libcfa/src/concurrency/io/types.hfa
===================================================================
--- libcfa/src/concurrency/io/types.hfa	(revision 77fde9d52d93437c80f050335facd87a71c4b34e)
+++ libcfa/src/concurrency/io/types.hfa	(revision 426f60c4047b31cdf0e37c53aec7722f2f9b4c8e)
@@ -65,5 +65,5 @@
 
 		// A buffer of sqes (not the actual ring)
-		struct io_uring_sqe * sqes;
+		volatile struct io_uring_sqe * sqes;
 
 		// The location and size of the mmaped area
@@ -85,5 +85,5 @@
 
 		// the kernel ring
-		struct io_uring_cqe * cqes;
+		volatile struct io_uring_cqe * cqes;
 
 		// The location and size of the mmaped area
@@ -97,4 +97,5 @@
 		__u32 ring_flags;
 		int fd;
+		int efd;
 		bool eager_submits:1;
 		bool poller_submits:1;
