Index: benchmark/io/readv.cfa
===================================================================
--- benchmark/io/readv.cfa	(revision ab444132862e18cf726d745d1fe520843b493819)
+++ benchmark/io/readv.cfa	(revision 792215831299879512200cd21ecb49df3a642367)
@@ -1,2 +1,4 @@
+#define _GNU_SOURCE
+
 #include <stdlib.h>
 #include <stdio.h>
@@ -22,4 +24,6 @@
 extern bool traceHeapOn();
 extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
+extern ssize_t cfa_preadv2_fixed(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
+extern void register_fixed_files( cluster &, int *, unsigned count );
 
 int fd;
@@ -28,8 +32,18 @@
 
 unsigned long int buflen = 50;
+bool fixed_file = false;
 
 thread __attribute__((aligned(128))) Reader {};
 void ?{}( Reader & this ) {
 	((thread&)this){ "Reader Thread", *the_benchmark_cluster };
+}
+
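+// issue one readv of a single iovec; when fixed files are enabled, 'fd' is an index into
+// the ring's registered-file table rather than an ordinary file descriptor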
+int do_read(int fd, struct iovec * iov) {
+	if(fixed_file) {
+		return cfa_preadv2_fixed(fd, iov, 1, 0, 0);
+	}
+	else {
+		return cfa_preadv2(fd, iov, 1, 0, 0);
+	}
 }
 
@@ -42,5 +56,5 @@
 
 	while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
-		int r = cfa_preadv2(fd, &iov, 1, 0, 0);
+		int r = do_read(fd, &iov);
 		if(r < 0) abort("%s\n", strerror(-r));
 
@@ -52,4 +66,5 @@
 	BENCH_DECL
 	unsigned flags = 0;
+	int file_flags = 0;
 	unsigned sublen = 16;
 
@@ -96,7 +111,9 @@
 			case 'k':
 				flags |= CFA_CLUSTER_IO_KERNEL_POLL_SUBMITS;
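+				// kernel-side submission polling (SQPOLL) works with files registered on the ring, so switch to fixed-file reads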
+				fixed_file = true;
 				break;
 			case 'i':
 				flags |= CFA_CLUSTER_IO_KERNEL_POLL_COMPLETES;
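+				// kernel-side completion polling (IOPOLL) only works on files opened with O_DIRECT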
+				file_flags |= O_DIRECT;
 				break;
 			case 'l':
@@ -123,6 +140,6 @@
 	}
 
-	fd = open(__FILE__, 0);
-	if(fd < 0) {
+	int lfd = open(__FILE__, file_flags);
+	if(lfd < 0) {
 		fprintf(stderr, "Could not open source file\n");
 		exit(EXIT_FAILURE);
@@ -134,4 +151,13 @@
 		Time start, end;
 		BenchCluster cl = { flags, CFA_STATS_READY_Q | CFA_STATS_IO };
+
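+		// with fixed files, register the descriptor with the cluster's ring and address it
+		// through index 0 of the registered-file table instead of using the raw fd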
+		if(fixed_file) {
+			fd = 0;
+			register_fixed_files( cl.self, &lfd, 1 );
+		}
+		else {
+			fd = lfd;
+		}
+
 		{
 			BenchProc procs[nprocs];
@@ -161,4 +187,4 @@
 	}
 
-	close(fd);
+	close(lfd);
 }
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision ab444132862e18cf726d745d1fe520843b493819)
+++ libcfa/src/concurrency/io.cfa	(revision 792215831299879512200cd21ecb49df3a642367)
@@ -14,6 +14,8 @@
 //
 
-// #define __CFA_DEBUG_PRINT_IO__
-// #define __CFA_DEBUG_PRINT_IO_CORE__
+#if defined(__CFA_DEBUG__)
+	// #define __CFA_DEBUG_PRINT_IO__
+	#define __CFA_DEBUG_PRINT_IO_CORE__
+#endif
 
 #include "kernel.hfa"
@@ -325,5 +327,5 @@
 
 		// Create the poller thread
-		__cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluter %p\n", &this);
+		__cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluster %p\n", &this);
 		this.io->poller.slow.blocked = false;
 		this.io->poller.slow.stack = __create_pthread( &this.io->poller.slow.kthrd, __io_poller_slow, &this );
@@ -430,4 +432,56 @@
 	}
 
+	int __io_uring_enter( struct __io_data & ring, unsigned to_submit, bool get, sigset_t * mask ) {
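+		// Decide whether submitting and/or reaping completions actually requires entering the kernel:
+		// with SQPOLL the kernel thread drains the submit queue on its own and only needs an explicit
+		// IORING_ENTER_SQ_WAKEUP when it has gone to sleep, while completions can normally be read
+		// straight from the CQ ring unless the caller blocks on a signal mask or the ring uses IOPOLL.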
+		bool need_sys_to_submit = false;
+		bool need_sys_to_complete = false;
+		unsigned min_complete = 0;
+		unsigned flags = 0;
+
+		TO_SUBMIT:
+		if( to_submit > 0 ) {
+			if( !(ring.ring_flags & IORING_SETUP_SQPOLL) ) {
+				need_sys_to_submit = true;
+				break TO_SUBMIT;
+			}
+			if( (*ring.submit_q.flags) & IORING_SQ_NEED_WAKEUP ) {
+				need_sys_to_submit = true;
+				flags |= IORING_ENTER_SQ_WAKEUP;
+			}
+		}
+
+		TO_COMPLETE:
+		if( get && !(ring.ring_flags & IORING_SETUP_SQPOLL) ) {
+			flags |= IORING_ENTER_GETEVENTS;
+			if( mask ) {
+				need_sys_to_complete = true;
+				min_complete = 1;
+				break TO_COMPLETE;
+			}
+			if( (ring.ring_flags & IORING_SETUP_IOPOLL) ) {
+				need_sys_to_complete = true;
+			}
+		}
+
+		int ret = 0;
+		if( need_sys_to_submit || need_sys_to_complete ) {
+			ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, min_complete, flags, mask, _NSIG / 8);
+			if( ret < 0 ) {
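+				// EAGAIN and EINTR are transient: report -1 so the caller can simply retry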
+				switch((int)errno) {
+				case EAGAIN:
+				case EINTR:
+					ret = -1;
+					break;
+				default:
+					abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+				}
+			}
+		}
+
+		// Memory barrier: ensure the kernel's ring updates are visible before the caller drains the queues
+		__atomic_thread_fence( __ATOMIC_SEQ_CST );
+		return ret;
+	}
+
 //=============================================================================================
 // I/O Polling
@@ -438,5 +492,5 @@
 	// Process a single completion message from the io_uring
 	// This is NOT thread-safe
-	static [int, bool] __drain_io( & struct __io_data ring, * sigset_t mask, int waitcnt, bool in_kernel ) {
+	static [int, bool] __drain_io( & struct __io_data ring, * sigset_t mask ) {
 		/* paranoid */ verify( !kernelTLS.preemption_state.enabled );
 
@@ -447,20 +501,11 @@
 		}
 
-		if (to_submit > 0 || waitcnt > 0) {
-			int ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
-			if( ret < 0 ) {
-				switch((int)errno) {
-				case EAGAIN:
-				case EINTR:
-					return [0, true];
-				default:
-					abort( "KERNEL ERROR: IO_URING WAIT - %s\n", strerror(errno) );
-				}
-			}
-
-			// Release the consumed SQEs
-			__release_consumed_submission( ring );
-
-			// update statistics
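+		// submit whatever is pending and let the kernel make completions available; a negative result means the enter was interrupted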
+		int ret = __io_uring_enter(ring, to_submit, true, mask);
+		if( ret < 0 ) {
+			return [0, true];
+		}
+
+		// update statistics
+		if (to_submit > 0) {
 			__STATS__( true,
 				if( to_submit > 0 ) {
@@ -472,6 +517,6 @@
 		}
 
-		// Memory barrier
-		__atomic_thread_fence( __ATOMIC_SEQ_CST );
+		// Release the consumed SQEs
+		__release_consumed_submission( ring );
 
 		// Drain the queue
@@ -497,6 +542,6 @@
 
 			data->result = cqe.res;
-			if(!in_kernel) { unpark( data->thrd __cfaabi_dbg_ctx2 ); }
-			else         { __unpark( &ring.poller.slow.id, data->thrd __cfaabi_dbg_ctx2 ); }
+			if(!mask) { unpark( data->thrd __cfaabi_dbg_ctx2 ); }
+			else      { __unpark( &ring.poller.slow.id, data->thrd __cfaabi_dbg_ctx2 ); }
 		}
 
@@ -546,5 +591,5 @@
 				int count;
 				bool again;
-				[count, again] = __drain_io( ring, &mask, 1, true );
+				[count, again] = __drain_io( ring, &mask );
 
 				__atomic_store_n( &ring.poller.slow.blocked, false, __ATOMIC_SEQ_CST );
@@ -568,5 +613,5 @@
 				int count;
 				bool again;
-				[count, again] = __drain_io( ring, &mask, 1, true );
+				[count, again] = __drain_io( ring, &mask );
 
 				// Update statistics
@@ -606,5 +651,5 @@
 			bool again;
 			disable_interrupts();
-				[count, again] = __drain_io( *this.ring, 0p, 0, false );
+				[count, again] = __drain_io( *this.ring, 0p );
 
 				if(!again) reset++;
@@ -800,17 +845,11 @@
 			// We got the lock
 			unsigned to_submit = __collect_submitions( ring );
-			int ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, 0, 0, 0p, _NSIG / 8);
+			int ret = __io_uring_enter( ring, to_submit, false, 0p );
 			if( ret < 0 ) {
-				switch((int)errno) {
-				case EAGAIN:
-				case EINTR:
-					unlock(ring.submit_q.lock);
-					return;
-				default:
-					abort( "KERNEL ERROR: IO_URING WAIT - %s\n", strerror(errno) );
-				}
-			}
-
-			/* paranoid */ verify( ret > 0 );
+				unlock(ring.submit_q.lock);
+				return;
+			}
+
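+			// with SQPOLL the call may not enter the kernel at all, in which case 0 is a valid return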
+			/* paranoid */ verify( ret > 0 || (ring.ring_flags & IORING_SETUP_SQPOLL) );
 
 			// Release the consumed SQEs
@@ -830,15 +869,24 @@
 			lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
 
+			/* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 0,
+			/* paranoid */ 	"index %u already reclaimed\n"
+			/* paranoid */ 	"head %u, prev %u, tail %u\n"
+			/* paranoid */ 	"[-0: %u,-1: %u,-2: %u,-3: %u]\n",
+			/* paranoid */ 	idx,
+			/* paranoid */ 	*ring.submit_q.head, ring.submit_q.prev_head, *tail
+			/* paranoid */ 	,ring.submit_q.array[ ((*ring.submit_q.head) - 0) & (*ring.submit_q.mask) ]
+			/* paranoid */ 	,ring.submit_q.array[ ((*ring.submit_q.head) - 1) & (*ring.submit_q.mask) ]
+			/* paranoid */ 	,ring.submit_q.array[ ((*ring.submit_q.head) - 2) & (*ring.submit_q.mask) ]
+			/* paranoid */ 	,ring.submit_q.array[ ((*ring.submit_q.head) - 3) & (*ring.submit_q.mask) ]
+			/* paranoid */ );
+
 			// Append to the list of ready entries
 
 			/* paranoid */ verify( idx <= mask );
-
-			ring.submit_q.array[ (*tail) & mask ] = idx & mask;
+			ring.submit_q.array[ (*tail) & mask ] = idx;
 			__atomic_fetch_add(tail, 1ul32, __ATOMIC_SEQ_CST);
 
-			/* paranoid */ verify( ring.submit_q.sqes[ idx ].user_data != 0 );
-
 			// Submit however, many entries need to be submitted
-			int ret = syscall( __NR_io_uring_enter, ring.fd, 1, 0, 0, 0p, 0);
+			int ret = __io_uring_enter( ring, 1, false, 0p );
 			if( ret < 0 ) {
 				switch((int)errno) {
@@ -906,3 +954,16 @@
 		return count;
 	}
+
+//=============================================================================================
+// I/O Registration
+//=============================================================================================
+
+	void register_fixed_files( cluster & cl, int * files, unsigned count ) {
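+		// registers 'count' descriptors with the cluster's ring so sqes can refer to them by index (IOSQE_FIXED_FILE)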
+		int ret = syscall( __NR_io_uring_register, cl.io->fd, IORING_REGISTER_FILES, files, count );
+		if( ret < 0 ) {
+			abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+		}
+
+		__cfadbg_print_safe( io_core, "Kernel I/O : Performed io_register for %p, returned %d\n", active_thread(), ret );
+	}
 #endif
Index: libcfa/src/concurrency/iocall.cfa
===================================================================
--- libcfa/src/concurrency/iocall.cfa	(revision ab444132862e18cf726d745d1fe520843b493819)
+++ libcfa/src/concurrency/iocall.cfa	(revision 792215831299879512200cd21ecb49df3a642367)
@@ -108,4 +108,7 @@
 
 	extern ssize_t read (int fd, void *buf, size_t count);
+
+	extern ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags);
+	extern ssize_t tee(int fd_in, int fd_out, size_t len, unsigned int flags);
 }
 
@@ -128,4 +131,17 @@
 		#endif
 	}
+
+	ssize_t cfa_preadv2_fixed(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
+		#if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_READV)
+			return preadv2(fd, iov, iovcnt, offset, flags);
+		#else
+			__submit_prelude
+
+			(*sqe){ IORING_OP_READV, fd, iov, iovcnt, offset };
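+			// with IOSQE_FIXED_FILE, 'fd' is an index into the table passed to register_fixed_files, not a descriptor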
+			sqe->flags |= IOSQE_FIXED_FILE;
+
+			__submit_wait
+		#endif
+	}
 #endif
 
@@ -329,5 +345,4 @@
 }
 
-
 ssize_t cfa_read(int fd, void *buf, size_t count) {
 	#if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_READ)
@@ -349,4 +364,33 @@
 
 		(*sqe){ IORING_OP_WRITE, fd, buf, count, 0 };
+
+		__submit_wait
+	#endif
+}
+
+ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags) {
+	#if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_SPLICE)
+		return splice( fd_in, off_in, fd_out, off_out, len, flags );
+	#else
+		__submit_prelude
+
+		// the sqe carries the offsets by value; (__u64)-1 tells the kernel to use the file's current offset
+		(*sqe){ IORING_OP_SPLICE, fd_out, 0p, len, off_out ? (__u64)*off_out : (__u64)-1 };
+		sqe->splice_fd_in  = fd_in;
+		sqe->splice_off_in = off_in ? (__u64)*off_in : (__u64)-1;
+		sqe->splice_flags  = flags;
+
+		__submit_wait
+	#endif
+}
+
+ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags) {
+	#if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_TEE)
+		return tee( fd_in, fd_out, len, flags );
+	#else
+		__submit_prelude
+
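+		// tee duplicates up to 'len' bytes between two pipes; the input side goes in the splice_* fields of the sqe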
+		(*sqe){ IORING_OP_TEE, fd_out, 0p, len, 0 };
+		sqe->splice_fd_in = fd_in;
+		sqe->splice_flags = flags;
 
 		__submit_wait
@@ -453,4 +497,14 @@
 			#define _CFA_IO_FEATURE_IORING_OP_WRITE ,
 			return IS_DEFINED(IORING_OP_WRITE);
+
+		if( /*func == (fptr_t)splice || */
+			func == (fptr_t)cfa_splice )
+			#define _CFA_IO_FEATURE_IORING_OP_SPLICE ,
+			return IS_DEFINED(IORING_OP_SPLICE);
+
+		if( /*func == (fptr_t)tee || */
+			func == (fptr_t)cfa_tee )
+			#define _CFA_IO_FEATURE_IORING_OP_TEE ,
+			return IS_DEFINED(IORING_OP_TEE);
 	#endif
 
