Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 4c925cd31e0a0d5ba86584befde68164c77404b8)
+++ libcfa/src/concurrency/io.cfa	(revision 49981556d70010c36f072dc3534ff01415259a87)
@@ -93,5 +93,5 @@
 //=============================================================================================
 	static unsigned __collect_submitions( struct __io_data & ring );
-	static uint32_t __release_consumed_submission( struct __io_data & ring );
+	static __u32 __release_consumed_submission( struct __io_data & ring );
 
 	static inline void process(struct io_uring_cqe & cqe ) {
@@ -100,5 +100,5 @@
 
 		data->result = cqe.res;
-		unpark( data->thrd __cfaabi_dbg_ctx2 );
+		post( data->sem );
 	}
 
@@ -136,5 +136,5 @@
 		unsigned head = *ring.completion_q.head;
 		unsigned tail = *ring.completion_q.tail;
-		const uint32_t mask = *ring.completion_q.mask;
+		const __u32 mask = *ring.completion_q.mask;
 
 		// Nothing was new return 0
@@ -143,5 +143,5 @@
 		}
 
-		uint32_t count = tail - head;
+		__u32 count = tail - head;
 		/* paranoid */ verify( count != 0 );
 		for(i; count) {
@@ -224,5 +224,5 @@
 //
 
-	[* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring, uint64_t data ) {
+	[* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ) {
 		/* paranoid */ verify( data != 0 );
 
@@ -230,9 +230,9 @@
 		__attribute((unused)) int len   = 0;
 		__attribute((unused)) int block = 0;
-		uint32_t cnt = *ring.submit_q.num;
-		uint32_t mask = *ring.submit_q.mask;
+		__u32 cnt = *ring.submit_q.num;
+		__u32 mask = *ring.submit_q.mask;
 
 		disable_interrupts();
-			uint32_t off = __tls_rand();
+			__u32 off = __tls_rand();
 		enable_interrupts( __cfaabi_dbg_ctx );
 
@@ -241,8 +241,8 @@
 			// Look through the list starting at some offset
 			for(i; cnt) {
-				uint64_t expected = 0;
-				uint32_t idx = (i + off) & mask;
+				__u64 expected = 0;
+				__u32 idx = (i + off) & mask;
 				struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];
-				volatile uint64_t * udata = (volatile uint64_t *)&sqe->user_data;
+				volatile __u64 * udata = &sqe->user_data;
 
 				if( *udata == expected &&
@@ -270,5 +270,5 @@
 	}
 
-	static inline uint32_t __submit_to_ready_array( struct __io_data & ring, uint32_t idx, const uint32_t mask ) {
+	static inline __u32 __submit_to_ready_array( struct __io_data & ring, __u32 idx, const __u32 mask ) {
 		/* paranoid */ verify( idx <= mask   );
 		/* paranoid */ verify( idx != -1ul32 );
@@ -277,15 +277,15 @@
 		__attribute((unused)) int len   = 0;
 		__attribute((unused)) int block = 0;
-		uint32_t ready_mask = ring.submit_q.ready_cnt - 1;
+		__u32 ready_mask = ring.submit_q.ready_cnt - 1;
 
 		disable_interrupts();
-			uint32_t off = __tls_rand();
+			__u32 off = __tls_rand();
 		enable_interrupts( __cfaabi_dbg_ctx );
 
-		uint32_t picked;
+		__u32 picked;
 		LOOKING: for() {
 			for(i; ring.submit_q.ready_cnt) {
 				picked = (i + off) & ready_mask;
-				uint32_t expected = -1ul32;
+				__u32 expected = -1ul32;
 				if( __atomic_compare_exchange_n( &ring.submit_q.ready[picked], &expected, idx, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) {
 					break LOOKING;
@@ -316,9 +316,9 @@
 	}
 
-	void __submit( struct io_context * ctx, uint32_t idx ) __attribute__((nonnull (1))) {
+	void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1))) {
 		__io_data & ring = *ctx->thrd.ring;
 		// Get now the data we definetely need
-		volatile uint32_t * const tail = ring.submit_q.tail;
-		const uint32_t mask  = *ring.submit_q.mask;
+		volatile __u32 * const tail = ring.submit_q.tail;
+		const __u32 mask  = *ring.submit_q.mask;
 
 		// There are 2 submission schemes, check which one we are using
@@ -332,5 +332,5 @@
 		}
 		else if( ring.eager_submits ) {
-			uint32_t picked = __submit_to_ready_array( ring, idx, mask );
+			__u32 picked = __submit_to_ready_array( ring, idx, mask );
 
 			for() {
@@ -429,11 +429,11 @@
 
 		unsigned to_submit = 0;
-		uint32_t tail = *ring.submit_q.tail;
-		const uint32_t mask = *ring.submit_q.mask;
+		__u32 tail = *ring.submit_q.tail;
+		const __u32 mask = *ring.submit_q.mask;
 
 		// Go through the list of ready submissions
 		for( i; ring.submit_q.ready_cnt ) {
 			// replace any submission with the sentinel, to consume it.
-			uint32_t idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED);
+			__u32 idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED);
 
 			// If it was already the sentinel, then we are done
@@ -451,16 +451,16 @@
 	}
 
-	static uint32_t __release_consumed_submission( struct __io_data & ring ) {
-		const uint32_t smask = *ring.submit_q.mask;
+	static __u32 __release_consumed_submission( struct __io_data & ring ) {
+		const __u32 smask = *ring.submit_q.mask;
 
 		if( !try_lock(ring.submit_q.release_lock __cfaabi_dbg_ctx2) ) return 0;
-		uint32_t chead = *ring.submit_q.head;
-		uint32_t phead = ring.submit_q.prev_head;
+		__u32 chead = *ring.submit_q.head;
+		__u32 phead = ring.submit_q.prev_head;
 		ring.submit_q.prev_head = chead;
 		unlock(ring.submit_q.release_lock);
 
-		uint32_t count = chead - phead;
+		__u32 count = chead - phead;
 		for( i; count ) {
-			uint32_t idx = ring.submit_q.array[ (phead + i) & smask ];
+			__u32 idx = ring.submit_q.array[ (phead + i) & smask ];
 			ring.submit_q.sqes[ idx ].user_data = 0;
 		}
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 4c925cd31e0a0d5ba86584befde68164c77404b8)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 49981556d70010c36f072dc3534ff01415259a87)
@@ -298,5 +298,5 @@
 		if( params_in.poll_complete ) params.flags |= IORING_SETUP_IOPOLL;
 
-		uint32_t nentries = params_in.num_entries != 0 ? params_in.num_entries : 256;
+		__u32 nentries = params_in.num_entries != 0 ? params_in.num_entries : 256;
 		if( !is_pow2(nentries) ) {
 			abort("ERROR: I/O setup 'num_entries' must be a power of 2\n");
@@ -362,15 +362,15 @@
 		// Get the pointers from the kernel to fill the structure
 		// submit queue
-		sq.head    = (volatile uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.head);
-		sq.tail    = (volatile uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);
-		sq.mask    = (   const uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);
-		sq.num     = (   const uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);
-		sq.flags   = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);
-		sq.dropped = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
-		sq.array   = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
+		sq.head    = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.head);
+		sq.tail    = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);
+		sq.mask    = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);
+		sq.num     = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);
+		sq.flags   = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);
+		sq.dropped = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
+		sq.array   = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
 		sq.prev_head = *sq.head;
 
 		{
-			const uint32_t num = *sq.num;
+			const __u32 num = *sq.num;
 			for( i; num ) {
 				sq.sqes[i].user_data = 0ul64;
@@ -395,10 +395,10 @@
 
 		// completion queue
-		cq.head     = (volatile uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.head);
-		cq.tail     = (volatile uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.tail);
-		cq.mask     = (   const uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_mask);
-		cq.num      = (   const uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_entries);
-		cq.overflow = (         uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.overflow);
-		cq.cqes   = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes);
+		cq.head      = (volatile __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.head);
+		cq.tail      = (volatile __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.tail);
+		cq.mask      = (   const __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_mask);
+		cq.num       = (   const __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_entries);
+		cq.overflow  = (         __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.overflow);
+		cq.cqes = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes);
 
 		// some paranoid checks
@@ -448,5 +448,5 @@
 	void __ioctx_register($io_ctx_thread & ctx, struct epoll_event & ev) {
 		ev.events = EPOLLIN | EPOLLONESHOT;
-		ev.data.u64 = (uint64_t)&ctx;
+		ev.data.u64 = (__u64)&ctx;
 		int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_ADD, ctx.ring->fd, &ev);
 		if (ret < 0) {
Index: libcfa/src/concurrency/io/types.hfa
===================================================================
--- libcfa/src/concurrency/io/types.hfa	(revision 4c925cd31e0a0d5ba86584befde68164c77404b8)
+++ libcfa/src/concurrency/io/types.hfa	(revision 49981556d70010c36f072dc3534ff01415259a87)
@@ -17,4 +17,8 @@
 
 #if defined(CFA_HAVE_LINUX_IO_URING_H)
+	extern "C" {
+		#include <linux/types.h>
+	}
+
       #include "bits/locks.hfa"
 
@@ -23,25 +27,25 @@
       struct __submition_data {
 		// Head and tail of the ring (associated with array)
-		volatile uint32_t * head;
-		volatile uint32_t * tail;
-		volatile uint32_t prev_head;
+		volatile __u32 * head;
+		volatile __u32 * tail;
+		volatile __u32 prev_head;
 
 		// The actual kernel ring which uses head/tail
 		// indexes into the sqes arrays
-		uint32_t * array;
+		__u32 * array;
 
 		// number of entries and mask to go with it
-		const uint32_t * num;
-		const uint32_t * mask;
+		const __u32 * num;
+		const __u32 * mask;
 
 		// Submission flags (Not sure what for)
-		uint32_t * flags;
+		__u32 * flags;
 
 		// number of sqes not submitted (whatever that means)
-		uint32_t * dropped;
+		__u32 * dropped;
 
 		// Like head/tail but not seen by the kernel
-		volatile uint32_t * ready;
-		uint32_t ready_cnt;
+		volatile __u32 * ready;
+		__u32 ready_cnt;
 
 		__spinlock_t lock;
@@ -58,13 +62,13 @@
 	struct __completion_data {
 		// Head and tail of the ring
-		volatile uint32_t * head;
-		volatile uint32_t * tail;
+		volatile __u32 * head;
+		volatile __u32 * tail;
 
 		// number of entries and mask to go with it
-		const uint32_t * mask;
-		const uint32_t * num;
+		const __u32 * mask;
+		const __u32 * num;
 
 		// number of cqes not submitted (whatever that means)
-		uint32_t * overflow;
+		__u32 * overflow;
 
 		// the kernel ring
@@ -79,5 +83,5 @@
 		struct __submition_data submit_q;
 		struct __completion_data completion_q;
-		uint32_t ring_flags;
+		__u32 ring_flags;
 		int fd;
 		bool eager_submits:1;
@@ -89,6 +93,6 @@
 	// IO user data
 	struct __io_user_data_t {
-		int32_t result;
-		$thread * thrd;
+		__s32 result;
+		oneshot sem;
 	};
 
Index: libcfa/src/concurrency/iocall.cfa
===================================================================
--- libcfa/src/concurrency/iocall.cfa	(revision 4c925cd31e0a0d5ba86584befde68164c77404b8)
+++ libcfa/src/concurrency/iocall.cfa	(revision 49981556d70010c36f072dc3534ff01415259a87)
@@ -32,8 +32,8 @@
 	#include "io/types.hfa"
 
-	extern [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring, uint64_t data );
-	extern void __submit( struct io_context * ctx, uint32_t idx ) __attribute__((nonnull (1)));
-
-	static inline void ?{}(struct io_uring_sqe & this, uint8_t opcode, int fd) {
+	extern [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data );
+	extern void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1)));
+
+	static inline void ?{}(struct io_uring_sqe & this, __u8 opcode, int fd) {
 		this.opcode = opcode;
 		#if !defined(IOSQE_ASYNC)
@@ -51,8 +51,8 @@
 	}
 
-	static inline void ?{}(struct io_uring_sqe & this, uint8_t opcode, int fd, void * addr, uint32_t len, uint64_t off ) {
+	static inline void ?{}(struct io_uring_sqe & this, __u8 opcode, int fd, void * addr, __u32 len, __u64 off ) {
 		(this){ opcode, fd };
 		this.off = off;
-		this.addr = (uint64_t)(uintptr_t)addr;
+		this.addr = (__u64)(uintptr_t)addr;
 		this.len = len;
 	}
@@ -105,17 +105,17 @@
 		(void)timeout; (void)cancellation; \
 		if( !context ) context = __get_io_context(); \
-		__io_user_data_t data = { 0, active_thread() }; \
+		__io_user_data_t data = { 0 }; \
 		struct __io_data & ring = *context->thrd.ring; \
 		struct io_uring_sqe * sqe; \
-		uint32_t idx; \
-		uint8_t sflags = REGULAR_FLAGS & submit_flags; \
-		[sqe, idx] = __submit_alloc( ring, (uint64_t)(uintptr_t)&data ); \
+		__u32 idx; \
+		__u8 sflags = REGULAR_FLAGS & submit_flags; \
+		[sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&data ); \
 		sqe->flags = sflags;
 
 	#define __submit_wait \
 		/*__cfaabi_bits_print_safe( STDERR_FILENO, "Preparing user data %p for %p\n", &data, data.thrd );*/ \
-		verify( sqe->user_data == (uint64_t)(uintptr_t)&data ); \
+		verify( sqe->user_data == (__u64)(uintptr_t)&data ); \
 		__submit( context, idx ); \
-		park( __cfaabi_dbg_ctx ); \
+		wait( data.sem ); \
 		if( data.result < 0 ) { \
 			errno = -data.result; \
@@ -149,5 +149,8 @@
 
 	extern int fsync(int fd);
-	extern int sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags);
+
+	typedef __off64_t off_t;
+	typedef __off64_t off64_t;
+	extern int sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags);
 
 	struct msghdr;
@@ -160,6 +163,6 @@
 	extern int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen);
 
-	extern int fallocate(int fd, int mode, uint64_t offset, uint64_t len);
-	extern int posix_fadvise(int fd, uint64_t offset, uint64_t len, int advice);
+	extern int fallocate(int fd, int mode, off_t offset, off_t len);
+	extern int posix_fadvise(int fd, off_t offset, off_t len, int advice);
 	extern int madvise(void *addr, size_t length, int advice);
 
@@ -190,5 +193,5 @@
 			sqe->fd = fd;
 			sqe->off = offset;
-			sqe->addr = (uint64_t)(uintptr_t)iov;
+			sqe->addr = (__u64)(uintptr_t)iov; // uintptr_t first: a 32-bit pointer must widen zero-extended
 			sqe->len = iovcnt;
 			sqe->rw_flags = 0;
@@ -207,5 +210,12 @@
 			__submit_prelude
 
-			(*sqe){ IORING_OP_WRITEV, fd, iov, iovcnt, offset };
+			sqe->opcode = IORING_OP_WRITEV;
+			sqe->ioprio = 0;
+			sqe->fd = fd;
+			sqe->off = offset;
+			sqe->addr = (__u64)(uintptr_t)iov; // uintptr_t first: a 32-bit pointer must widen zero-extended
+			sqe->len = iovcnt;
+			sqe->rw_flags = 0;
+			sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
 
 			__submit_wait
@@ -220,11 +230,18 @@
 		__submit_prelude
 
-		(*sqe){ IORING_OP_FSYNC, fd };
-
-		__submit_wait
-	#endif
-}
-
-int cfa_sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
+		sqe->opcode = IORING_OP_FSYNC;
+		sqe->ioprio = 0;
+		sqe->fd = fd;
+		sqe->off = 0;
+		sqe->addr = 0;
+		sqe->len = 0;
+		sqe->rw_flags = 0;
+		sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
+
+		__submit_wait
+	#endif
+}
+
+int cfa_sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
 	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_SYNC_FILE_RANGE)
 		return sync_file_range(fd, offset, nbytes, flags);
@@ -275,5 +292,5 @@
 
 		(*sqe){ IORING_OP_SEND, sockfd };
-		sqe->addr = (uint64_t)buf;
+		sqe->addr = (__u64)(uintptr_t)buf; // uintptr_t first: a 32-bit pointer must widen zero-extended
 		sqe->len = len;
 		sqe->msg_flags = flags;
@@ -290,5 +307,5 @@
 
 		(*sqe){ IORING_OP_RECV, sockfd };
-		sqe->addr = (uint64_t)buf;
+		sqe->addr = (__u64)(uintptr_t)buf; // uintptr_t first: a 32-bit pointer must widen zero-extended
 		sqe->len = len;
 		sqe->msg_flags = flags;
@@ -305,6 +322,6 @@
 
 		(*sqe){ IORING_OP_ACCEPT, sockfd };
-		sqe->addr = (uint64_t)(uintptr_t)addr;
-		sqe->addr2 = (uint64_t)(uintptr_t)addrlen;
+		sqe->addr  = (__u64)(uintptr_t)addr;    // uintptr_t first: a 32-bit pointer must widen zero-extended
+		sqe->addr2 = (__u64)(uintptr_t)addrlen; // same widening rule as addr
 		sqe->accept_flags = flags;
 
@@ -320,12 +337,12 @@
 
 		(*sqe){ IORING_OP_CONNECT, sockfd };
-		sqe->addr = (uint64_t)(uintptr_t)addr;
-		sqe->off  = (uint64_t)(uintptr_t)addrlen;
-
-		__submit_wait
-	#endif
-}
-
-int cfa_fallocate(int fd, int mode, uint64_t offset, uint64_t len, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
+		sqe->addr = (__u64)(uintptr_t)addr;    // uintptr_t first: a 32-bit pointer must widen zero-extended
+		sqe->off  = (__u64)(uintptr_t)addrlen; // keeps the double cast the removed line used
+
+		__submit_wait
+	#endif
+}
+
+int cfa_fallocate(int fd, int mode, off_t offset, off_t len, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
 	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_FALLOCATE)
 		return fallocate( fd, mode, offset, len );
@@ -344,5 +361,5 @@
 }
 
-int cfa_fadvise(int fd, uint64_t offset, uint64_t len, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
+int cfa_fadvise(int fd, off_t offset, off_t len, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
 	#if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_FADVISE)
 		return posix_fadvise( fd, offset, len, advice );
@@ -351,5 +368,5 @@
 
 		(*sqe){ IORING_OP_FADVISE, fd };
-		sqe->off = (uint64_t)offset;
+		sqe->off = (__u64)offset;
 		sqe->len = len;
 		sqe->fadvise_advice = advice;
@@ -366,5 +383,5 @@
 
 		(*sqe){ IORING_OP_MADVISE, 0 };
-		sqe->addr = (uint64_t)addr;
+		sqe->addr = (__u64)(uintptr_t)addr; // uintptr_t first: a 32-bit pointer must widen zero-extended
 		sqe->len = length;
 		sqe->fadvise_advice = advice;
@@ -381,5 +398,5 @@
 
 		(*sqe){ IORING_OP_OPENAT, dirfd };
-		sqe->addr = (uint64_t)pathname;
+		sqe->addr = (__u64)(uintptr_t)pathname; // uintptr_t first: a 32-bit pointer must widen zero-extended
 		sqe->open_flags = flags;
 		sqe->len = mode;
@@ -414,5 +431,5 @@
 		__submit_prelude
 
-		(*sqe){ IORING_OP_STATX, dirfd, pathname, mask, (uint64_t)statxbuf };
+		(*sqe){ IORING_OP_STATX, dirfd, pathname, mask, (__u64)(uintptr_t)statxbuf }; // uintptr_t first: a 32-bit pointer must widen zero-extended
 		sqe->statx_flags = flags;
 
@@ -456,5 +473,5 @@
 		}
 		else {
-			sqe->off = (uint64_t)-1;
+			sqe->off = (__u64)-1;
 		}
 		sqe->len = len;
@@ -464,5 +481,5 @@
 		}
 		else {
-			sqe->splice_off_in = (uint64_t)-1;
+			sqe->splice_off_in = (__u64)-1;
 		}
 		sqe->splice_flags  = flags | (SPLICE_FLAGS & submit_flags);
