Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision ecf6b466cf33910b43c4c1e9b980e00690da2140)
+++ libcfa/src/concurrency/io.cfa	(revision d384787be999e142272516fb61f200e092c7a722)
@@ -101,8 +101,10 @@
 
 		// Requires features
-		// // adjust the size according to the parameters
-		// if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
-		// 	cq->ring_sz = sq->ring_sz = max(cq->ring_sz, sq->ring_sz);
-		// }
+		#if defined(IORING_FEAT_SINGLE_MMAP)
+			// adjust the size according to the parameters
+			if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
+				cq->ring_sz = sq->ring_sz = max(cq->ring_sz, sq->ring_sz);
+			}
+		#endif
 
 		// mmap the Submit Queue into existence
@@ -112,10 +114,13 @@
 		}
 
-		// mmap the Completion Queue into existence (may or may not be needed)
 		// Requires features
-		// if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
-		// 	cq->ring_ptr = sq->ring_ptr;
-		// }
-		// else {
+		#if defined(IORING_FEAT_SINGLE_MMAP)
+			// mmap the Completion Queue into existence (may or may not be needed)
+			if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
+				cq->ring_ptr = sq->ring_ptr;
+			}
+			else
+		#endif
+		{
 			// We need multiple call to MMAP
 			cq.ring_ptr = mmap(0, cq.ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
@@ -124,5 +129,5 @@
 				abort("KERNEL ERROR: IO_URING MMAP2 - %s\n", strerror(errno));
 			}
-		// }
+		}
 
 		// mmap the submit queue entries
@@ -171,4 +176,10 @@
 		(this.io.submit){ min(*sq.num, *cq.num) };
 
+		// Initialize statistics
+		this.io.submit_q.stats.submit_avg.val = 0;
+		this.io.submit_q.stats.submit_avg.cnt = 0;
+		this.io.completion_q.stats.completed_avg.val = 0;
+		this.io.completion_q.stats.completed_avg.cnt = 0;
+
 		// Create the poller thread
 		this.io.stack = __create_pthread( &this.io.poller, __io_poller, &this );
@@ -185,4 +196,17 @@
 		pthread_join( this.io.poller, 0p );
 		free( this.io.stack );
+
+		// print statistics
+		__cfaabi_bits_print_safe( STDERR_FILENO,
+			"----- I/O uRing Stats -----\n"
+			"- total submit calls  : %llu\n"
+			"- avg submit          : %lf\n"
+			"- total wait calls    : %llu\n"
+			"- avg completion/wait : %lf\n",
+			this.io.submit_q.stats.submit_avg.cnt,
+			this.io.submit_q.stats.submit_avg.cnt != 0 ? ((double)this.io.submit_q.stats.submit_avg.val) / this.io.submit_q.stats.submit_avg.cnt : 0.0,
+			this.io.completion_q.stats.completed_avg.cnt,
+			this.io.completion_q.stats.completed_avg.cnt != 0 ? ((double)this.io.completion_q.stats.completed_avg.val) / this.io.completion_q.stats.completed_avg.cnt : 0.0
+		);
 
 		// Shutdown the io rings
@@ -215,29 +239,51 @@
 	// Process a single completion message from the io_uring
 	// This is NOT thread-safe
-	static bool __io_process(struct io_ring & ring) {
+	int __drain_io( struct io_ring & ring, sigset_t & mask, int waitcnt ) {
+		int ret = syscall( __NR_io_uring_enter, ring.fd, 0, waitcnt, IORING_ENTER_GETEVENTS, &mask, _NSIG / 8);
+		if( ret < 0 ) {
+			switch((int)errno) {
+			case EAGAIN:
+			case EINTR:
+				return -EAGAIN;
+			default:
+				abort( "KERNEL ERROR: IO_URING WAIT - %s\n", strerror(errno) );
+			}
+		}
+
+		// Drain the queue
 		unsigned head = *ring.completion_q.head;
 		unsigned tail = __atomic_load_n(ring.completion_q.tail, __ATOMIC_ACQUIRE);
 
-		if (head == tail) return false;
-
-		unsigned idx = head & (*ring.completion_q.mask);
-		struct io_uring_cqe & cqe = ring.completion_q.cqes[idx];
-
-		/* paranoid */ verify(&cqe);
-
-		struct io_user_data * data = (struct io_user_data *)cqe.user_data;
-		// __cfaabi_bits_print_safe( STDERR_FILENO, "Performed reading io cqe %p, result %d for %p\n", data, cqe.res, data->thrd );
-
-		data->result = cqe.res;
-		__unpark( data->thrd __cfaabi_dbg_ctx2 );
+		// Nothing was new return 0
+		if (head == tail) {
+			ring.completion_q.stats.completed_avg.cnt += 1;
+			return 0;
+		}
+
+		uint32_t count = tail - head;
+		for(i; count) {
+			unsigned idx = (head + i) & (*ring.completion_q.mask);
+			struct io_uring_cqe & cqe = ring.completion_q.cqes[idx];
+
+			/* paranoid */ verify(&cqe);
+
+			struct io_user_data * data = (struct io_user_data *)cqe.user_data;
+			// __cfaabi_bits_print_safe( STDERR_FILENO, "Performed reading io cqe %p, result %d for %p\n", data, cqe.res, data->thrd );
+
+			data->result = cqe.res;
+			__unpark( data->thrd __cfaabi_dbg_ctx2 );
+		}
 
 		// Allow new submissions to happen
-		V(ring.submit);
+		V(ring.submit, count);
 
 		// Mark to the kernel that the cqe has been seen
 		// Ensure that the kernel only sees the new value of the head index after the CQEs have been read.
-		__atomic_fetch_add( ring.completion_q.head, 1, __ATOMIC_RELAXED );
-
-		return true;
+		__atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED );
+
+		ring.completion_q.stats.completed_avg.val += count;
+		ring.completion_q.stats.completed_avg.cnt += 1;
+
+		return count;
 	}
 
@@ -257,18 +303,6 @@
 		verify( (*ring.completion_q.head) == (*ring.completion_q.tail) );
 
-		LOOP: while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
-			int ret = syscall( __NR_io_uring_enter, ring.fd, 0, 1, IORING_ENTER_GETEVENTS, &mask, _NSIG / 8);
-			if( ret < 0 ) {
-				switch((int)errno) {
-				case EAGAIN:
-				case EINTR:
-					continue LOOP;
-				default:
-					abort( "KERNEL ERROR: IO_URING WAIT - %s\n", strerror(errno) );
-				}
-			}
-
-			// Drain the queue
-			while(__io_process(ring)) {}
+		while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
+			__drain_io( ring, mask, 1 );
 		}
 
@@ -343,4 +377,7 @@
 		}
 
+		ring.submit_q.stats.submit_avg.val += 1;
+		ring.submit_q.stats.submit_avg.cnt += 1;
+
 		unlock(ring.submit_q.lock);
 		// Make sure that idx was submitted
@@ -512,7 +549,10 @@
 int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) {
 	#if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_ACCEPT)
-		__SOCKADDR_ARG _addr;
-		_addr.__sockaddr__ = addr;
-		return accept4( sockfd, _addr, addrlen, flags );
+		#pragma GCC diagnostic push
+		#pragma GCC diagnostic ignored "-Wattributes"
+			__SOCKADDR_ARG _addr;
+			_addr.__sockaddr__ = addr;
+			return accept4( sockfd, _addr, addrlen, flags );
+		#pragma GCC diagnostic pop
 	#else
 		__submit_prelude
@@ -529,7 +569,10 @@
 int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
 	#if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_CONNECT)
-		__CONST_SOCKADDR_ARG _addr;
-		_addr.__sockaddr__ = addr;
-		return connect( sockfd, _addr, addrlen );
+		#pragma GCC diagnostic push
+		#pragma GCC diagnostic ignored "-Wattributes"
+			__CONST_SOCKADDR_ARG _addr;
+			_addr.__sockaddr__ = addr;
+			return connect( sockfd, _addr, addrlen );
+		#pragma GCC diagnostic pop
 	#else
 		__submit_prelude
Index: libcfa/src/concurrency/iofwd.hfa
===================================================================
--- libcfa/src/concurrency/iofwd.hfa	(revision d384787be999e142272516fb61f200e092c7a722)
+++ libcfa/src/concurrency/iofwd.hfa	(revision d384787be999e142272516fb61f200e092c7a722)
@@ -0,0 +1,39 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// iofwd.hfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Apr 23 17:31:00 2020
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#pragma once
+
+ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
+ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
+int cfa_fsync(int fd);
+int cfa_sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags);
+ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags);
+ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags);
+ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags);
+ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags);
+int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags);
+int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen);
+int cfa_fallocate(int fd, int mode, uint64_t offset, uint64_t len);
+int cfa_fadvise(int fd, uint64_t offset, uint64_t len, int advice);
+int cfa_madvise(void *addr, size_t length, int advice);
+int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode);
+int cfa_close(int fd);
+int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf);
+ssize_t cfa_read(int fd, void *buf, size_t count);
+ssize_t cfa_write(int fd, void *buf, size_t count);
+
+//-----------------------------------------------------------------------------
+// Check if a function blocks only the user thread
+bool has_user_level_blocking( fptr_t func );
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision ecf6b466cf33910b43c4c1e9b980e00690da2140)
+++ libcfa/src/concurrency/kernel.cfa	(revision d384787be999e142272516fb61f200e092c7a722)
@@ -1004,4 +1004,18 @@
 }
 
+bool V(semaphore & this, unsigned diff) with( this ) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	// count < 0 means |count| threads are blocked; wake at most 'diff' of them
+	int release = min(-count, (int)diff);
+	count += diff;
+	for(release) {
+		unpark( pop_head( waiting ) __cfaabi_dbg_ctx2 );
+	}
+
+	unlock( lock );
+
+	return release > 0;
+}
+
 //-----------------------------------------------------------------------------
 // Global Queues
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision ecf6b466cf33910b43c4c1e9b980e00690da2140)
+++ libcfa/src/concurrency/kernel.hfa	(revision d384787be999e142272516fb61f200e092c7a722)
@@ -40,4 +40,5 @@
 void   P (semaphore & this);
 bool   V (semaphore & this);
+bool   V (semaphore & this, unsigned count);
 
 
@@ -144,4 +145,12 @@
 	void * ring_ptr;
 	size_t ring_sz;
+
+	// Statistics
+	struct {
+		struct {
+			unsigned long long int val;
+			unsigned long long int cnt;
+		} submit_avg;
+	} stats;
 };
 
@@ -164,4 +173,12 @@
 	void * ring_ptr;
 	size_t ring_sz;
+
+	// Statistics
+	struct {
+		struct {
+			unsigned long long int val;
+			unsigned long long int cnt;
+		} completed_avg;
+	} stats;
 };
 
