Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 1d5e47115e38bbee1bdc1642bf5ac53dc15d0ddc)
+++ libcfa/src/concurrency/io.cfa	(revision 34b6188221de19c375b4d52ee3cdd3d84f96d558)
@@ -109,4 +109,5 @@
 		volatile uint32_t * head;
 		volatile uint32_t * tail;
+		volatile uint32_t prev_head;
 
 		// The actual kernel ring which uses head/tail
@@ -259,4 +260,5 @@
 		sq.dropped = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
 		sq.array   = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
+		sq.prev_head = *sq.head;
 
 		{
@@ -430,4 +432,5 @@
 //=============================================================================================
 	static unsigned __collect_submitions( struct __io_data & ring );
+	static uint32_t __release_consumed_submission( struct __io_data & ring );
 
 	// Process a single completion message from the io_uring
@@ -444,5 +447,4 @@
 
 		if (to_submit > 0 || waitcnt > 0) {
-			uint32_t shead = *ring.submit_q.head;
 			int ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
 			if( ret < 0 ) {
@@ -457,8 +459,5 @@
 
 			// Release the consumed SQEs
-			for( i; ret ) {
-				uint32_t idx = ring.submit_q.array[ (i + shead) & smask ];
-				ring.submit_q.sqes[ idx ].user_data = 0;
-			}
+			__release_consumed_submission( ring );
 
 			// update statistics
@@ -738,5 +737,11 @@
 
 			block++;
-			yield();
+			if( try_lock(ring.submit_q.lock __cfaabi_dbg_ctx2) ) {
+				__release_consumed_submission( ring );
+				unlock( ring.submit_q.lock );
+			}
+			else {
+				yield();
+			}
 		}
 
@@ -794,5 +799,4 @@
 			// We got the lock
 			unsigned to_submit = __collect_submitions( ring );
-			uint32_t shead = *ring.submit_q.head;
 			int ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, 0, 0, 0p, _NSIG / 8);
 			if( ret < 0 ) {
@@ -810,9 +814,5 @@
 
 			// Release the consumed SQEs
-			const uint32_t smask = *ring.submit_q.mask;
-			for( i; ret ) {
-				uint32_t idx = ring.submit_q.array[ (i + shead) & smask ];
-				ring.submit_q.sqes[ idx ].user_data = 0;
-			}
+			__release_consumed_submission( ring );
 
 			// update statistics
@@ -851,5 +851,6 @@
 			)
 
-			ring.submit_q.sqes[ idx & mask ].user_data = 0;
+			// Release the consumed SQEs
+			__release_consumed_submission( ring );
 
 			unlock(ring.submit_q.lock);
@@ -885,3 +886,16 @@
 		return to_submit;
 	}
+
+	static uint32_t __release_consumed_submission( struct __io_data & ring ) {
+		const uint32_t smask = *ring.submit_q.mask;
+		uint32_t chead = *ring.submit_q.head;
+		uint32_t phead = ring.submit_q.prev_head;
+		ring.submit_q.prev_head = chead;
+		uint32_t count = chead - phead;
+		for( i; count ) {
+			uint32_t idx = ring.submit_q.array[ (phead + i) & smask ];
+			ring.submit_q.sqes[ idx ].user_data = 0;
+		}
+		return count;
+	}
 #endif
