Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 7fdae3874b68d51f19443c62dab63f3f7ab101db)
+++ libcfa/src/concurrency/io.cfa	(revision 2fafe7eaade023aeffc0c15a4090d21980e65706)
@@ -41,4 +41,67 @@
 	#include "kernel/fwd.hfa"
 	#include "io/types.hfa"
+
+	// returns true if acquired as leader or second leader (interrupts are then left disabled); returns false, with interrupts re-enabled, if a next leader is already recorded
+	static inline bool try_lock( __leaderlock_t & this ) {
+		const uintptr_t thrd = 1z | (uintptr_t)active_thread(); // current thread's pointer tagged with the low "locked" bit
+		bool block; // assigned on every path that reaches the compare-exchange below
+		disable_interrupts();
+		for() { // retry until the compare-exchange succeeds or the lock already has a next leader
+			struct $thread * expected = this.value;
+			if( 1p != expected && 0p != expected ) { // neither unlocked (0p) nor locked-without-successor (1p): a next leader is already recorded
+				/* paranoid */ verify( thrd != (uintptr_t)expected ); // We better not already be the next leader
+				enable_interrupts( __cfaabi_dbg_ctx );
+				return false;
+			}
+			struct $thread * desired;
+			if( 0p == expected ) {
+				// If the lock isn't locked acquire it, no need to block
+				desired = 1p;
+				block = false;
+			}
+			else {
+				// If the lock is already locked try becoming the next leader
+				desired = (struct $thread *)thrd;
+				block = true;
+			}
+			if( __atomic_compare_exchange_n(&this.value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break; // on failure this.value changed underneath us: loop and re-read it
+		}
+		if( block ) { // recorded as next leader: wait until next() unparks this thread
+			enable_interrupts( __cfaabi_dbg_ctx );
+			park( __cfaabi_dbg_ctx );
+			disable_interrupts(); // both success paths return with interrupts disabled
+		}
+		return true;
+	}
+
+	static inline bool next( __leaderlock_t & this ) { // release the lock, or hand it to the recorded next leader; returns true if a thread was unparked
+		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+		struct $thread * nextt; // thread to wake, or 0p when no next leader was recorded
+		for() { // retry until the compare-exchange succeeds
+			struct $thread * expected = this.value;
+			/* paranoid */ verify( (1 & (uintptr_t)expected) == 1 ); // The lock better be locked
+
+			struct $thread * desired;
+			if( 1p == expected ) {
+				// No next leader, just unlock
+				desired = 0p;
+				nextt   = 0p;
+			}
+			else {
+				// There is a next leader, remove but keep locked
+				desired = 1p;
+				nextt   = (struct $thread *)(~1z & (uintptr_t)expected); // clear the tag bit to recover the thread pointer
+			}
+			if( __atomic_compare_exchange_n(&this.value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break; // on failure this.value changed underneath us: loop and re-read it
+		}
+
+		if(nextt) {
+			unpark( nextt __cfaabi_dbg_ctx2 ); // wake the thread parked in try_lock; the lock value stays 1p (locked) for it
+			enable_interrupts( __cfaabi_dbg_ctx );
+			return true; // the lock was handed over rather than released
+		}
+		enable_interrupts( __cfaabi_dbg_ctx );
+		return false; // nobody was waiting: the lock value is now 0p (unlocked)
+	}
 
 //=============================================================================================
@@ -297,9 +360,7 @@
 
 			block++;
-			if( try_lock(ring.submit_q.lock __cfaabi_dbg_ctx2) ) {
-				__release_consumed_submission( ring );
-				unlock( ring.submit_q.lock );
-			}
-			else {
+
+			__u32 released = __release_consumed_submission( ring );
+			if( released == 0 ) {
 				yield();
 			}
@@ -334,10 +395,6 @@
 			__u32 picked = __submit_to_ready_array( ring, idx, mask );
 
-			for() {
-				yield();
-
-				// If some one else collected our index, we are done
-				#warning ABA problem
-				if( ring.submit_q.ready[picked] != idx ) {
+			#if defined(LEADER_LOCK)
+				if( !try_lock(ring.submit_q.submit_lock) ) {
 					__STATS__( false,
 						io.submit_q.helped += 1;
@@ -345,16 +402,33 @@
 					return;
 				}
-
-				if( try_lock(ring.submit_q.lock __cfaabi_dbg_ctx2) ) {
+				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+				__STATS__( true,
+					io.submit_q.leader += 1;
+				)
+			#else
+				for() {
+					yield();
+
+					if( try_lock(ring.submit_q.submit_lock __cfaabi_dbg_ctx2) ) {
+						__STATS__( false,
+							io.submit_q.leader += 1;
+						)
+						break;
+					}
+
+					// If someone else collected our index, we are done
+					#warning ABA problem
+					if( ring.submit_q.ready[picked] != idx ) {
+						__STATS__( false,
+							io.submit_q.helped += 1;
+						)
+						return;
+					}
+
 					__STATS__( false,
-						io.submit_q.leader += 1;
+						io.submit_q.busy += 1;
 					)
-					break;
-				}
-
-				__STATS__( false,
-					io.submit_q.busy += 1;
-				)
-			}
+				}
+			#endif
 
 			// We got the lock
@@ -365,5 +439,10 @@
 			int ret = __io_uring_enter( ring, to_submit, false );
 
-			unlock(ring.submit_q.lock);
+			#if defined(LEADER_LOCK)
+				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+				next(ring.submit_q.submit_lock);
+			#else
+				unlock(ring.submit_q.submit_lock);
+			#endif
 			if( ret < 0 ) return;
 
@@ -380,5 +459,9 @@
 		else {
 			// get mutual exclusion
-			lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
+			#if defined(LEADER_LOCK)
+				while(!try_lock(ring.submit_q.submit_lock));
+			#else
+				lock(ring.submit_q.submit_lock __cfaabi_dbg_ctx2);
+			#endif
 
 			/* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 0,
@@ -418,5 +501,9 @@
 			__release_consumed_submission( ring );
 
-			unlock(ring.submit_q.lock);
+			#if defined(LEADER_LOCK)
+				next(ring.submit_q.submit_lock);
+			#else
+				unlock(ring.submit_q.submit_lock);
+			#endif
 
 			__cfadbg_print_safe( io, "Kernel I/O : Performed io_submit for %p, returned %d\n", active_thread(), ret );
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 7fdae3874b68d51f19443c62dab63f3f7ab101db)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 2fafe7eaade023aeffc0c15a4090d21980e65706)
@@ -378,5 +378,5 @@
 		}
 
-		(sq.lock){};
+		(sq.submit_lock){};
 		(sq.release_lock){};
 
Index: libcfa/src/concurrency/io/types.hfa
===================================================================
--- libcfa/src/concurrency/io/types.hfa	(revision 7fdae3874b68d51f19443c62dab63f3f7ab101db)
+++ libcfa/src/concurrency/io/types.hfa	(revision 2fafe7eaade023aeffc0c15a4090d21980e65706)
@@ -22,4 +22,11 @@
 
       #include "bits/locks.hfa"
+
+	#define LEADER_LOCK
+	struct __leaderlock_t { // lock that records at most one waiting thread, the "next leader"
+		struct $thread * volatile value;	// ($thread) next_leader | (bool:1) is_locked -- 0p: unlocked; 1p: locked, no next leader; (thread | 1): locked with that thread recorded as next leader
+	};
+
+	static inline void ?{}( __leaderlock_t & this ) { this.value = 0p; } // constructor: 0p is the unlocked state
 
 	//-----------------------------------------------------------------------
@@ -49,6 +56,10 @@
 		__u32 ready_cnt;
 
-		__spinlock_t lock;
-		__spinlock_t release_lock;
+		#if defined(LEADER_LOCK)
+			__leaderlock_t submit_lock;
+		#else
+			__spinlock_t submit_lock;
+		#endif
+		__spinlock_t  release_lock;
 
 		// A buffer of sqes (not the actual ring)
