Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 6c12fd28ccd5d0df5de4277788cba0edd4a3bc8c)
+++ libcfa/src/concurrency/io.cfa	(revision 5c581cca46abb934f2a7c0a0f48e2c96c4cf8c9b)
@@ -18,4 +18,5 @@
 
 #include "kernel.hfa"
+#include "bitmanip.hfa"
 
 #if !defined(HAVE_LINUX_IO_URING_H)
@@ -196,4 +197,5 @@
 				void * stack;
 				pthread_t kthrd;
+				volatile bool blocked;
 			} slow;
 			__io_poller_fast fast;
@@ -280,4 +282,5 @@
 
 		if( io_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
+			/* paranoid */ verify( is_pow2( io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET ) || ((io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET) < 8)  );
 			sq.ready_cnt = max(io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET, 8);
 			sq.ready = alloc_align( 64, sq.ready_cnt );
@@ -344,4 +347,5 @@
 		// Create the poller thread
 		__cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluter %p\n", &this);
+		this.io->poller.slow.blocked = false;
 		this.io->poller.slow.stack = __create_pthread( &this.io->poller.slow.kthrd, __io_poller_slow, &this );
 	}
@@ -580,9 +584,13 @@
 			while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
 
+				__atomic_store_n( &ring.poller.slow.blocked, true, __ATOMIC_SEQ_CST );
+
 				// In the user-thread approach drain and if anything was drained,
 				// batton pass to the user-thread
 				int count;
 				bool again;
-				[count, again] = __drain_io( ring, &mask, 0, true );
+				[count, again] = __drain_io( ring, &mask, 1, true );
+
+				__atomic_store_n( &ring.poller.slow.blocked, false, __ATOMIC_SEQ_CST );
 
 				// Update statistics
@@ -667,6 +675,8 @@
	static inline void __wake_poller( struct __io_data & ring ) __attribute__((artificial));
	static inline void __wake_poller( struct __io_data & ring ) {
-		// sigval val = { 1 };
-		// pthread_sigqueue( ring.poller.slow.kthrd, SIGUSR1, val );
+		if(!__atomic_load_n( &ring.poller.slow.blocked, __ATOMIC_SEQ_CST)) return; // Signal the slow poller only while its 'blocked' flag is set (the poller raises it just before __drain_io and clears it right after); otherwise do nothing.
+
+		sigval val = { 1 }; // Payload queued with the signal. NOTE(review): the meaning of the value 1 is not visible here - confirm against the SIGUSR1 handler.
+		pthread_sigqueue( ring.poller.slow.kthrd, SIGUSR1, val ); // Queue SIGUSR1 to the slow poller's pthread. NOTE(review): the return value is ignored, and 'blocked' may already have been cleared by the time the signal is delivered - confirm a late signal is harmless.
	}
 
@@ -732,7 +742,10 @@
 			__attribute((unused)) int block = 0;
 			uint32_t expected = -1ul32;
-			LOOKING: for(;;) {
+			uint32_t ready_mask = ring.submit_q.ready_cnt - 1;
+			uint32_t off = __tls_rand();
+			LOOKING: for() {
 				for(i; ring.submit_q.ready_cnt) {
-					if( __atomic_compare_exchange_n( &ring.submit_q.ready[i], &expected, idx, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) {
+					uint32_t ii = (i + off) & ready_mask;
+					if( __atomic_compare_exchange_n( &ring.submit_q.ready[ii], &expected, idx, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) {
 						break LOOKING;
 					}
