Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 8f01ad7153c7eaa08ba052530901201317088623)
+++ libcfa/src/concurrency/io.cfa	(revision ff7b2deefe55d515e51204c5b9dfe98a7c925ced)
@@ -42,4 +42,5 @@
 	#include "kernel/fwd.hfa"
 	#include "kernel/private.hfa"
+	#include "kernel/cluster.hfa"
 	#include "io/types.hfa"
 
@@ -93,5 +94,5 @@
 	extern void __kernel_unpark( thread$ * thrd, unpark_hint );
 
-	bool __cfa_io_drain( $io_context * ctx ) {
+	bool __cfa_io_drain( $io_context * ctx, cluster * cltr ) {
 		/* paranoid */ verify( ! __preemption_enabled() );
 		/* paranoid */ verify( ready_schedule_islocked() );
@@ -112,4 +113,6 @@
 		}
 
+		unsigned long long ts_prev = ctx->cq.ts;
+
 		for(i; count) {
 			unsigned idx = (head + i) & mask;
@@ -125,4 +128,5 @@
 
 		__cfadbg_print_safe(io, "Kernel I/O : %u completed\n", count);
+		unsigned long long ts_next = ctx->cq.ts = rdtscl();
 
 		// Mark to the kernel that the cqe has been seen
@@ -134,4 +138,6 @@
 
 		__atomic_unlock(&ctx->cq.lock);
+
+		touch_tsc( cltr->sched.io.tscs, ctx->cq.id, ts_prev, ts_next );
 
 		return true;
@@ -143,5 +149,5 @@
 		/* paranoid */ verify( proc->io.ctx );
 
-		__attribute__((unused)) cluster * cltr = proc->cltr;
+		cluster * cltr = proc->cltr;
 		$io_context & ctx = *proc->io.ctx;
 
@@ -183,5 +189,5 @@
 
 		ready_schedule_lock();
-		bool ret = __cfa_io_drain( &ctx );
+		bool ret = __cfa_io_drain( &ctx, cltr );
 		ready_schedule_unlock();
 		return ret;
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 8f01ad7153c7eaa08ba052530901201317088623)
+++ libcfa/src/concurrency/io/setup.cfa	(revision ff7b2deefe55d515e51204c5b9dfe98a7c925ced)
@@ -60,4 +60,5 @@
 	#include "fstream.hfa"
 	#include "kernel/private.hfa"
+	#include "limits.hfa"
 	#include "thread.hfa"
 #pragma GCC diagnostic pop
@@ -215,4 +216,6 @@
 		// completion queue
 		cq.lock      = 0;
+		cq.id        = MAX;
+		cq.ts        = rdtscl();
 		cq.head      = (volatile __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.head);
 		cq.tail      = (volatile __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.tail);
Index: libcfa/src/concurrency/io/types.hfa
===================================================================
--- libcfa/src/concurrency/io/types.hfa	(revision 8f01ad7153c7eaa08ba052530901201317088623)
+++ libcfa/src/concurrency/io/types.hfa	(revision ff7b2deefe55d515e51204c5b9dfe98a7c925ced)
@@ -79,4 +79,8 @@
 	struct __cmp_ring_t {
 		volatile bool lock;
+
+		unsigned id;
+
+		unsigned long long ts;
 
 		// Head and tail of the ring
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 8f01ad7153c7eaa08ba052530901201317088623)
+++ libcfa/src/concurrency/kernel.cfa	(revision ff7b2deefe55d515e51204c5b9dfe98a7c925ced)
@@ -136,5 +136,5 @@
 static void mark_awake(__cluster_proc_list & idles, processor & proc);
 
-extern bool __cfa_io_drain( $io_context * );
+extern bool __cfa_io_drain( $io_context *, cluster * cltr );
 extern bool __cfa_io_flush( processor *, int min_comp );
 static inline bool __maybe_io_drain( processor * );
@@ -839,5 +839,5 @@
 		if(head == tail) return false;
 		ready_schedule_lock();
-		ret = __cfa_io_drain( ctx );
+		ret = __cfa_io_drain( ctx, proc->cltr );
 		ready_schedule_unlock();
 	#endif
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 8f01ad7153c7eaa08ba052530901201317088623)
+++ libcfa/src/concurrency/kernel.hfa	(revision ff7b2deefe55d515e51204c5b9dfe98a7c925ced)
@@ -108,5 +108,4 @@
 	struct {
 		$io_context * ctx;
-		unsigned id;
 		unsigned target;
 		volatile bool pending;
Index: libcfa/src/concurrency/kernel/cluster.cfa
===================================================================
--- libcfa/src/concurrency/kernel/cluster.cfa	(revision 8f01ad7153c7eaa08ba052530901201317088623)
+++ libcfa/src/concurrency/kernel/cluster.cfa	(revision ff7b2deefe55d515e51204c5b9dfe98a7c925ced)
@@ -27,4 +27,5 @@
 
 #include "ready_subqueue.hfa"
+#include "io/types.hfa"
 
 #include <errno.h>
@@ -259,5 +260,5 @@
 		it->rdq.id = valrq;
 		it->rdq.target = MAX;
-		it->io.id = valio;
+		it->io.ctx->cq.id = valio;
 		it->io.target = MAX;
 		valrq += __shard_factor.readyq;
@@ -278,6 +279,6 @@
 	while(it) {
 		/* paranoid */ verifyf( it, "Unexpected null iterator\n");
-		/* paranoid */ verifyf( it->io.id < count, "Processor %p has id %u above count %zu\n", it, it->rdq.id, count);
-		data[it->io.id] = it->io.ctx;
+		/* paranoid */ verifyf( it->io.ctx->cq.id < count, "Processor %p has id %u above count %zu\n", it, it->rdq.id, count);
+		data[it->io.ctx->cq.id] = it->io.ctx;
 		it = &(*it)`next;
 	}
Index: libcfa/src/concurrency/kernel/cluster.hfa
===================================================================
--- libcfa/src/concurrency/kernel/cluster.hfa	(revision 8f01ad7153c7eaa08ba052530901201317088623)
+++ libcfa/src/concurrency/kernel/cluster.hfa	(revision ff7b2deefe55d515e51204c5b9dfe98a7c925ced)
@@ -16,5 +16,8 @@
 #pragma once
 
+#include "device/cpu.hfa"
 #include "kernel/private.hfa"
+
+#include "limits.hfa"
 
 //-----------------------------------------------------------------------
@@ -31,4 +34,12 @@
 	const unsigned long long ret = ((new_weight * new_val) + (old_weight * old_avg)) / total_weight;
 	return ret;
+}
+
+static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next) { // Record ts_next in tscs[idx] and refresh that entry's moving average.
+	if (ts_next == MAX) return; // MAX is treated as "nothing to record": the entry is left untouched
+	unsigned long long now = rdtscl(); // current time sample passed to moving_average (presumably a TSC read — confirm)
+	unsigned long long pma = __atomic_load_n(&tscs[ idx ].ma, __ATOMIC_RELAXED); // previous moving average; relaxed load, separate from the stores below (not a single atomic read-modify-write)
+	__atomic_store_n(&tscs[ idx ].tv, ts_next, __ATOMIC_RELAXED); // publish ts_next as the entry's timestamp value
+	__atomic_store_n(&tscs[ idx ].ma, moving_average(now, ts_prev, pma), __ATOMIC_RELAXED); // new average computed by moving_average from now, ts_prev and the previous average
 }
 
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 8f01ad7153c7eaa08ba052530901201317088623)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision ff7b2deefe55d515e51204c5b9dfe98a7c925ced)
@@ -233,6 +233,6 @@
 	/* paranoid */ verify( sizeof(storage_mainIdleEventFd) == sizeof(eventfd_t) );
 
+	__cfa_io_start( mainProcessor );
 	register_tls( mainProcessor );
-	__cfa_io_start( mainProcessor );
 
 	// Start by initializing the main thread
@@ -314,6 +314,6 @@
 	mainProcessor->local_data = 0p;
 
+	unregister_tls( mainProcessor );
 	__cfa_io_stop( mainProcessor );
-	unregister_tls( mainProcessor );
 
 	// Destroy the main processor and its context in reverse order of construction
@@ -364,7 +364,6 @@
 	proc->local_data = &__cfaabi_tls;
 
+	__cfa_io_start( proc );
 	register_tls( proc );
-
-	__cfa_io_start( proc );
 
 	// used for idle sleep when io_uring is present
@@ -401,6 +400,4 @@
 	// Main routine of the core returned, the core is now fully terminated
 	__cfadbg_print_safe(runtime_core, "Kernel : core %p main ended (%p)\n", proc, &proc->runner);
-
-	__cfa_io_stop( proc );
 
 	#if !defined(__CFA_NO_STATISTICS__)
@@ -417,4 +414,5 @@
 
 	unregister_tls( proc );
+	__cfa_io_stop( proc );
 
 	return 0p;
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision 8f01ad7153c7eaa08ba052530901201317088623)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision ff7b2deefe55d515e51204c5b9dfe98a7c925ced)
@@ -26,6 +26,4 @@
 #include "kernel/cluster.hfa"
 #include "kernel/private.hfa"
-
-#include "limits.hfa"
 
 // #include <errno.h>
@@ -202,10 +200,10 @@
 	// Actually pop the list
 	struct thread$ * thrd;
-	unsigned long long tsc_before = ts(lane);
-	unsigned long long tsv;
-	[thrd, tsv] = pop(lane);
+	unsigned long long ts_prev = ts(lane);
+	unsigned long long ts_next;
+	[thrd, ts_next] = pop(lane);
 
 	/* paranoid */ verify(thrd);
-	/* paranoid */ verify(tsv);
+	/* paranoid */ verify(ts_next);
 	/* paranoid */ verify(lane.lock);
 
@@ -216,10 +214,5 @@
 	__STATS( stats.success++; )
 
-	if (tsv != MAX) {
-		unsigned long long now = rdtscl();
-		unsigned long long pma = __atomic_load_n(&readyQ.tscs[w].ma, __ATOMIC_RELAXED);
-		__atomic_store_n(&readyQ.tscs[w].tv, tsv, __ATOMIC_RELAXED);
-		__atomic_store_n(&readyQ.tscs[w].ma, moving_average(now, tsc_before, pma), __ATOMIC_RELAXED);
-	}
+	touch_tsc(readyQ.tscs, w, ts_prev, ts_next);
 
 	thrd->preferred = w / __shard_factor.readyq;
