Index: benchmark/readyQ/transfer.cfa
===================================================================
--- benchmark/readyQ/transfer.cfa	(revision b5f17e14065831c1bd56680005fa1f733098d29f)
+++ benchmark/readyQ/transfer.cfa	(revision 038a0bd82f1bc2ad25f823b8d9291fb043a0a201)
@@ -1,4 +1,5 @@
 #include "rq_bench.hfa"
 #include <fstream.hfa>
+#include <locale.h>
 
 Duration default_preemption() {
@@ -8,5 +9,5 @@
 #define PRINT(...)
 
-__lehmer64_state_t lead_seed;
+__uint128_t lead_seed;
 volatile unsigned leader;
 volatile size_t lead_idx;
@@ -68,5 +69,5 @@
 	waitgroup();
 
-	unsigned nleader = __lehmer64( lead_seed ) % nthreads;
+	unsigned nleader = lehmer64( lead_seed ) % nthreads;
 	__atomic_store_n( &leader, nleader, __ATOMIC_SEQ_CST );
 
@@ -105,6 +106,6 @@
 // ==================================================
 int main(int argc, char * argv[]) {
-	__lehmer64_state_t lead_seed = getpid();
-	for(10) __lehmer64( lead_seed );
+	uint64_t lead_seed = getpid();
+	for(10) lehmer64( lead_seed );
 	unsigned nprocs = 2;
 
@@ -126,5 +127,5 @@
 
 	lead_idx = 0;
-	leader = __lehmer64( lead_seed ) % nthreads;
+	leader = lehmer64( lead_seed ) % nthreads;
 
 	size_t rechecks = 0;
@@ -167,4 +168,5 @@
 	}
 
+	setlocale( LC_NUMERIC, getenv( "LANG" ) );
 	sout | "Duration (ms)           : " | ws(3, 3, unit(eng((end - start)`dms)));
 	sout | "Number of processors    : " | nprocs;
Index: libcfa/src/common.hfa
===================================================================
--- libcfa/src/common.hfa	(revision b5f17e14065831c1bd56680005fa1f733098d29f)
+++ libcfa/src/common.hfa	(revision 038a0bd82f1bc2ad25f823b8d9291fb043a0a201)
@@ -1,10 +1,10 @@
-// 
+//
 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
 //
 // The contents of this file are covered under the licence agreement in the
 // file "LICENCE" distributed with Cforall.
-// 
-// common -- 
-// 
+//
+// common.hfa --
+//
 // Author           : Peter A. Buhr
 // Created On       : Wed Jul 11 17:54:36 2018
@@ -12,5 +12,5 @@
 // Last Modified On : Wed May  5 14:02:04 2021
 // Update Count     : 18
-// 
+//
 
 #pragma once
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision b5f17e14065831c1bd56680005fa1f733098d29f)
+++ libcfa/src/concurrency/io.cfa	(revision 038a0bd82f1bc2ad25f823b8d9291fb043a0a201)
@@ -144,34 +144,38 @@
 		__ioarbiter_flush( ctx );
 
-		__STATS__( true, io.calls.flush++; )
-		int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8);
-		if( ret < 0 ) {
-			switch((int)errno) {
-			case EAGAIN:
-			case EINTR:
-			case EBUSY:
-				// Update statistics
-				__STATS__( false, io.calls.errors.busy ++; )
-				return false;
-			default:
-				abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+		if(ctx.sq.to_submit != 0 || min_comp > 0) {
+
+			__STATS__( true, io.calls.flush++; )
+			int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8);
+			if( ret < 0 ) {
+				switch((int)errno) {
+				case EAGAIN:
+				case EINTR:
+				case EBUSY:
+					// Update statistics
+					__STATS__( false, io.calls.errors.busy ++; )
+					return false;
+				default:
+					abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+				}
 			}
-		}
-
-		__cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd);
-		__STATS__( true, io.calls.submitted += ret; )
-		/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
-		/* paranoid */ verify( ctx.sq.to_submit >= ret );
-
-		ctx.sq.to_submit -= ret;
-
-		/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
-
-		// Release the consumed SQEs
-		__release_sqes( ctx );
-
-		/* paranoid */ verify( ! __preemption_enabled() );
-
-		ctx.proc->io.pending = false;
+
+			__cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd);
+			__STATS__( true, io.calls.submitted += ret; )
+			/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+			/* paranoid */ verify( ctx.sq.to_submit >= ret );
+
+			ctx.sq.to_submit -= ret;
+
+			/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+
+			// Release the consumed SQEs
+			__release_sqes( ctx );
+
+			/* paranoid */ verify( ! __preemption_enabled() );
+
+			ctx.proc->io.pending = false;
+		}
+
 		ready_schedule_lock();
 		bool ret = __cfa_io_drain( proc );
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision b5f17e14065831c1bd56680005fa1f733098d29f)
+++ libcfa/src/concurrency/kernel.hfa	(revision 038a0bd82f1bc2ad25f823b8d9291fb043a0a201)
@@ -68,5 +68,4 @@
 		unsigned last;
 		signed   cpu;
-		// unsigned long long int cutoff;
 	} rdq;
 
@@ -154,5 +153,5 @@
 };
 
-struct __attribute__((aligned(128))) __cache_id_t {
+struct __attribute__((aligned(16))) __cache_id_t {
 	volatile unsigned id;
 };
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision b5f17e14065831c1bd56680005fa1f733098d29f)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision 038a0bd82f1bc2ad25f823b8d9291fb043a0a201)
@@ -303,9 +303,4 @@
 			lanes.help[idx].dst = 0;
 			lanes.help[idx].tri = 0;
-		}
-
-		caches = alloc( cpu_info.llc_count );
-		for( idx; (size_t)cpu_info.llc_count ) {
-			(caches[idx]){};
 		}
 	#else
@@ -404,5 +399,9 @@
 		/* paranoid */ verify(cpu < cpu_info.hthrd_count);
 		unsigned this_cache = cpu_info.llc_map[cpu].cache;
-		__atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED);
+
+		// Super important: don't write the same value over and over again
+		// We want to maximise our chances that this particular value stays in cache
+		if(lanes.caches[this / READYQ_SHARD_FACTOR].id != this_cache)
+			__atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED);
 
 		const unsigned long long ctsc = rdtscl();
@@ -506,22 +505,4 @@
 	}
 
-	static inline int pop_getcpu(processor * proc, __ready_queue_caches_t * caches) {
-		const int prv = proc->rdq.cpu;
-		const int cpu = __kernel_getcpu();
-		if( prv != proc->rdq.cpu ) {
-			unsigned pidx = cpu_info.llc_map[prv].cache;
-			/* paranoid */ verify(pidx < cpu_info.llc_count);
-
-			unsigned nidx = cpu_info.llc_map[cpu].cache;
-			/* paranoid */ verify(pidx < cpu_info.llc_count);
-
-			depart(caches[pidx]);
-			arrive(caches[nidx]);
-
-			__STATS( /* cpu migs++ */ )
-		}
-		return proc->rdq.cpu = cpu;
-	}
-
 	// Pop from the ready queue from a given cluster
 	__attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
@@ -530,6 +511,5 @@
 
 		processor * const proc = kernelTLS().this_processor;
-		const int cpu = pop_getcpu( proc, caches );
-		// const int cpu = __kernel_getcpu();
+		const int cpu = __kernel_getcpu();
 		/* paranoid */ verify(cpu >= 0);
 		/* paranoid */ verify(cpu < cpu_info.hthrd_count);
@@ -548,5 +528,5 @@
 			unsigned long long max = 0;
 			for(i; READYQ_SHARD_FACTOR) {
-				unsigned long long tsc = moving_average(ctsc - ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
+				unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
 				if(tsc > max) max = tsc;
 			}
@@ -569,5 +549,5 @@
 			unsigned long long max = 0;
 			for(i; READYQ_SHARD_FACTOR) {
-				unsigned long long tsc = moving_average(ctsc - ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
+				unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
 				if(tsc > max) max = tsc;
 			}
@@ -577,6 +557,5 @@
 				proc->rdq.target = MAX;
 				lanes.help[target / READYQ_SHARD_FACTOR].tri++;
-				if(moving_average(ctsc - lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) {
-					__STATS( __tls_stats()->ready.pop.helped[target]++; )
+				if(moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) {
 					thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
 					proc->rdq.last = target;
@@ -587,6 +566,5 @@
 
 			unsigned last = proc->rdq.last;
-			if(last != MAX && moving_average(ctsc - lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) {
-				__STATS( __tls_stats()->ready.pop.helped[last]++; )
+			if(last != MAX && moving_average(ctsc, lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) {
 				thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help));
 				if(t) return t;
Index: src/AST/Decl.hpp
===================================================================
--- src/AST/Decl.hpp	(revision b5f17e14065831c1bd56680005fa1f733098d29f)
+++ src/AST/Decl.hpp	(revision 038a0bd82f1bc2ad25f823b8d9291fb043a0a201)
@@ -34,5 +34,5 @@
 // Must be included in *all* AST classes; should be #undef'd at the end of the file
 #define MUTATE_FRIEND \
-    template<typename node_t> friend node_t * mutate(const node_t * node); \
+	template<typename node_t> friend node_t * mutate(const node_t * node); \
 	template<typename node_t> friend node_t * shallowCopy(const node_t * node);
 
@@ -135,5 +135,4 @@
 	std::vector< ptr<Expr> > withExprs;
 
-
 	FunctionDecl( const CodeLocation & loc, const std::string & name, std::vector<ptr<TypeDecl>>&& forall,
 		std::vector<ptr<DeclWithType>>&& params, std::vector<ptr<DeclWithType>>&& returns,
Index: src/AST/Eval.hpp
===================================================================
--- src/AST/Eval.hpp	(revision b5f17e14065831c1bd56680005fa1f733098d29f)
+++ src/AST/Eval.hpp	(revision 038a0bd82f1bc2ad25f823b8d9291fb043a0a201)
@@ -24,6 +24,6 @@
 template< typename... Args >
 UntypedExpr * call( const CodeLocation & loc, const std::string & name, Args &&... args ) {
-	return new UntypedExpr { 
-		loc, new NameExpr { loc, name }, 
+	return new UntypedExpr {
+		loc, new NameExpr { loc, name },
 		std::vector< ptr< Expr > > { std::forward< Args >( args )... } };
 }
Index: src/Validate/InitializerLength.cpp
===================================================================
--- src/Validate/InitializerLength.cpp	(revision b5f17e14065831c1bd56680005fa1f733098d29f)
+++ src/Validate/InitializerLength.cpp	(revision 038a0bd82f1bc2ad25f823b8d9291fb043a0a201)
@@ -14,5 +14,5 @@
 //
 
-//#include "InitializerLength.hpp"
+#include "InitializerLength.hpp"
 
 #include "AST/Expr.hpp"
Index: src/Validate/InitializerLength.hpp
===================================================================
--- src/Validate/InitializerLength.hpp	(revision b5f17e14065831c1bd56680005fa1f733098d29f)
+++ src/Validate/InitializerLength.hpp	(revision 038a0bd82f1bc2ad25f823b8d9291fb043a0a201)
@@ -14,4 +14,10 @@
 //
 
+#pragma once
+
+namespace ast {
+	class TranslationUnit;
+}
+
 namespace Validate {
 
