Index: libcfa/src/concurrency/kernel/cluster.cfa
===================================================================
--- libcfa/src/concurrency/kernel/cluster.cfa	(revision fc2c57a9f61b0ffd65628b4d2452ccb17ec5fc8a)
+++ libcfa/src/concurrency/kernel/cluster.cfa	(revision c4c8571a1aaa3100966db2d603b3a32857faebdd)
@@ -229,14 +229,14 @@
 			for( idx ; lanes_count ) {
 				__intrusive_lane_t & sl = readyQ.data[idx];
-				assert(!readyQ.data[idx].lock);
+				assert(!readyQ.data[idx].l.lock);
 
 					if(is_empty(sl)) {
-						assert( sl.anchor.next == 0p );
-						assert( sl.anchor.ts   == MAX );
-						assert( mock_head(sl)  == sl.prev );
+						assert( sl.l.anchor.next == 0p );
+						assert( sl.l.anchor.ts   == MAX );
+						assert( mock_head(sl)  == sl.l.prev );
 					} else {
-						assert( sl.anchor.next != 0p );
-						assert( sl.anchor.ts   != MAX );
-						assert( mock_head(sl)  != sl.prev );
+						assert( sl.l.anchor.next != 0p );
+						assert( sl.l.anchor.ts   != MAX );
+						assert( mock_head(sl)  != sl.l.prev );
 					}
 			}
@@ -249,6 +249,6 @@
 static inline void fix(__intrusive_lane_t & ll) {
 	if(is_empty(ll)) {
-		verify(ll.anchor.next == 0p);
-		ll.prev = mock_head(ll);
+		verify(ll.l.anchor.next == 0p);
+		ll.l.prev = mock_head(ll);
 	}
 }
@@ -299,6 +299,6 @@
 	tscs = alloc(count, tscs`realloc);
 	for(i; count) {
-		tscs[i].tv = rdtscl();
-		tscs[i].ma = 0;
+		tscs[i].t.tv = rdtscl();
+		tscs[i].t.ma = 0;
 	}
 }
@@ -400,5 +400,5 @@
 		for( idx; ncount ~ ocount) {
 			// Lock is not strictly needed but makes checking invariants much easier
-			__attribute__((unused)) bool locked = __atomic_try_acquire(&readyQ.data[idx].lock);
+			__attribute__((unused)) bool locked = __atomic_try_acquire(&readyQ.data[idx].l.lock);
 			verify(locked);
 
@@ -418,5 +418,5 @@
 
 			// Unlock the lane
-			__atomic_unlock(&readyQ.data[idx].lock);
+			__atomic_unlock(&readyQ.data[idx].l.lock);
 
 			// TODO print the queue statistics here
@@ -467,34 +467,38 @@
 }
 
+#define nested_offsetof(type, field) ((size_t)(&(((type*)0)->field)))
+
 // Ctor
 void ?{}( __intrusive_lane_t & this ) {
-	this.lock = false;
-	this.prev = mock_head(this);
-	this.anchor.next = 0p;
-	this.anchor.ts   = MAX;
+	this.l.lock = false;
+	this.l.prev = mock_head(this);
+	this.l.anchor.next = 0p;
+	this.l.anchor.ts   = MAX;
 	#if !defined(__CFA_NO_STATISTICS__)
-		this.cnt  = 0;
+		this.l.cnt  = 0;
 	#endif
 
 	// We add a boat-load of assertions here because the anchor code is very fragile
-	/* paranoid */ _Static_assert( offsetof( thread$, link ) == offsetof(__intrusive_lane_t, anchor) );
-	/* paranoid */ verify( offsetof( thread$, link ) == offsetof(__intrusive_lane_t, anchor) );
-	/* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, link )) == (uintptr_t)(&this.anchor) );
-	/* paranoid */ verify( &mock_head(this)->link.next == &this.anchor.next );
-	/* paranoid */ verify( &mock_head(this)->link.ts   == &this.anchor.ts   );
+	/* paranoid */ _Static_assert( offsetof( thread$, link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
+	/* paranoid */ verify( offsetof( thread$, link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
+	/* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, link )) == (uintptr_t)(&this.l.anchor) );
+	/* paranoid */ verify( &mock_head(this)->link.next == &this.l.anchor.next );
+	/* paranoid */ verify( &mock_head(this)->link.ts   == &this.l.anchor.ts   );
 	/* paranoid */ verify( mock_head(this)->link.next == 0p );
 	/* paranoid */ verify( mock_head(this)->link.ts   == MAX );
-	/* paranoid */ verify( mock_head(this) == this.prev );
-	/* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 );
-	/* paranoid */ verify( __alignof__(this) == 128 );
-	/* paranoid */ verifyf( ((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128) );
-}
+	/* paranoid */ verify( mock_head(this) == this.l.prev );
+	/* paranoid */ verify( __alignof__(__intrusive_lane_t) == 64 );
+	/* paranoid */ verify( __alignof__(this) == 64 );
+	/* paranoid */ verifyf( ((intptr_t)(&this) % 64) == 0, "Expected address to be aligned %p %% 64 == %zd", &this, ((intptr_t)(&this) % 64) );
+}
+
+#undef nested_offsetof
 
 // Dtor is trivial
 void ^?{}( __intrusive_lane_t & this ) {
 	// Make sure the list is empty
-	/* paranoid */ verify( this.anchor.next == 0p );
-	/* paranoid */ verify( this.anchor.ts   == MAX );
-	/* paranoid */ verify( mock_head(this)  == this.prev );
+	/* paranoid */ verify( this.l.anchor.next == 0p );
+	/* paranoid */ verify( this.l.anchor.ts   == MAX );
+	/* paranoid */ verify( mock_head(this)    == this.l.prev );
 }
 
Index: libcfa/src/concurrency/kernel/cluster.hfa
===================================================================
--- libcfa/src/concurrency/kernel/cluster.hfa	(revision fc2c57a9f61b0ffd65628b4d2452ccb17ec5fc8a)
+++ libcfa/src/concurrency/kernel/cluster.hfa	(revision c4c8571a1aaa3100966db2d603b3a32857faebdd)
@@ -39,7 +39,7 @@
 	if (ts_next == ULLONG_MAX) return;
 	unsigned long long now = rdtscl();
-	unsigned long long pma = __atomic_load_n(&tscs[ idx ].ma, __ATOMIC_RELAXED);
-	__atomic_store_n(&tscs[ idx ].tv, ts_next, __ATOMIC_RELAXED);
-	__atomic_store_n(&tscs[ idx ].ma, moving_average(now, ts_prev, pma), __ATOMIC_RELAXED);
+	unsigned long long pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);
+	__atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED);
+	__atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma), __ATOMIC_RELAXED);
 }
 
@@ -61,5 +61,5 @@
 		if(ptsc != ULLONG_MAX) {
 			/* paranoid */ verify( start + i < count );
-			unsigned long long tsc = moving_average(ctsc, ptsc, tscs[start + i].ma);
+			unsigned long long tsc = moving_average(ctsc, ptsc, tscs[start + i].t.ma);
 			if(tsc > max) max = tsc;
 		}
Index: libcfa/src/concurrency/kernel/fwd.hfa
===================================================================
--- libcfa/src/concurrency/kernel/fwd.hfa	(revision fc2c57a9f61b0ffd65628b4d2452ccb17ec5fc8a)
+++ libcfa/src/concurrency/kernel/fwd.hfa	(revision c4c8571a1aaa3100966db2d603b3a32857faebdd)
@@ -35,5 +35,5 @@
 extern "C" {
 	extern "Cforall" {
-		extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
+		extern __attribute__((aligned(64))) thread_local struct KernelThreadData {
 			struct thread$          * volatile this_thread;
 			struct processor        * volatile this_processor;
Index: libcfa/src/concurrency/kernel/private.hfa
===================================================================
--- libcfa/src/concurrency/kernel/private.hfa	(revision fc2c57a9f61b0ffd65628b4d2452ccb17ec5fc8a)
+++ libcfa/src/concurrency/kernel/private.hfa	(revision c4c8571a1aaa3100966db2d603b3a32857faebdd)
@@ -88,5 +88,5 @@
 #elif defined(CFA_HAVE_LINUX_RSEQ_H)
 	extern "Cforall" {
-		extern __attribute__((aligned(128))) thread_local volatile struct rseq __cfaabi_rseq;
+		extern __attribute__((aligned(64))) thread_local volatile struct rseq __cfaabi_rseq;
 	}
 #else
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision fc2c57a9f61b0ffd65628b4d2452ccb17ec5fc8a)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision c4c8571a1aaa3100966db2d603b3a32857faebdd)
@@ -152,5 +152,5 @@
 #elif defined(CFA_HAVE_LINUX_RSEQ_H)
 	extern "Cforall" {
-		__attribute__((aligned(128))) thread_local volatile struct rseq __cfaabi_rseq @= {
+		__attribute__((aligned(64))) thread_local volatile struct rseq __cfaabi_rseq @= {
 			.cpu_id : RSEQ_CPU_ID_UNINITIALIZED,
 		};
