Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision 95472ee122aac2bc6a4be39aef8393326cc0aeb6)
+++ libcfa/src/concurrency/coroutine.cfa	(revision af4487d91ae84341a1ef6af963e9caf724f57d50)
@@ -215,8 +215,4 @@
 		return cor;
 	}
-
-	struct $coroutine * __cfactx_cor_active(void) {
-		return active_coroutine();
-	}
 }
 
Index: libcfa/src/concurrency/exception.cfa
===================================================================
--- libcfa/src/concurrency/exception.cfa	(revision af4487d91ae84341a1ef6af963e9caf724f57d50)
+++ libcfa/src/concurrency/exception.cfa	(revision af4487d91ae84341a1ef6af963e9caf724f57d50)
@@ -0,0 +1,91 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// exception.cfa -- Exceptions in a concurrent environment.
+//
+// Author           : Andrew Beach
+// Created On       : Mon Aug 17 10:41:00 2020
+// Last Modified By : Andrew Beach
+// Last Modified On : Tue Aug 25 14:41:00 2020
+// Update Count     : 0
+//
+
+extern "C" {
+// use this define to make unwind.h play nice, definitely a hack
+#define HIDE_EXPORTS
+#include <unwind.h>
+#undef HIDE_EXPORTS
+}
+
+#include "invoke.h"
+#include "exception.hfa"
+#include "coroutine.hfa"
+
+extern struct $thread * mainThread;
+
+// Template for the _Unwind_Stop_Fn callbacks below: keep unwinding (_URC_NO_REASON) until _UA_END_OF_STACK, then run the statements passed as __VA_ARGS__.
+#define STOP_AT_END_FUNCTION(NAME, ...) \
+static _Unwind_Reason_Code NAME( \
+		int version, \
+		_Unwind_Action actions, \
+		_Unwind_Exception_Class exception_class, \
+		struct _Unwind_Exception * unwind_exception, \
+		struct _Unwind_Context * unwind_context, \
+		void * stop_param) { \
+	verify(actions & _UA_CLEANUP_PHASE); \
+	verify(actions & _UA_FORCE_UNWIND); \
+	verify(!(actions & _UA_SEARCH_PHASE)); \
+	verify(!(actions & _UA_HANDLER_FRAME)); \
+	if ( actions & _UA_END_OF_STACK ) { \
+		__VA_ARGS__ /* no return follows this branch, so these statements must not fall through */ \
+	} else { \
+		return _URC_NO_REASON; \
+	} \
+}
+
+STOP_AT_END_FUNCTION(main_cancelstop, // stop function selected when the cancelled thread is mainThread
+	abort();
+)
+
+STOP_AT_END_FUNCTION(thread_cancelstop, // stop function selected for a cancelled thread other than mainThread
+	// TODO: Instead pass information to the joiner.
+	abort();
+)
+
+STOP_AT_END_FUNCTION(coroutine_cancelstop, // stop function selected when the cancellation happens inside a coroutine
+	// TODO: Instead pass information to the last resumer.
+	abort();
+)
+
+extern "C" {
+
+struct exception_context_t * this_exception_context(void) { // returns the exception context kept with the active coroutine's stack
+	return &__get_stack( active_coroutine() )->exception_context;
+}
+
+_Unwind_Reason_Code __cfaehm_cancellation_unwind( struct _Unwind_Exception * unwind_exception ) { // starts the forced unwind for a cancellation
+	_Unwind_Stop_Fn stop_func;
+	void * stop_param;
+	// Choose the stop function from what is executing: a coroutine, the main thread, or any other thread.
+	struct $thread * this_thread = TL_GET( this_thread );
+	if ( &this_thread->self_cor != this_thread->curr_cor ) { // current coroutine is not the thread's own self_cor
+		struct $coroutine * cor = this_thread->curr_cor;
+		cor->cancellation = unwind_exception; // record the exception on the coroutine being cancelled
+
+		stop_func = coroutine_cancelstop;
+		stop_param = cor;
+	} else if ( mainThread == this_thread ) {
+		stop_func = main_cancelstop;
+		stop_param = (void *)0x22; // NOTE(review): looks like an arbitrary placeholder; main_cancelstop never reads stop_param
+	} else {
+		stop_func = thread_cancelstop;
+		stop_param = this_thread;
+	}
+	// The result of _Unwind_ForcedUnwind is returned unchanged; all three stop functions abort() at end of stack.
+	return _Unwind_ForcedUnwind( unwind_exception, stop_func, stop_param );
+}
+
+}
Index: libcfa/src/concurrency/exception.hfa
===================================================================
--- libcfa/src/concurrency/exception.hfa	(revision af4487d91ae84341a1ef6af963e9caf724f57d50)
+++ libcfa/src/concurrency/exception.hfa	(revision af4487d91ae84341a1ef6af963e9caf724f57d50)
@@ -0,0 +1,35 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// exception.hfa -- Exceptions in a concurrent environment.
+//
+// Author           : Andrew Beach
+// Created On       : Mon Aug 24 10:41:00 2020
+// Last Modified By : Andrew Beach
+// Last Modified On : Mon Aug 24 14:27:00 2020
+// Update Count     : 0
+//
+
+#pragma once
+
+#include "bits/defs.hfa"
+#include "invoke.h"
+struct _Unwind_Exception;
+
+// It must also be usable as a C header file.
+
+#ifdef __cforall
+extern "C" {
+#endif
+
+struct exception_context_t * this_exception_context(void) OPTIONAL_THREAD; // exception context of the active coroutine
+// Starts a cancellation unwind. NOTE(review): _Unwind_Reason_Code is from <unwind.h>, not included directly here -- confirm defs.hfa/invoke.h or the includer provides it.
+_Unwind_Reason_Code __cfaehm_cancellation_unwind(
+		struct _Unwind_Exception * unwind_exception ) OPTIONAL_THREAD;
+
+#ifdef __cforall
+}
+#endif
Index: libcfa/src/concurrency/invoke.c
===================================================================
--- libcfa/src/concurrency/invoke.c	(revision 95472ee122aac2bc6a4be39aef8393326cc0aeb6)
+++ libcfa/src/concurrency/invoke.c	(revision af4487d91ae84341a1ef6af963e9caf724f57d50)
@@ -29,5 +29,4 @@
 // Called from the kernel when starting a coroutine or task so must switch back to user mode.
 
-extern struct $coroutine * __cfactx_cor_active(void);
 extern struct $coroutine * __cfactx_cor_finish(void);
 extern void __cfactx_cor_leave ( struct $coroutine * );
@@ -36,8 +35,4 @@
 extern void disable_interrupts() OPTIONAL_THREAD;
 extern void enable_interrupts( __cfaabi_dbg_ctx_param );
-
-struct exception_context_t * this_exception_context() {
-	return &__get_stack( __cfactx_cor_active() )->exception_context;
-}
 
 void __cfactx_invoke_coroutine(
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision 95472ee122aac2bc6a4be39aef8393326cc0aeb6)
+++ libcfa/src/concurrency/invoke.h	(revision af4487d91ae84341a1ef6af963e9caf724f57d50)
@@ -98,6 +98,4 @@
 	}
 
-	struct exception_context_t * this_exception_context();
-
 	// struct which calls the monitor is accepting
 	struct __waitfor_mask_t {
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 95472ee122aac2bc6a4be39aef8393326cc0aeb6)
+++ libcfa/src/concurrency/io/setup.cfa	(revision af4487d91ae84341a1ef6af963e9caf724f57d50)
@@ -384,5 +384,5 @@
 			/* paranoid */ verify( is_pow2( params_in.num_ready ) || (params_in.num_ready < 8) );
 			sq.ready_cnt = max( params_in.num_ready, 8 );
-			sq.ready = alloc_align( 64, sq.ready_cnt );
+			sq.ready = alloc( sq.ready_cnt, 64`align );
 			for(i; sq.ready_cnt) {
 				sq.ready[i] = -1ul32;
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 95472ee122aac2bc6a4be39aef8393326cc0aeb6)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision af4487d91ae84341a1ef6af963e9caf724f57d50)
@@ -579,4 +579,5 @@
 
 	// Lock the RWlock so no-one pushes/pops while we are changing the queue
+	disable_interrupts();
 	uint_fast32_t last_size = ready_mutate_lock();
 
@@ -586,4 +587,6 @@
 	// Unlock the RWlock
 	ready_mutate_unlock( last_size );
+	enable_interrupts_noPoll(); // Don't poll, could be in main cluster
+
 
 	this.io.cnt  = num_io;
@@ -601,4 +604,5 @@
 
 	// Lock the RWlock so no-one pushes/pops while we are changing the queue
+	disable_interrupts();
 	uint_fast32_t last_size = ready_mutate_lock();
 
@@ -608,4 +612,5 @@
 	// Unlock the RWlock
 	ready_mutate_unlock( last_size );
+	enable_interrupts_noPoll(); // Don't poll, could be in main cluster
 
 	#if !defined(__CFA_NO_STATISTICS__)
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision 95472ee122aac2bc6a4be39aef8393326cc0aeb6)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision af4487d91ae84341a1ef6af963e9caf724f57d50)
@@ -215,4 +215,29 @@
 }
 
+static inline [unsigned, bool] idx_from_r(unsigned r, int preferred) { // maps random value r to [lane index, true if taken from the preferred lanes]
+	unsigned i;
+	bool local;
+	#if defined(BIAS)
+		unsigned rlow  = r % BIAS;
+		unsigned rhigh = r / BIAS;
+		if((0 != rlow) && preferred >= 0) { // preferred must be signed: a negative value means "no preferred lanes"
+			// (BIAS - 1) out of BIAS chances
+			// Use preferred queues (one of the 4 lanes starting at preferred)
+			i = preferred + (rhigh % 4);
+			local = true;
+		}
+		else {
+			// 1 out of BIAS chances, or no preferred lanes
+			// Use all queues
+			i = rhigh;
+			local = false;
+		}
+	#else
+		i = r; // without BIAS the random value is used as the index directly; preferred is ignored
+		local = false;
+	#endif
+	return [i, local];
+}
+
 //-----------------------------------------------------------------------
 __attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
@@ -222,7 +247,8 @@
 	thrd->link.ts = rdtscl();
 
-	#if defined(BIAS) && !defined(__CFA_NO_STATISTICS__)
-		bool local = false;
-		int preferred =
+	__attribute__((unused)) bool local;
+	__attribute__((unused)) int preferred;
+	#if defined(BIAS)
+		preferred =
 			//*
 			kernelTLS.this_processor ? kernelTLS.this_processor->id * 4 : -1;
@@ -230,6 +256,4 @@
 			thrd->link.preferred * 4;
 			//*/
-
-
 	#endif
 
@@ -238,26 +262,12 @@
 	do {
 		// Pick the index of a lane
-		#if defined(BIAS)
-			unsigned r = __tls_rand();
-			unsigned rlow  = r % BIAS;
-			unsigned rhigh = r / BIAS;
-			if((0 != rlow) && preferred >= 0) {
-				// (BIAS - 1) out of BIAS chances
-				// Use perferred queues
-				i = preferred + (rhigh % 4);
-
-				#if !defined(__CFA_NO_STATISTICS__)
-					local = true;
-					__tls_stats()->ready.pick.push.local++;
-				#endif
-			}
-			else {
-				// 1 out of BIAS chances
-				// Use all queues
-				i = rhigh;
-				local = false;
-			}
-		#else
-			i = __tls_rand();
+		// unsigned r = __tls_rand();
+		unsigned r = __tls_rand_fwd();
+		[i, local] = idx_from_r(r, preferred);
+
+		#if !defined(__CFA_NO_STATISTICS__)
+			if(local) {
+				__tls_stats()->ready.pick.push.local++;
+			}
 		#endif
 
@@ -274,5 +284,9 @@
 
 	// Actually push it
-	bool lane_first = push(lanes.data[i], thrd);
+	#ifdef USE_SNZI
+		bool lane_first =
+	#endif
+
+	push(lanes.data[i], thrd);
 
 	#ifdef USE_SNZI
@@ -287,4 +301,6 @@
 	#endif
 
+	__tls_rand_advance_bck();
+
 	// Unlock and return
 	__atomic_unlock( &lanes.data[i].lock );
@@ -311,8 +327,11 @@
 	/* paranoid */ verify( lanes.count > 0 );
 	unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
+	int preferred;
 	#if defined(BIAS)
 		// Don't bother trying locally too much
 		int local_tries = 8;
-	#endif
+		preferred = kernelTLS.this_processor->id * 4;
+	#endif
+
 
 	// As long as the list is not empty, try finding a lane that isn't empty and pop from it
@@ -323,36 +342,21 @@
 	#endif
 		// Pick two lists at random
-		unsigned i,j;
-		#if defined(BIAS)
-			#if !defined(__CFA_NO_STATISTICS__)
-				bool local = false;
-			#endif
-			uint64_t r = __tls_rand();
-			unsigned rlow  = r % BIAS;
-			uint64_t rhigh = r / BIAS;
-			if(local_tries && 0 != rlow) {
-				// (BIAS - 1) out of BIAS chances
-				// Use perferred queues
-				unsigned pid = kernelTLS.this_processor->id * 4;
-				i = pid + (rhigh % 4);
-				j = pid + ((rhigh >> 32ull) % 4);
-
-				// count the tries
-				local_tries--;
-
-				#if !defined(__CFA_NO_STATISTICS__)
-					local = true;
-					__tls_stats()->ready.pick.pop.local++;
-				#endif
-			}
-			else {
-				// 1 out of BIAS chances
-				// Use all queues
-				i = rhigh;
-				j = rhigh >> 32ull;
-			}
-		#else
-			i = __tls_rand();
-			j = __tls_rand();
+		// unsigned ri = __tls_rand();
+		// unsigned rj = __tls_rand();
+		unsigned ri = __tls_rand_bck();
+		unsigned rj = __tls_rand_bck();
+
+		unsigned i, j;
+		__attribute__((unused)) bool locali, localj;
+		[i, locali] = idx_from_r(ri, preferred);
+		[j, localj] = idx_from_r(rj, preferred);
+
+		#if !defined(__CFA_NO_STATISTICS__)
+			if(locali) {
+				__tls_stats()->ready.pick.pop.local++;
+			}
+			if(localj) {
+				__tls_stats()->ready.pick.pop.local++;
+			}
 		#endif
 
@@ -364,5 +368,5 @@
 		if(thrd) {
 			#if defined(BIAS) && !defined(__CFA_NO_STATISTICS__)
-				if( local ) __tls_stats()->ready.pick.pop.lsuccess++;
+				if( locali || localj ) __tls_stats()->ready.pick.pop.lsuccess++;
 			#endif
 			return thrd;
@@ -543,5 +547,5 @@
 
 		// Allocate new array (uses realloc and memcpies the data)
-		lanes.data = alloc(lanes.data, ncount);
+		lanes.data = alloc( ncount, lanes.data`realloc );
 
 		// Fix the moved data
@@ -634,5 +638,5 @@
 
 		// Allocate new array (uses realloc and memcpies the data)
-		lanes.data = alloc(lanes.data, lanes.count);
+		lanes.data = alloc( lanes.count, lanes.data`realloc );
 
 		// Fix the moved data
