Index: libcfa/src/bits/random.hfa
===================================================================
--- libcfa/src/bits/random.hfa	(revision 78de1e5e2ddae0963f8daf4eb79473dc16b5ef30)
+++ libcfa/src/bits/random.hfa	(revision dd46fd32b4edcf85ecf2f4f00b042e3a3852db2e)
@@ -10,6 +10,6 @@
 // Created On       : Fri Jan 14 07:18:11 2022
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Nov 21 17:50:12 2022
-// Update Count     : 15
+// Last Modified On : Wed Nov 30 18:32:25 2022
+// Update Count     : 111
 // 
 
@@ -17,37 +17,143 @@
 
 #include <stdint.h>
+
+#define GLUE2( x, y ) x##y
+#define GLUE( x, y ) GLUE2( x, y )
 
 // Set default PRNG for architecture size.
 #ifdef __x86_64__										// 64-bit architecture
 #define LEHMER64
+#define XORSHIFT_6_21_7
+//#define XOSHIRO256PP
+//#define XOSHIRO128PP
 #else													// 32-bit architecture
+#define LEHMER64
 #define XORSHIFT_6_21_7
 #endif // __x86_64__
 
 // C/CFA PRNG name and random-state.
+
 #ifdef LEHMER64
-#define PRNG_NAME lehmer64
-#define PRNG_ARG_T __uint128_t
+#define PRNG_NAME_64 lehmer64
+#define PRNG_STATE_64_T __uint128_t
 #endif // LEHMER64
 
 #ifdef XORSHIFT_6_21_7
-#define PRNG_NAME xorshift_6_21_7
-#define PRNG_ARG_T uint32_t
+#define PRNG_NAME_32 xorshift_6_21_7
+#define PRNG_STATE_32_T uint32_t
 #endif // XORSHIFT_6_21_7
 
+#ifdef XOSHIRO256PP
+#define PRNG_NAME_64 xoshiro256pp
+#define PRNG_STATE_64_T struct GLUE(PRNG_NAME_64,_t)
+PRNG_STATE_64_T { uint64_t s[4]; };
+#endif // XOSHIRO256PP
+
+#ifdef XOSHIRO128PP
+#define PRNG_NAME_32 xoshiro128pp
+#define PRNG_STATE_32_T struct GLUE(PRNG_NAME_32,_t)
+PRNG_STATE_32_T { uint32_t s[4]; };
+#endif // XOSHIRO128PP
+
+#define PRNG_SET_SEED_64 GLUE(PRNG_NAME_64,_set_seed)
+#define PRNG_SET_SEED_32 GLUE(PRNG_NAME_32,_set_seed)
+
+
+// Default PRNG used by runtime.
+#ifdef __x86_64__										// 64-bit architecture
+#define PRNG_NAME PRNG_NAME_64
+#define PRNG_STATE_T PRNG_STATE_64_T
+#else													// 32-bit architecture
+#define PRNG_NAME PRNG_NAME_32
+#define PRNG_STATE_T PRNG_STATE_32_T
+#endif // __x86_64__
+
+#define PRNG_SET_SEED GLUE(PRNG_NAME,_set_seed)
+
+
 #ifdef __cforall										// don't include in C code (invoke.h)
 
-// Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is returned
-// (copied), and then compute and store the next random value.
-
-#if defined(__SIZEOF_INT128__)
-//--------------------------------------------------
+// https://prng.di.unimi.it/xoshiro128plusplus.c
+//
+// This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid generators. It has excellent speed, a state size
+// (128 bits) that is large enough for mild parallelism, and it passes all tests we are aware of.
+//
+// For generating just single-precision (i.e., 32-bit) floating-point numbers, xoshiro128+ is even faster.
+//
+// The state must be seeded so that it is not everywhere zero.
+
+#ifndef XOSHIRO128PP
+struct xoshiro128pp_t { uint32_t s[4]; };
+#endif // ! XOSHIRO128PP
+
+static inline uint32_t xoshiro128pp( xoshiro128pp_t & rs ) with(rs) {	// with(rs): s[i] below names rs.s[i]
+	inline uint32_t rotl( const uint32_t x, int k ) {	// rotate x left by k bits, k must be in [1,31]
+		return (x << k) | (x >> (32 - k));
+	}
+
+	const uint32_t result = rotl( s[0] + s[3], 7 ) + s[0];	// output is computed from the state before it is advanced
+	const uint32_t t = s[1] << 9;						// taken from s[1] before s[1] is modified below
+
+	s[2] ^= s[0];										// order of the following updates matters: each reads earlier results
+	s[3] ^= s[1];
+	s[1] ^= s[2];
+	s[0] ^= s[3];
+	s[2] ^= t;
+	s[3] = rotl( s[3], 11 );
+	return result;
+} // xoshiro128pp
+
+static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) {	// fill all four state words from seed
+	state = (xoshiro128pp_t){ {seed, seed, seed, seed != 0 ? seed : (uint32_t)1} };	// state must not be all zero: seed 0 => last word 1
+} // xoshiro128pp_set_seed
+
+// This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.  It has excellent (sub-ns) speed, a state
+// (256 bits) that is large enough for any parallel application, and it passes all tests we are aware of.
+//
+// For generating just floating-point numbers, xoshiro256+ is even faster.
+//
+// The state must be seeded so that it is not everywhere zero. If you have a 64-bit seed, we suggest to seed a
+// splitmix64 generator and use its output to fill s.
+
+#ifndef XOSHIRO256PP
+struct xoshiro256pp_t { uint64_t s[4]; };
+#endif // ! XOSHIRO256PP
+
+static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) {	// with(rs): s[i] below names rs.s[i]
+	inline uint64_t rotl(const uint64_t x, int k) {		// rotate x left by k bits, k must be in [1,63]
+		return (x << k) | (x >> (64 - k));
+	}
+
+	const uint64_t result = rotl( s[0] + s[3], 23 ) + s[0];	// output is computed from the state before it is advanced
+	const uint64_t t = s[1] << 17;						// taken from s[1] before s[1] is modified below
+
+	s[2] ^= s[0];										// order of the following updates matters: each reads earlier results
+	s[3] ^= s[1];
+	s[1] ^= s[2];
+	s[0] ^= s[3];
+	s[2] ^= t;
+	s[3] = rotl( s[3], 45 );
+	return result;
+} // xoshiro256pp
+
+static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state,  uint64_t seed ) {	// fill all four state words from seed
+	state = (xoshiro256pp_t){ {seed, seed, seed, seed != 0 ? seed : (uint64_t)1} };	// state must not be all zero: seed 0 => last word 1
+} // xoshiro256pp_set_seed
+
+#ifdef __SIZEOF_INT128__
+	// Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is
+	// returned (copied), and then the next random value is computed and stored.
+	//--------------------------------------------------
 	static inline uint64_t lehmer64( __uint128_t & state ) {
 		__uint128_t ret = state;
 		state *= 0xda942042e4dd58b5;
 		return ret >> 64;
-	}
-
-//--------------------------------------------------
+	} // lehmer64
+
+	static inline void lehmer64_set_seed( __uint128_t & state, uint64_t seed ) {	// reset the lehmer64 state from a 64-bit seed
+		state = seed;									// seed is widened into the 128-bit state; NOTE(review): seed 0 stays 0 under lehmer64's multiply
+	} // lehmer64_set_seed
+
+	//--------------------------------------------------
 	static inline uint64_t wyhash64( uint64_t & state ) {
 		state += 0x60bee2bee120fc15;
@@ -59,5 +165,9 @@
 		return m2;
 	}
-#endif
+
+	static inline void wyhash64_set_seed( uint64_t & state, uint64_t seed ) {	// state type matches wyhash64( uint64_t & )
+		state = seed;									// the seed becomes the generator state directly
+	} // wyhash64_set_seed
+#endif // __SIZEOF_INT128__
 
 //--------------------------------------------------
@@ -68,4 +178,8 @@
 	state ^= state << 17;
 	return ret;
+}
+
+static inline void xorshift_13_7_17_set_seed( uint64_t & state, uint64_t seed ) {	// 64-bit seed so the whole 64-bit state can be set
+	state = seed;
 }
 
@@ -79,4 +193,8 @@
 } // xorshift_6_21_7
 
+static inline void xorshift_6_21_7_set_seed( uint32_t & state, uint32_t seed ) {	// reset the xorshift_6_21_7 state
+	state = seed;										// the seed becomes the generator state directly
+} // xorshift_6_21_7_set_seed
+
 //--------------------------------------------------
 typedef struct {
@@ -105,11 +223,5 @@
 }
 
-//--------------------------------------------------
-static inline uint32_t LCG( uint32_t & state ) {		// linear congruential generator
-	uint32_t ret = state;
-	state = 36969 * (state & 65535) + (state >> 16);	// 36969 is NOT prime! No not change it!
-	return ret;
-} // LCG
-
+// Used in __tls_rand_fwd
 //--------------------------------------------------
 #define M  (1_l64u << 48_l64u)
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision 78de1e5e2ddae0963f8daf4eb79473dc16b5ef30)
+++ libcfa/src/concurrency/invoke.h	(revision dd46fd32b4edcf85ecf2f4f00b042e3a3852db2e)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Nov 21 17:40:24 2022
-// Update Count     : 55
+// Last Modified On : Tue Nov 29 20:42:21 2022
+// Update Count     : 56
 //
 
@@ -223,5 +223,5 @@
 		struct processor * last_proc;
 
-		PRNG_ARG_T random_state;						// fast random numbers
+		PRNG_STATE_T random_state;						// fast random numbers
 
 		#if defined( __CFA_WITH_VERIFY__ )
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 78de1e5e2ddae0963f8daf4eb79473dc16b5ef30)
+++ libcfa/src/concurrency/kernel.cfa	(revision dd46fd32b4edcf85ecf2f4f00b042e3a3852db2e)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Aug 31 07:08:20 2020
-// Update Count     : 71
+// Last Modified On : Wed Nov 30 18:14:08 2022
+// Update Count     : 76
 //
 
@@ -151,5 +151,5 @@
 	// Because of a bug, we couldn't initialized the seed on construction
 	// Do it here
-	__cfaabi_tls.rand_seed ^= rdtscl();
+	PRNG_SET_SEED( __cfaabi_tls.random_state, rdtscl() );
 	__cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
 	__tls_rand_advance_bck();
Index: libcfa/src/concurrency/kernel/fwd.hfa
===================================================================
--- libcfa/src/concurrency/kernel/fwd.hfa	(revision 78de1e5e2ddae0963f8daf4eb79473dc16b5ef30)
+++ libcfa/src/concurrency/kernel/fwd.hfa	(revision dd46fd32b4edcf85ecf2f4f00b042e3a3852db2e)
@@ -46,9 +46,6 @@
 			} preemption_state;
 
-			#if defined(__SIZEOF_INT128__)
-				__uint128_t rand_seed;
-			#else
-				uint64_t rand_seed;
-			#endif
+			PRNG_STATE_T random_state;
+
 			struct {
 				uint64_t fwd_seed;
@@ -57,5 +54,4 @@
 
 			struct __stats_t        * volatile this_stats;
-
 
 			#ifdef __CFA_WITH_VERIFY__
@@ -76,11 +72,12 @@
 		#define publicTLS_get( member ) ((typeof(__cfaabi_tls.member))__cfatls_get( __builtin_offsetof(KernelThreadData, member) ))
 
-		static inline uint64_t __tls_rand() {
-			return
-			#if defined(__SIZEOF_INT128__)
-				lehmer64( kernelTLS().rand_seed );
-			#else
-				xorshift_13_7_17( kernelTLS().rand_seed );
-			#endif
+		static inline
+			#ifdef __x86_64__							// 64-bit architecture
+			uint64_t
+			#else										// 32-bit architecture
+			uint32_t
+			#endif // __x86_64__
+		__tls_rand() {
+			return PRNG_NAME( kernelTLS().random_state );
 		}
 
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 78de1e5e2ddae0963f8daf4eb79473dc16b5ef30)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision dd46fd32b4edcf85ecf2f4f00b042e3a3852db2e)
@@ -108,5 +108,5 @@
 extern void __wake_proc(processor *);
 extern int cfa_main_returned;							// from interpose.cfa
-PRNG_ARG_T __global_random_prime = 4_294_967_291u;
+size_t __global_random_prime = 4_294_967_291u;
 bool __global_random_mask = false;
 
@@ -135,14 +135,14 @@
 // Global state
 __thread struct KernelThreadData __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" ))) @= {
-	NULL,												// cannot use 0p
-	NULL,
-	false,
-	{ 1, false, false },
-	0,
-	{ 0, 0 },
-	NULL,
+	.this_thread : NULL,								// cannot use 0p
+	.this_processor : NULL,
+	.sched_lock : false,
+	.preemption_state : { .disable_count : 1, .enabled : false, .in_progress : false },
+	// random_state uninitialized
+	.ready_rng : { .fwd_seed : 0, .bck_seed : 0 },
+	.this_stats : NULL,
 	#ifdef __CFA_WITH_VERIFY__
-		false,
-		0,
+		.in_sched_lock : false,
+		.sched_id : 0,
 	#endif
 };
@@ -521,5 +521,5 @@
 	preferred = ready_queue_new_preferred();
 	last_proc = 0p;
-	random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl();
+	PRNG_SET_SEED( random_state,  __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl() );
 	#if defined( __CFA_WITH_VERIFY__ )
 		executing = 0p;
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision 78de1e5e2ddae0963f8daf4eb79473dc16b5ef30)
+++ libcfa/src/concurrency/thread.cfa	(revision dd46fd32b4edcf85ecf2f4f00b042e3a3852db2e)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Nov 22 22:18:37 2022
-// Update Count     : 81
+// Last Modified On : Wed Nov 30 18:14:07 2022
+// Update Count     : 95
 //
 
@@ -26,5 +26,6 @@
 #include "invoke.h"
 
-extern PRNG_ARG_T __global_random_seed, __global_random_prime;
+extern size_t __global_random_seed;
+extern size_t __global_random_prime;
 extern bool __global_random_mask;
 
@@ -53,5 +54,5 @@
 	preferred = ready_queue_new_preferred();
 	last_proc = 0p;
-	random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl();
+	PRNG_SET_SEED( random_state, __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl() );
 	#if defined( __CFA_WITH_VERIFY__ )
 		executing = 0p;
@@ -228,13 +229,14 @@
 
 void set_seed( size_t seed ) {
-	PRNG_ARG_T & state = active_thread()->random_state;
-	state = __global_random_seed = seed;
+	PRNG_STATE_T & state = active_thread()->random_state;
+	__global_random_seed = seed;
+	PRNG_SET_SEED( state, seed );
 	(void)PRNG_NAME( state );							// prime PRNG
-	__global_random_prime = state;
+	__global_random_prime = seed;
 	__global_random_mask = true;
 } // set_seed
 
 size_t prng( void ) {									// [0,UINT_MAX]
-	PRNG_ARG_T & state = active_thread()->random_state;
+	PRNG_STATE_T( & state ) = active_thread()->random_state;
 	return PRNG_NAME( state );
 } // prng
Index: libcfa/src/startup.cfa
===================================================================
--- libcfa/src/startup.cfa	(revision 78de1e5e2ddae0963f8daf4eb79473dc16b5ef30)
+++ libcfa/src/startup.cfa	(revision dd46fd32b4edcf85ecf2f4f00b042e3a3852db2e)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jul 24 16:21:57 2018
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Nov 20 21:26:40 2022
-// Update Count     : 59
+// Last Modified On : Wed Nov 30 18:14:06 2022
+// Update Count     : 68
 //
 
@@ -21,6 +21,6 @@
 #include "startup.hfa"
 
-extern PRNG_ARG_T __global_random_seed;					// sequential/concurrent
-extern PRNG_ARG_T __global_random_state;				// sequential
+extern size_t __global_random_seed;						// sequential/concurrent
+extern PRNG_STATE_T __global_random_state;				// sequential
 
 extern "C" {
@@ -69,5 +69,6 @@
 	void __cfaabi_core_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_CORE ) ));
 	void __cfaabi_core_startup( void ) {
-		__global_random_state = __global_random_seed = rdtscl();
+		__global_random_seed = rdtscl();
+		PRNG_SET_SEED( __global_random_state, __global_random_seed );
 		__cfaabi_interpose_startup();
 		__cfaabi_device_startup();
Index: libcfa/src/stdlib.cfa
===================================================================
--- libcfa/src/stdlib.cfa	(revision 78de1e5e2ddae0963f8daf4eb79473dc16b5ef30)
+++ libcfa/src/stdlib.cfa	(revision dd46fd32b4edcf85ecf2f4f00b042e3a3852db2e)
@@ -10,6 +10,6 @@
 // Created On       : Thu Jan 28 17:10:29 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Nov 22 22:18:36 2022
-// Update Count     : 613
+// Last Modified On : Wed Nov 30 18:14:05 2022
+// Update Count     : 622
 //
 
@@ -226,8 +226,8 @@
 
 // would be cool to make hidden but it's needed for libcfathread
-__attribute__((visibility("default"))) PRNG_ARG_T __global_random_seed; // sequential/concurrent
-__attribute__((visibility("hidden"))) PRNG_ARG_T __global_random_state; // sequential only
-
-void set_seed( size_t seed ) { __global_random_state = __global_random_seed = seed; PRNG_NAME( __global_random_state ); }
+__attribute__((visibility("default"))) size_t __global_random_seed; // sequential/concurrent
+__attribute__((visibility("hidden"))) PRNG_STATE_T __global_random_state; // sequential only
+
+void set_seed( size_t seed ) { __global_random_seed = seed; PRNG_SET_SEED( __global_random_state, seed ); PRNG_NAME( __global_random_state ); }
 size_t get_seed() { return __global_random_seed; }
 size_t prng( void ) { return PRNG_NAME( __global_random_state ); } // [0,UINT_MAX]
Index: libcfa/src/stdlib.hfa
===================================================================
--- libcfa/src/stdlib.hfa	(revision 78de1e5e2ddae0963f8daf4eb79473dc16b5ef30)
+++ libcfa/src/stdlib.hfa	(revision dd46fd32b4edcf85ecf2f4f00b042e3a3852db2e)
@@ -10,6 +10,6 @@
 // Created On       : Thu Jan 28 17:12:35 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Nov 22 22:48:59 2022
-// Update Count     : 736
+// Last Modified On : Wed Nov 30 18:18:26 2022
+// Update Count     : 760
 //
 
@@ -422,11 +422,11 @@
 	uint32_t callcnt;									// call count
 	uint32_t seed;										// current seed
-	PRNG_ARG_T state;									// random state
+	PRNG_STATE_32_T state;								// random state
 }; // PRNG
 
 static inline {
-	void set_seed( PRNG32 & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; PRNG_NAME( state ); } // set seed
+	void set_seed( PRNG32 & prng, uint32_t seed_ ) with( prng ) { seed = seed_; PRNG_SET_SEED_32( state, seed ); PRNG_NAME_32( state ); } // set seed
 	uint32_t get_seed( PRNG32 & prng ) __attribute__(( warn_unused_result )) with( prng ) { return seed; } // get seed
-	uint32_t prng( PRNG32 & prng ) __attribute__(( warn_unused_result )) with( prng ) { callcnt += 1; return PRNG_NAME( state ); } // [0,UINT_MAX]
+	uint32_t prng( PRNG32 & prng ) __attribute__(( warn_unused_result )) with( prng ) { callcnt += 1; return PRNG_NAME_32( state ); } // [0,UINT_MAX]
 	uint32_t prng( PRNG32 & prng, size_t u ) __attribute__(( warn_unused_result )) { return prng( prng ) % u; } // [0,u)
 	uint32_t prng( PRNG32 & prng, size_t l, size_t u ) __attribute__(( warn_unused_result )) { return prng( prng, u - l + 1 ) + l; } // [l,u]
@@ -439,11 +439,11 @@
 	uint64_t callcnt;									// call count
 	uint64_t seed;										// current seed
-	PRNG_ARG_T state;									// random state
+	PRNG_STATE_64_T state;								// random state
 }; // PRNG
 
 static inline {
-	void set_seed( PRNG64 & prng, uint64_t seed_ ) with( prng ) { state = seed = seed_; PRNG_NAME( state ); } // set seed
+	void set_seed( PRNG64 & prng, uint64_t seed_ ) with( prng ) { seed = seed_; PRNG_SET_SEED_64( state, seed ); PRNG_NAME_64( state ); } // set seed
 	uint64_t get_seed( PRNG64 & prng ) __attribute__(( warn_unused_result )) with( prng ) { return seed; } // get seed
-	uint64_t prng( PRNG64 & prng ) __attribute__(( warn_unused_result )) with( prng ) { callcnt += 1; return PRNG_NAME( state ); } // [0,UINT_MAX]
+	uint64_t prng( PRNG64 & prng ) __attribute__(( warn_unused_result )) with( prng ) { callcnt += 1; return PRNG_NAME_64( state ); } // [0,UINT_MAX]
 	uint64_t prng( PRNG64 & prng, size_t u ) __attribute__(( warn_unused_result )) { return prng( prng ) % u; } // [0,u)
 	uint64_t prng( PRNG64 & prng, size_t l, size_t u ) __attribute__(( warn_unused_result )) { return prng( prng, u - l + 1 ) + l; } // [l,u]
