Context Navigation

random.hfa@ 258aaab8

Visit:

ADT ast-experimental

Last change on this file since 258aaab8 was 4020f09, checked in by Peter A. Buhr <pabuhr@…>, 3 years ago
formatting, switch to typedef for PRNG complex state
Property mode set to `100644`
File size: 7.8 KB

Line
1	//
2	// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
3	//
4	// The contents of this file are covered under the licence agreement in the
5	// file "LICENCE" distributed with Cforall.
6	//
7	// random.hfa --
8	//
9	// Author : Peter A. Buhr
10	// Created On : Fri Jan 14 07:18:11 2022
11	// Last Modified By : Peter A. Buhr
12	// Last Modified On : Mon Dec 5 13:13:14 2022
13	// Update Count : 128
14	//
15
16	#pragma once
17
18	#include <stdint.h>
19
20	#define GLUE2( x, y ) x##y
21	#define GLUE( x, y ) GLUE2( x, y )
22
23	// Set default PRNG for architecture size.
24	#ifdef __x86_64__ // 64-bit architecture
25	#define LEHMER64
26	#define XORSHIFT_6_21_7
27	//#define XOSHIRO256PP
28	//#define XOSHIRO128PP
29	#else // 32-bit architecture
30	#define XORSHIFT_13_7_17
31	#define XORSHIFT_6_21_7
32	#endif // __x86_64__
33
34	// Define C/CFA PRNG name and random-state.
35
36	// SKULLDUGGERY: typedefs name struct and typedef with the same name to deal with CFA typedef numbering problem.
37
38	#ifdef LEHMER64
39	#define PRNG_NAME_64 lehmer64
40	#define PRNG_STATE_64_T __uint128_t
41	#endif // LEHMER64
42
43	#ifdef XORSHIFT_13_7_17
44	#define PRNG_NAME_64 xorshift_13_7_17
45	#define PRNG_STATE_64_T uint64_t
46	#endif // XORSHIFT_13_7_17
47
48	#ifdef XORSHIFT_6_21_7
49	#define PRNG_NAME_32 xorshift_6_21_7
50	#define PRNG_STATE_32_T uint32_t
51	#endif // XORSHIFT_6_21_7
52
53	#ifdef XOSHIRO256PP
54	#define PRNG_NAME_64 xoshiro256pp
55	#define PRNG_STATE_64_T GLUE(PRNG_NAME_64,_t)
56	typedef struct PRNG_STATE_64_T { uint64_t s[4]; } PRNG_STATE_64_T;
57	#endif // XOSHIRO256PP
58
59	#ifdef XOSHIRO128PP
60	#define PRNG_NAME_32 xoshiro128pp
61	#define PRNG_STATE_32_T GLUE(PRNG_NAME_32,_t)
62	typedef struct PRNG_STATE_32_T { uint32_t s[4]; } PRNG_STATE_32_T;
63	#endif // XOSHIRO128PP
64
65	#ifdef XORWOW
66	#define PRNG_NAME_32 xorwow
67	#define PRNG_STATE_32_T GLUE(PRNG_NAME_32,_t)
68	typedef struct PRNG_STATE_32_T { uint32_t a, b, c, d, counter; } PRNG_STATE_32_T;
69	#endif // XOSHIRO128PP
70
71	#define PRNG_SET_SEED_64 GLUE(PRNG_NAME_64,_set_seed)
72	#define PRNG_SET_SEED_32 GLUE(PRNG_NAME_32,_set_seed)
73
74
75	// Default PRNG used by runtime.
76	#ifdef __x86_64__ // 64-bit architecture
77	#define PRNG_NAME PRNG_NAME_64
78	#define PRNG_STATE_T PRNG_STATE_64_T
79	#else // 32-bit architecture
80	#define PRNG_NAME PRNG_NAME_32
81	#define PRNG_STATE_T PRNG_STATE_32_T
82	#endif // __x86_64__
83
84	#define PRNG_SET_SEED GLUE(PRNG_NAME,_set_seed)
85
86
87	#ifdef __cforall // don't include in C code (invoke.h)
88
89	// https://prng.di.unimi.it/xoshiro256starstar.c
90	//
91	// This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators. It has excellent (sub-ns) speed, a state
92	// (256 bits) that is large enough for any parallel application, and it passes all tests we are aware of.
93	//
94	// For generating just floating-point numbers, xoshiro256+ is even faster.
95	//
96	// The state must be seeded so that it is not everywhere zero. If you have a 64-bit seed, we suggest to seed a
97	// splitmix64 generator and use its output to fill s.
98
99	#ifndef XOSHIRO256PP
100	typedef struct xoshiro256pp_t { uint64_t s[4]; } xoshiro256pp_t;
101	#endif // ! XOSHIRO256PP
102
103	static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) {
104	inline uint64_t rotl(const uint64_t x, int k) {
105	return (x << k) \| (x >> (64 - k));
106	} // rotl
107
108	const uint64_t result = rotl( s[0] + s[3], 23 ) + s[0];
109	const uint64_t t = s[1] << 17;
110
111	s[2] ^= s[0];
112	s[3] ^= s[1];
113	s[1] ^= s[2];
114	s[0] ^= s[3];
115	s[2] ^= t;
116	s[3] = rotl( s[3], 45 );
117	return result;
118	} // xoshiro256pp
119
120	static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state, uint64_t seed ) {
121	state = (xoshiro256pp_t){ {seed, seed, seed, seed} };
122	} // xoshiro256pp_set_seed
123
124	// https://prng.di.unimi.it/xoshiro128plusplus.c
125	//
126	// This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid generators. It has excellent speed, a state size
127	// (128 bits) that is large enough for mild parallelism, and it passes all tests we are aware of.
128	//
129	// For generating just single-precision (i.e., 32-bit) floating-point numbers, xoshiro128+ is even faster.
130	//
131	// The state must be seeded so that it is not everywhere zero.
132
133	#ifndef XOSHIRO128PP
134	typedef struct xoshiro128pp_t { uint32_t s[4]; } xoshiro128pp_t;
135	#endif // ! XOSHIRO128PP
136
137	static inline uint32_t xoshiro128pp( xoshiro128pp_t & rs ) with(rs) {
138	inline uint32_t rotl( const uint32_t x, int k ) {
139	return (x << k) \| (x >> (32 - k));
140	} // rotl
141
142	const uint32_t result = rotl( s[0] + s[3], 7 ) + s[0];
143	const uint32_t t = s[1] << 9;
144
145	s[2] ^= s[0];
146	s[3] ^= s[1];
147	s[1] ^= s[2];
148	s[0] ^= s[3];
149	s[2] ^= t;
150	s[3] = rotl( s[3], 11 );
151	return result;
152	} // xoshiro128pp
153
154	static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) {
155	state = (xoshiro128pp_t){ {seed, seed, seed, seed} };
156	} // xoshiro128pp_set_seed
157
158	#ifdef __SIZEOF_INT128__
159	// Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is
160	// returned (copied), and then compute and store the next random value.
161	//--------------------------------------------------
162	static inline uint64_t lehmer64( __uint128_t & state ) {
163	__uint128_t ret = state;
164	state *= 0xda942042e4dd58b5;
165	return ret >> 64;
166	} // lehmer64
167
168	static inline void lehmer64_set_seed( __uint128_t & state, uint64_t seed ) {
169	state = seed;
170	} // lehmer64_set_seed
171
172	//--------------------------------------------------
173	static inline uint64_t wyhash64( uint64_t & state ) {
174	state += 0x60bee2bee120fc15;
175	__uint128_t tmp;
176	tmp = (__uint128_t) state * 0xa3b195354a39b70d;
177	uint64_t m1 = (tmp >> 64) ^ tmp;
178	tmp = (__uint128_t)m1 * 0x1b03738712fad5c9;
179	uint64_t m2 = (tmp >> 64) ^ tmp;
180	return m2;
181	}
182
183	static inline void wyhash64_set_seed( __uint128_t & state, uint64_t seed ) {
184	state = seed;
185	} // lehmer64_set_seed
186	#endif // __SIZEOF_INT128__
187
188	//--------------------------------------------------
189	static inline uint64_t xorshift_13_7_17( uint64_t & state ) {
190	uint64_t ret = state;
191	state ^= state << 13;
192	state ^= state >> 7;
193	state ^= state << 17;
194	return ret;
195	} // xorshift_13_7_17
196
197	static inline void xorshift_13_7_17_set_seed( uint64_t & state, uint32_t seed ) {
198	state = seed;
199	} // xorshift_13_7_17_set_seed
200
201	//--------------------------------------------------
202	// Marsaglia shift-XOR PRNG with thread-local state
203	// Period is 4G-1
204	// 0 is absorbing and must be avoided
205	// Low-order bits are not particularly random
206	static inline uint32_t xorshift_6_21_7( uint32_t & state ) {
207	uint32_t ret = state;
208	state ^= state << 6;
209	state ^= state >> 21;
210	state ^= state << 7;
211	return ret;
212	} // xorshift_6_21_7
213
214	static inline void xorshift_6_21_7_set_seed( uint32_t & state, uint32_t seed ) {
215	state = seed;
216	} // xorshift_6_21_7_set_seed
217
218	//--------------------------------------------------
219	// The state array must be initialized to non-zero in the first four words.
220	#ifndef XORWOW
221	typedef struct xorwow_t { uint32_t a, b, c, d, counter; } xorwow_t;
222	#endif // ! XORWOW
223
224	static inline uint32_t xorwow( xorwow_t & state ) {
225	// Algorithm "xorwow" from p. 5 of Marsaglia, "Xorshift RNGs".
226	uint32_t ret = state.a + state.counter;
227	uint32_t t = state.d;
228
229	uint32_t const s = state.a;
230	state.d = state.c;
231	state.c = state.b;
232	state.b = s;
233
234	t ^= t >> 2;
235	t ^= t << 1;
236	t ^= s ^ (s << 4);
237	state.a = t;
238
239	state.counter += 362437;
240	return ret;
241	} // xorwow
242
243	static inline void xorwow_set_seed( xorwow_t & state, uint32_t seed ) {
244	state = (xorwow_t){ seed, seed, seed, seed, 0 };
245	} // xorwow_set_seed
246
247	//--------------------------------------------------
248	// Used in __tls_rand_fwd
249	#define M (1_l64u << 48_l64u)
250	#define A (25214903917_l64u)
251	#define AI (18446708753438544741_l64u)
252	#define C (11_l64u)
253	#define D (16_l64u)
254
255	// Bi-directional LCG random-number generator
256	static inline uint32_t LCGBI_fwd( uint64_t & state ) {
257	state = (A * state + C) & (M - 1);
258	return state >> D;
259	} // LCGBI_fwd
260
261	static inline uint32_t LCGBI_bck( uint64_t & state ) {
262	unsigned int r = state >> D;
263	state = AI * (state - C) & (M - 1);
264	return r;
265	} // LCGBI_bck
266
267	#undef M
268	#undef A
269	#undef AI
270	#undef C
271	#undef D
272
273	#endif // __cforall

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format