Changeset 2dcd80a for libcfa/src/bits
- Timestamp:
- Dec 14, 2022, 12:23:42 PM (3 years ago)
- Branches:
- ADT, ast-experimental, master
- Children:
- 441a6a7
- Parents:
- 7d9598d8 (diff), d8bdf13 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)links above to see all the changes relative to each parent. - File:
-
- 1 edited
-
libcfa/src/bits/random.hfa (modified) (7 diffs)
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/bits/random.hfa
r7d9598d8 r2dcd80a 10 10 // Created On : Fri Jan 14 07:18:11 2022 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Jan 14 07:18:58 202213 // Update Count : 1 12 // Last Modified On : Sun Dec 11 18:43:58 2022 13 // Update Count : 171 14 14 // 15 15 … … 18 18 #include <stdint.h> 19 19 20 // Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is returned 21 // (copied), and then compute and store the next random value. 22 23 #if defined(__SIZEOF_INT128__) 24 //-------------------------------------------------- 20 #define GLUE2( x, y ) x##y 21 #define GLUE( x, y ) GLUE2( x, y ) 22 23 // Set default PRNG for architecture size. 24 #ifdef __x86_64__ // 64-bit architecture 25 // 64-bit generators 26 #define LEHMER64 27 //#define XORSHIFT_12_25_27 28 //#define XOSHIRO256PP 29 //#define KISS_64 30 31 // 32-bit generators 32 #define XORSHIFT_6_21_7 33 //#define XOSHIRO128PP 34 #else // 32-bit architecture 35 // 64-bit generators 36 #define XORSHIFT_13_7_17 37 38 // 32-bit generators 39 #define XORSHIFT_6_21_7 40 #endif // __x86_64__ 41 42 // Define C/CFA PRNG name and random-state. 43 44 // SKULLDUGGERY: typedefs name struct and typedef with the same name to deal with CFA typedef numbering problem. 45 46 #ifdef XOSHIRO256PP 47 #define PRNG_NAME_64 xoshiro256pp 48 #define PRNG_STATE_64_T GLUE(PRNG_NAME_64,_t) 49 typedef struct PRNG_STATE_64_T { uint64_t s[4]; } PRNG_STATE_64_T; 50 #endif // XOSHIRO256PP 51 52 #ifdef XOSHIRO128PP 53 #define PRNG_NAME_32 xoshiro128pp 54 #define PRNG_STATE_32_T GLUE(PRNG_NAME_32,_t) 55 typedef struct PRNG_STATE_32_T { uint32_t s[4]; } PRNG_STATE_32_T; 56 #endif // XOSHIRO128PP 57 58 #ifdef LEHMER64 59 #define PRNG_NAME_64 lehmer64 60 #define PRNG_STATE_64_T __uint128_t 61 #endif // LEHMER64 62 63 #ifdef WYHASH64 64 #define PRNG_NAME_64 wyhash64 65 #define PRNG_STATE_64_T uint64_t 66 #endif // LEHMER64 67 68 #ifdef XORSHIFT_13_7_17 69 #define PRNG_NAME_64 xorshift_13_7_17 70 #define PRNG_STATE_64_T uint64_t 71 #endif // XORSHIFT_13_7_17 72 73 #ifdef XORSHIFT_6_21_7 74 #define PRNG_NAME_32 xorshift_6_21_7 75 #define PRNG_STATE_32_T uint32_t 76 #endif // XORSHIFT_6_21_7 77 78 #ifdef XORSHIFT_12_25_27 79 #define PRNG_NAME_64 xorshift_12_25_27 80 #define PRNG_STATE_64_T uint64_t 81 #endif // XORSHIFT_12_25_27 82 83 #ifdef KISS_64 84 #define PRNG_NAME_64 kiss_64 85 #define PRNG_STATE_64_T GLUE(PRNG_NAME_64,_t) 86 typedef struct PRNG_STATE_64_T { uint64_t z, w, jsr, jcong; } PRNG_STATE_64_T; 87 #endif // KISS_^64 88 89 #ifdef XORWOW 90 #define PRNG_NAME_32 xorwow 91 #define PRNG_STATE_32_T GLUE(PRNG_NAME_32,_t) 92 typedef struct PRNG_STATE_32_T { uint32_t a, b, c, d, counter; } PRNG_STATE_32_T; 93 #endif // XOSHIRO128PP 94 95 #define PRNG_SET_SEED_64 GLUE(PRNG_NAME_64,_set_seed) 96 #define PRNG_SET_SEED_32 GLUE(PRNG_NAME_32,_set_seed) 97 98 99 // Default PRNG used by runtime. 100 #ifdef __x86_64__ // 64-bit architecture 101 #define PRNG_NAME PRNG_NAME_64 102 #define PRNG_STATE_T PRNG_STATE_64_T 103 #else // 32-bit architecture 104 #define PRNG_NAME PRNG_NAME_32 105 #define PRNG_STATE_T PRNG_STATE_32_T 106 #endif // __x86_64__ 107 108 #define PRNG_SET_SEED GLUE(PRNG_NAME,_set_seed) 109 110 111 // ALL PRNG ALGORITHMS ARE OPTIMIZED SO THAT THE PRNG LOGIC CAN HAPPEN IN PARALLEL WITH THE USE OF THE RESULT. 112 // Therefore, the set_seed routine primes the PRNG by calling it with the state so the seed is not return as the 113 // first random value. 114 115 #ifdef __cforall // don't include in C code (invoke.h) 116 117 // https://prng.di.unimi.it/xoshiro256starstar.c 118 // 119 // This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators. It has excellent (sub-ns) speed, a state 120 // (256 bits) that is large enough for any parallel application, and it passes all tests we are aware of. 121 // 122 // For generating just floating-point numbers, xoshiro256+ is even faster. 123 // 124 // The state must be seeded so that it is not everywhere zero. If you have a 64-bit seed, we suggest to seed a 125 // splitmix64 generator and use its output to fill s. 126 127 #ifndef XOSHIRO256PP 128 typedef struct xoshiro256pp_t { uint64_t s[4]; } xoshiro256pp_t; 129 #endif // ! XOSHIRO256PP 130 131 static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) { 132 inline uint64_t rotl(const uint64_t x, int k) { 133 return (x << k) | (x >> (64 - k)); 134 } // rotl 135 136 const uint64_t result = rotl( s[0] + s[3], 23 ) + s[0]; 137 const uint64_t t = s[1] << 17; 138 139 s[2] ^= s[0]; 140 s[3] ^= s[1]; 141 s[1] ^= s[2]; 142 s[0] ^= s[3]; 143 s[2] ^= t; 144 s[3] = rotl( s[3], 45 ); 145 return result; 146 } // xoshiro256pp 147 148 static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state, uint64_t seed ) { 149 state = (xoshiro256pp_t){ {seed, seed, seed, seed} }; 150 xoshiro256pp( state ); 151 } // xoshiro256pp_set_seed 152 153 // https://prng.di.unimi.it/xoshiro128plusplus.c 154 // 155 // This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid generators. It has excellent speed, a state size 156 // (128 bits) that is large enough for mild parallelism, and it passes all tests we are aware of. 157 // 158 // For generating just single-precision (i.e., 32-bit) floating-point numbers, xoshiro128+ is even faster. 159 // 160 // The state must be seeded so that it is not everywhere zero. 161 162 #ifndef XOSHIRO128PP 163 typedef struct xoshiro128pp_t { uint32_t s[4]; } xoshiro128pp_t; 164 #endif // ! XOSHIRO128PP 165 166 static inline uint32_t xoshiro128pp( xoshiro128pp_t & rs ) with(rs) { 167 inline uint32_t rotl( const uint32_t x, int k ) { 168 return (x << k) | (x >> (32 - k)); 169 } // rotl 170 171 const uint32_t result = rotl( s[0] + s[3], 7 ) + s[0]; 172 const uint32_t t = s[1] << 9; 173 174 s[2] ^= s[0]; 175 s[3] ^= s[1]; 176 s[1] ^= s[2]; 177 s[0] ^= s[3]; 178 s[2] ^= t; 179 s[3] = rotl( s[3], 11 ); 180 return result; 181 } // xoshiro128pp 182 183 static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) { 184 state = (xoshiro128pp_t){ {seed, seed, seed, seed} }; 185 xoshiro128pp( state ); // prime 186 } // xoshiro128pp_set_seed 187 188 #ifdef __SIZEOF_INT128__ 189 // Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is 190 // returned (copied), and then compute and store the next random value. 191 //-------------------------------------------------- 25 192 static inline uint64_t lehmer64( __uint128_t & state ) { 26 193 __uint128_t ret = state; 27 194 state *= 0xda942042e4dd58b5; 28 195 return ret >> 64; 29 } 30 31 //-------------------------------------------------- 196 } // lehmer64 197 198 static inline void lehmer64_set_seed( __uint128_t & state, uint64_t seed ) { 199 state = seed; 200 lehmer64( state ); 201 } // lehmer64_set_seed 202 203 //-------------------------------------------------- 32 204 static inline uint64_t wyhash64( uint64_t & state ) { 33 state += 0x60bee2bee120fc15; 205 uint64_t ret = state; 206 state += 0x_60be_e2be_e120_fc15; 34 207 __uint128_t tmp; 35 tmp = (__uint128_t) state * 0xa3b195354a39b70d;208 tmp = (__uint128_t) ret * 0x_a3b1_9535_4a39_b70d; 36 209 uint64_t m1 = (tmp >> 64) ^ tmp; 37 tmp = (__uint128_t)m1 * 0x 1b03738712fad5c9;210 tmp = (__uint128_t)m1 * 0x_1b03_7387_12fa_d5c9; 38 211 uint64_t m2 = (tmp >> 64) ^ tmp; 39 212 return m2; 40 } 41 #endif 213 } // wyhash64 214 215 static inline void wyhash64_set_seed( uint64_t & state, uint64_t seed ) { 216 state = seed; 217 wyhash64( state ); // prime 218 } // wyhash64_set_seed 219 #endif // __SIZEOF_INT128__ 42 220 43 221 //-------------------------------------------------- … … 48 226 state ^= state << 17; 49 227 return ret; 50 } 51 52 //-------------------------------------------------- 228 } // xorshift_13_7_17 229 230 static inline void xorshift_13_7_17_set_seed( uint64_t & state, uint64_t seed ) { 231 state = seed; 232 xorshift_13_7_17( state ); // prime 233 } // xorshift_13_7_17_set_seed 234 235 //-------------------------------------------------- 236 // Marsaglia shift-XOR PRNG with thread-local state 237 // Period is 4G-1 238 // 0 is absorbing and must be avoided 239 // Low-order bits are not particularly random 53 240 static inline uint32_t xorshift_6_21_7( uint32_t & state ) { 54 241 uint32_t ret = state; … … 59 246 } // xorshift_6_21_7 60 247 61 //-------------------------------------------------- 62 typedef struct { 63 uint32_t a, b, c, d; 64 uint32_t counter; 65 } xorwow__state_t; 66 67 // The state array must be initialized to not be all zero in the first four words. 68 static inline uint32_t xorwow( xorwow__state_t & state ) { 248 static inline void xorshift_6_21_7_set_seed( uint32_t & state, uint32_t seed ) { 249 state = seed; 250 xorshift_6_21_7( state ); // prime 251 } // xorshift_6_21_7_set_seed 252 253 //-------------------------------------------------- 254 // The state must be seeded with a nonzero value. 255 static inline uint64_t xorshift_12_25_27( uint64_t & state ) { 256 uint64_t ret = state; 257 state ^= state >> 12; 258 state ^= state << 25; 259 state ^= state >> 27; 260 return ret * 0x_2545_F491_4F6C_DD1D; 261 } // xorshift_12_25_27 262 263 static inline void xorshift_12_25_27_set_seed( uint64_t & state, uint64_t seed ) { 264 state = seed; 265 xorshift_12_25_27( state ); // prime 266 } // xorshift_12_25_27_set_seed 267 268 //-------------------------------------------------- 269 // The state must be seeded with a nonzero value. 270 #ifndef KISS_64 271 typedef struct kiss_64_t { uint64_t z, w, jsr, jcong; } kiss_64_t; 272 #endif // ! KISS_64 273 274 static inline uint64_t kiss_64( kiss_64_t & state ) with(state) { 275 kiss_64_t ret = state; 276 z = 36969 * (z & 65535) + (z >> 16); 277 w = 18000 * (w & 65535) + (w >> 16); 278 jsr ^= (jsr << 17); 279 jsr ^= (jsr << 13); 280 jsr ^= (jsr << 5); 281 jcong = 69069 * jcong + 1234567; 282 return (((ret.z << 16) + ret.w) ^ ret.jcong) + ret.jsr; 283 } // kiss_64 284 285 static inline void kiss_64_set_seed( kiss_64_t & state, uint64_t seed ) with(state) { 286 z = 1; w = 1; jsr = 4; jcong = seed; 287 kiss_64( state ); // prime 288 } // kiss_64_set_seed 289 290 //-------------------------------------------------- 291 // The state array must be initialized to non-zero in the first four words. 292 #ifndef XORWOW 293 typedef struct xorwow_t { uint32_t a, b, c, d, counter; } xorwow_t; 294 #endif // ! XORWOW 295 296 static inline uint32_t xorwow( xorwow_t & state ) with(state) { 69 297 // Algorithm "xorwow" from p. 5 of Marsaglia, "Xorshift RNGs". 70 uint32_t ret = state.a + state.counter;71 uint32_t t = state.d;72 73 uint32_t const s = state.a;74 state.d = state.c;75 state.c = state.b;76 state.b = s;298 uint32_t ret = a + counter; 299 uint32_t t = d; 300 301 uint32_t const s = a; 302 d = c; 303 c = b; 304 b = s; 77 305 78 306 t ^= t >> 2; 79 307 t ^= t << 1; 80 308 t ^= s ^ (s << 4); 81 state.a = t; 82 83 state.counter += 362437; 309 a = t; 310 counter += 362437; 84 311 return ret; 85 } 86 87 //-------------------------------------------------- 88 static inline uint32_t LCG( uint32_t & state ) { // linear congruential generator 89 uint32_t ret = state; 90 state = 36969 * (state & 65535) + (state >> 16); // 36969 is NOT prime! No not change it! 91 return ret; 92 } // LCG 93 94 //-------------------------------------------------- 312 } // xorwow 313 314 static inline void xorwow_set_seed( xorwow_t & state, uint32_t seed ) { 315 state = (xorwow_t){ seed, seed, seed, seed, 0 }; 316 xorwow( state ); // prime 317 } // xorwow_set_seed 318 319 //-------------------------------------------------- 320 // Used in __tls_rand_fwd 95 321 #define M (1_l64u << 48_l64u) 96 322 #define A (25214903917_l64u) … … 103 329 state = (A * state + C) & (M - 1); 104 330 return state >> D; 105 } 331 } // LCGBI_fwd 106 332 107 333 static inline uint32_t LCGBI_bck( uint64_t & state ) { … … 109 335 state = AI * (state - C) & (M - 1); 110 336 return r; 111 } 337 } // LCGBI_bck 112 338 113 339 #undef M … … 116 342 #undef C 117 343 #undef D 344 345 #endif // __cforall
Note:
See TracChangeset
for help on using the changeset viewer.