Changeset 24d6572 for libcfa/src/bits/random.hfa
- Timestamp:
- Jun 12, 2023, 2:45:32 PM (2 years ago)
- Branches:
- ast-experimental, master
- Children:
- 62d62db
- Parents:
- 34b4268 (diff), 251ce80 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)
links above to see all the changes relative to each parent. - File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/bits/random.hfa
r34b4268 r24d6572 10 10 // Created On : Fri Jan 14 07:18:11 2022 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Dec 11 18:43:58 202213 // Update Count : 1 7112 // Last Modified On : Mon Mar 20 21:45:24 2023 13 // Update Count : 186 14 14 // 15 15 16 16 #pragma once 17 17 18 #include <stdint.h> 18 #include <stdint.h> // uintXX_t 19 19 20 20 #define GLUE2( x, y ) x##y … … 24 24 #ifdef __x86_64__ // 64-bit architecture 25 25 // 64-bit generators 26 #define LEHMER6426 //#define LEHMER64 27 27 //#define XORSHIFT_12_25_27 28 //#define XOSHIRO256PP28 #define XOSHIRO256PP 29 29 //#define KISS_64 30 // #define SPLITMIX_64 30 31 31 32 // 32-bit generators 32 #define XORSHIFT_6_21_7 33 //#define XOSHIRO128PP 33 //#define XORSHIFT_6_21_7 34 #define XOSHIRO128PP 35 // #define SPLITMIX_32 34 36 #else // 32-bit architecture 35 37 // 64-bit generators 36 #define XORSHIFT_13_7_17 38 //#define XORSHIFT_13_7_17 39 #define XOSHIRO256PP 40 // #define SPLITMIX_64 37 41 38 42 // 32-bit generators 39 #define XORSHIFT_6_21_7 43 //#define XORSHIFT_6_21_7 44 #define XOSHIRO128PP 45 // #define SPLITMIX_32 40 46 #endif // __x86_64__ 41 47 42 48 // Define C/CFA PRNG name and random-state. 43 44 // SKULLDUGGERY: typedefs name struct and typedef with the same name to deal with CFA typedef numbering problem.45 49 46 50 #ifdef XOSHIRO256PP 47 51 #define PRNG_NAME_64 xoshiro256pp 48 52 #define PRNG_STATE_64_T GLUE(PRNG_NAME_64,_t) 49 typedef struct PRNG_STATE_64_T { uint64_t s[4]; } PRNG_STATE_64_T;53 typedef struct { uint64_t s0, s1, s2, s3; } PRNG_STATE_64_T; 50 54 #endif // XOSHIRO256PP 51 55 … … 53 57 #define PRNG_NAME_32 xoshiro128pp 54 58 #define PRNG_STATE_32_T GLUE(PRNG_NAME_32,_t) 55 typedef struct PRNG_STATE_32_T { uint32_t s[4]; } PRNG_STATE_32_T;59 typedef struct { uint32_t s0, s1, s2, s3; } PRNG_STATE_32_T; 56 60 #endif // XOSHIRO128PP 57 61 … … 81 85 #endif // XORSHIFT_12_25_27 82 86 87 #ifdef SPLITMIX_64 88 #define PRNG_NAME_64 splitmix64 89 #define PRNG_STATE_64_T uint64_t 90 #endif // SPLITMIX32 91 92 #ifdef SPLITMIX_32 93 #define PRNG_NAME_32 splitmix32 94 #define PRNG_STATE_32_T uint32_t 95 #endif // SPLITMIX32 96 83 97 #ifdef KISS_64 84 98 #define PRNG_NAME_64 kiss_64 85 99 #define PRNG_STATE_64_T GLUE(PRNG_NAME_64,_t) 86 typedef struct PRNG_STATE_64_T{ uint64_t z, w, jsr, jcong; } PRNG_STATE_64_T;100 typedef struct { uint64_t z, w, jsr, jcong; } PRNG_STATE_64_T; 87 101 #endif // KISS_^64 88 102 … … 90 104 #define PRNG_NAME_32 xorwow 91 105 #define PRNG_STATE_32_T GLUE(PRNG_NAME_32,_t) 92 typedef struct PRNG_STATE_32_T{ uint32_t a, b, c, d, counter; } PRNG_STATE_32_T;106 typedef struct { uint32_t a, b, c, d, counter; } PRNG_STATE_32_T; 93 107 #endif // XOSHIRO128PP 94 108 … … 110 124 111 125 // ALL PRNG ALGORITHMS ARE OPTIMIZED SO THAT THE PRNG LOGIC CAN HAPPEN IN PARALLEL WITH THE USE OF THE RESULT. 112 // Therefore, the set_seed routine primes the PRNG by calling it with the state so the seed is not return as the 113 // first random value. 126 // Specifically, the current random state is copied for returning, before computing the next value. As a consequence, 127 // the set_seed routine primes the PRNG by calling it with the state so the seed is not return as the first random 128 // value. 129 114 130 115 131 #ifdef __cforall // don't include in C code (invoke.h) 132 133 // https://rosettacode.org/wiki/Pseudo-random_numbers/Splitmix64 134 // 135 // Splitmix64 is not recommended for demanding random number requirements, but is often used to calculate initial states 136 // for other more complex pseudo-random number generators (see https://prng.di.unimi.it). 137 // Also https://rosettacode.org/wiki/Pseudo-random_numbers/Splitmix64. 138 static inline uint64_t splitmix64( uint64_t & state ) { 139 state += 0x9e3779b97f4a7c15; 140 uint64_t z = state; 141 z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; 142 z = (z ^ (z >> 27)) * 0x94d049bb133111eb; 143 return z ^ (z >> 31); 144 } // splitmix64 145 146 static inline void splitmix64_set_seed( uint64_t & state , uint64_t seed ) { 147 state = seed; 148 splitmix64( state ); // prime 149 } // splitmix64_set_seed 150 151 // https://github.com/bryc/code/blob/master/jshash/PRNGs.md#splitmix32 152 // 153 // Splitmix32 is not recommended for demanding random number requirements, but is often used to calculate initial states 154 // for other more complex pseudo-random number generators (see https://prng.di.unimi.it). 155 156 static inline uint32_t splitmix32( uint32_t & state ) { 157 state += 0x9e3779b9; 158 uint64_t z = state; 159 z = (z ^ (z >> 15)) * 0x85ebca6b; 160 z = (z ^ (z >> 13)) * 0xc2b2ae35; 161 return z ^ (z >> 16); 162 } // splitmix32 163 164 static inline void splitmix32_set_seed( uint32_t & state, uint64_t seed ) { 165 state = seed; 166 splitmix32( state ); // prime 167 } // splitmix32_set_seed 168 169 #ifdef __SIZEOF_INT128__ 170 //-------------------------------------------------- 171 static inline uint64_t lehmer64( __uint128_t & state ) { 172 __uint128_t ret = state; 173 state *= 0x_da94_2042_e4dd_58b5; 174 return ret >> 64; 175 } // lehmer64 176 177 static inline void lehmer64_set_seed( __uint128_t & state, uint64_t seed ) { 178 // The seed needs to be coprime with the 2^64 modulus to get the largest period, so no factors of 2 in the seed. 179 state = splitmix64( seed ); // prime 180 } // lehmer64_set_seed 181 182 //-------------------------------------------------- 183 static inline uint64_t wyhash64( uint64_t & state ) { 184 uint64_t ret = state; 185 state += 0x_60be_e2be_e120_fc15; 186 __uint128_t tmp; 187 tmp = (__uint128_t) ret * 0x_a3b1_9535_4a39_b70d; 188 uint64_t m1 = (tmp >> 64) ^ tmp; 189 tmp = (__uint128_t)m1 * 0x_1b03_7387_12fa_d5c9; 190 uint64_t m2 = (tmp >> 64) ^ tmp; 191 return m2; 192 } // wyhash64 193 194 static inline void wyhash64_set_seed( uint64_t & state, uint64_t seed ) { 195 state = splitmix64( seed ); // prime 196 } // wyhash64_set_seed 197 #endif // __SIZEOF_INT128__ 116 198 117 199 // https://prng.di.unimi.it/xoshiro256starstar.c … … 126 208 127 209 #ifndef XOSHIRO256PP 128 typedef struct xoshiro256pp_t { uint64_t s[4]; } xoshiro256pp_t;210 typedef struct { uint64_t s0, s1, s2, s3; } xoshiro256pp_t; 129 211 #endif // ! XOSHIRO256PP 130 212 131 213 static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) { 132 inline uint64_t rotl( const uint64_t x, int k) {214 inline uint64_t rotl( const uint64_t x, int k ) { 133 215 return (x << k) | (x >> (64 - k)); 134 216 } // rotl 135 217 136 const uint64_t result = rotl( s [0] + s[3], 23 ) + s[0];137 const uint64_t t = s [1]<< 17;138 139 s [2] ^= s[0];140 s [3] ^= s[1];141 s [1] ^= s[2];142 s [0] ^= s[3];143 s [2]^= t;144 s [3] = rotl( s[3], 45 );218 const uint64_t result = rotl( s0 + s3, 23 ) + s0; 219 const uint64_t t = s1 << 17; 220 221 s2 ^= s0; 222 s3 ^= s1; 223 s1 ^= s2; 224 s0 ^= s3; 225 s2 ^= t; 226 s3 = rotl( s3, 45 ); 145 227 return result; 146 228 } // xoshiro256pp 147 229 148 static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state, uint64_t seed ) { 149 state = (xoshiro256pp_t){ {seed, seed, seed, seed} }; 150 xoshiro256pp( state ); 230 static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state, uint64_t seed ) { 231 // To attain repeatable seeding, compute seeds separately because the order of argument evaluation is undefined. 232 uint64_t seed1 = splitmix64( seed ); // prime 233 uint64_t seed2 = splitmix64( seed ); 234 uint64_t seed3 = splitmix64( seed ); 235 uint64_t seed4 = splitmix64( seed ); 236 state = (xoshiro256pp_t){ seed1, seed2, seed3, seed4 }; 151 237 } // xoshiro256pp_set_seed 152 238 … … 161 247 162 248 #ifndef XOSHIRO128PP 163 typedef struct xoshiro128pp_t { uint32_t s[4]; } xoshiro128pp_t;249 typedef struct { uint32_t s0, s1, s2, s3; } xoshiro128pp_t; 164 250 #endif // ! XOSHIRO128PP 165 251 … … 169 255 } // rotl 170 256 171 const uint32_t result = rotl( s [0] + s[3], 7 ) + s[0];172 const uint32_t t = s [1]<< 9;173 174 s [2] ^= s[0];175 s [3] ^= s[1];176 s [1] ^= s[2];177 s [0] ^= s[3];178 s [2]^= t;179 s [3] = rotl( s[3], 11 );257 const uint32_t result = rotl( s0 + s3, 7 ) + s0; 258 const uint32_t t = s1 << 9; 259 260 s2 ^= s0; 261 s3 ^= s1; 262 s1 ^= s2; 263 s0 ^= s3; 264 s2 ^= t; 265 s3 = rotl( s3, 11 ); 180 266 return result; 181 267 } // xoshiro128pp 182 268 183 269 static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) { 184 state = (xoshiro128pp_t){ {seed, seed, seed, seed} }; 185 xoshiro128pp( state ); // prime 270 // To attain repeatable seeding, compute seeds separately because the order of argument evaluation is undefined. 271 uint32_t seed1 = splitmix32( seed ); // prime 272 uint32_t seed2 = splitmix32( seed ); 273 uint32_t seed3 = splitmix32( seed ); 274 uint32_t seed4 = splitmix32( seed ); 275 state = (xoshiro128pp_t){ seed1, seed2, seed3, seed4 }; 186 276 } // xoshiro128pp_set_seed 187 188 #ifdef __SIZEOF_INT128__189 // Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is190 // returned (copied), and then compute and store the next random value.191 //--------------------------------------------------192 static inline uint64_t lehmer64( __uint128_t & state ) {193 __uint128_t ret = state;194 state *= 0xda942042e4dd58b5;195 return ret >> 64;196 } // lehmer64197 198 static inline void lehmer64_set_seed( __uint128_t & state, uint64_t seed ) {199 state = seed;200 lehmer64( state );201 } // lehmer64_set_seed202 203 //--------------------------------------------------204 static inline uint64_t wyhash64( uint64_t & state ) {205 uint64_t ret = state;206 state += 0x_60be_e2be_e120_fc15;207 __uint128_t tmp;208 tmp = (__uint128_t) ret * 0x_a3b1_9535_4a39_b70d;209 uint64_t m1 = (tmp >> 64) ^ tmp;210 tmp = (__uint128_t)m1 * 0x_1b03_7387_12fa_d5c9;211 uint64_t m2 = (tmp >> 64) ^ tmp;212 return m2;213 } // wyhash64214 215 static inline void wyhash64_set_seed( uint64_t & state, uint64_t seed ) {216 state = seed;217 wyhash64( state ); // prime218 } // wyhash64_set_seed219 #endif // __SIZEOF_INT128__220 277 221 278 //-------------------------------------------------- … … 229 286 230 287 static inline void xorshift_13_7_17_set_seed( uint64_t & state, uint64_t seed ) { 231 state = seed; 232 xorshift_13_7_17( state ); // prime 288 state = splitmix64( seed ); // prime 233 289 } // xorshift_13_7_17_set_seed 234 290 … … 247 303 248 304 static inline void xorshift_6_21_7_set_seed( uint32_t & state, uint32_t seed ) { 249 state = seed; 250 xorshift_6_21_7( state ); // prime 305 state = splitmix32( seed ); // prime 251 306 } // xorshift_6_21_7_set_seed 252 307 … … 262 317 263 318 static inline void xorshift_12_25_27_set_seed( uint64_t & state, uint64_t seed ) { 264 state = seed; 265 xorshift_12_25_27( state ); // prime 319 state = splitmix64( seed ); // prime 266 320 } // xorshift_12_25_27_set_seed 267 321 … … 269 323 // The state must be seeded with a nonzero value. 270 324 #ifndef KISS_64 271 typedef struct kiss_64_t{ uint64_t z, w, jsr, jcong; } kiss_64_t;325 typedef struct { uint64_t z, w, jsr, jcong; } kiss_64_t; 272 326 #endif // ! KISS_64 273 327 274 static inline uint64_t kiss_64( kiss_64_t & state ) with(state) {275 kiss_64_t ret = state;328 static inline uint64_t kiss_64( kiss_64_t & rs ) with(rs) { 329 kiss_64_t ret = rs; 276 330 z = 36969 * (z & 65535) + (z >> 16); 277 331 w = 18000 * (w & 65535) + (w >> 16); 278 jsr ^= (jsr << 17);279 332 jsr ^= (jsr << 13); 333 jsr ^= (jsr >> 17); 280 334 jsr ^= (jsr << 5); 281 335 jcong = 69069 * jcong + 1234567; … … 283 337 } // kiss_64 284 338 285 static inline void kiss_64_set_seed( kiss_64_t & state, uint64_t seed ) with(state) { 286 z = 1; w = 1; jsr = 4; jcong = seed; 287 kiss_64( state ); // prime 339 static inline void kiss_64_set_seed( kiss_64_t & rs, uint64_t seed ) with(rs) { 340 z = 1; w = 1; jsr = 4; jcong = splitmix64( seed ); // prime 288 341 } // kiss_64_set_seed 289 342 … … 291 344 // The state array must be initialized to non-zero in the first four words. 292 345 #ifndef XORWOW 293 typedef struct xorwow_t{ uint32_t a, b, c, d, counter; } xorwow_t;346 typedef struct { uint32_t a, b, c, d, counter; } xorwow_t; 294 347 #endif // ! XORWOW 295 348 296 static inline uint32_t xorwow( xorwow_t & state ) with(state) {349 static inline uint32_t xorwow( xorwow_t & rs ) with(rs) { 297 350 // Algorithm "xorwow" from p. 5 of Marsaglia, "Xorshift RNGs". 298 351 uint32_t ret = a + counter; … … 312 365 } // xorwow 313 366 314 static inline void xorwow_set_seed( xorwow_t & state, uint32_t seed ) { 315 state = (xorwow_t){ seed, seed, seed, seed, 0 }; 316 xorwow( state ); // prime 367 static inline void xorwow_set_seed( xorwow_t & rs, uint32_t seed ) { 368 // To attain repeatable seeding, compute seeds separately because the order of argument evaluation is undefined. 369 uint32_t seed1 = splitmix32( seed ); // prime 370 uint32_t seed2 = splitmix32( seed ); 371 uint32_t seed3 = splitmix32( seed ); 372 uint32_t seed4 = splitmix32( seed ); 373 rs = (xorwow_t){ seed1, seed2, seed3, seed4, 0 }; 317 374 } // xorwow_set_seed 318 375 … … 320 377 // Used in __tls_rand_fwd 321 378 #define M (1_l64u << 48_l64u) 322 #define A (25 214903917_l64u)323 #define AI (18 446708753438544741_l64u)379 #define A (25_214_903_917_l64u) 380 #define AI (18_446_708_753_438_544_741_l64u) 324 381 #define C (11_l64u) 325 382 #define D (16_l64u) 326 383 327 384 // Bi-directional LCG random-number generator 328 static inline uint32_t LCGBI_fwd( uint64_t & state) {329 state = (A * state+ C) & (M - 1);330 return state>> D;385 static inline uint32_t LCGBI_fwd( uint64_t & rs ) { 386 rs = (A * rs + C) & (M - 1); 387 return rs >> D; 331 388 } // LCGBI_fwd 332 389 333 static inline uint32_t LCGBI_bck( uint64_t & state) {334 unsigned int r = state>> D;335 state = AI * (state- C) & (M - 1);390 static inline uint32_t LCGBI_bck( uint64_t & rs ) { 391 unsigned int r = rs >> D; 392 rs = AI * (rs - C) & (M - 1); 336 393 return r; 337 394 } // LCGBI_bck
Note:
See TracChangeset
for help on using the changeset viewer.