source: libcfa/src/bits/random.hfa @ 74227c6

ADTast-experimental
Last change on this file since 74227c6 was 4020f09, checked in by Peter A. Buhr <pabuhr@…>, 2 years ago

formatting, switch to typedef for PRNG complex state

  • Property mode set to 100644
File size: 7.8 KB
RevLine 
[e57de69]1//
2// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// random.hfa --
8//
9// Author           : Peter A. Buhr
10// Created On       : Fri Jan 14 07:18:11 2022
11// Last Modified By : Peter A. Buhr
[4020f09]12// Last Modified On : Mon Dec  5 13:13:14 2022
13// Update Count     : 128
[e57de69]14//
15
[13c5e19]16#pragma once
17
18#include <stdint.h>
19
[dd46fd3]20#define GLUE2( x, y ) x##y
21#define GLUE( x, y ) GLUE2( x, y )
22
[9fce2572]23// Set default PRNG for architecture size.
[d2ad151]24#ifdef __x86_64__                                                                               // 64-bit architecture
25#define LEHMER64
[dd46fd3]26#define XORSHIFT_6_21_7
27//#define XOSHIRO256PP
28//#define XOSHIRO128PP
[d2ad151]29#else                                                                                                   // 32-bit architecture
[c8238c0]30#define XORSHIFT_13_7_17
[d2ad151]31#define XORSHIFT_6_21_7
32#endif // __x86_64__
33
[4020f09]34// Define C/CFA PRNG name and random-state.
35
36// SKULLDUGGERY: typedefs name struct and typedef with the same name to deal with CFA typedef numbering problem.
[dd46fd3]37
[9fce2572]38#ifdef LEHMER64
[dd46fd3]39#define PRNG_NAME_64 lehmer64
40#define PRNG_STATE_64_T __uint128_t
[9fce2572]41#endif // LEHMER64
42
[c8238c0]43#ifdef XORSHIFT_13_7_17
44#define PRNG_NAME_64 xorshift_13_7_17
45#define PRNG_STATE_64_T uint64_t
46#endif // XORSHIFT_13_7_17
47
[9fce2572]48#ifdef XORSHIFT_6_21_7
[dd46fd3]49#define PRNG_NAME_32 xorshift_6_21_7
50#define PRNG_STATE_32_T uint32_t
[9fce2572]51#endif // XORSHIFT_6_21_7
52
[dd46fd3]53#ifdef XOSHIRO256PP
54#define PRNG_NAME_64 xoshiro256pp
[4020f09]55#define PRNG_STATE_64_T GLUE(PRNG_NAME_64,_t)
56typedef struct PRNG_STATE_64_T { uint64_t s[4]; } PRNG_STATE_64_T;
[dd46fd3]57#endif // XOSHIRO256PP
58
59#ifdef XOSHIRO128PP
60#define PRNG_NAME_32 xoshiro128pp
[4020f09]61#define PRNG_STATE_32_T GLUE(PRNG_NAME_32,_t)
62typedef struct PRNG_STATE_32_T { uint32_t s[4]; } PRNG_STATE_32_T;
63#endif // XOSHIRO128PP
64
65#ifdef XORWOW
66#define PRNG_NAME_32 xorwow
67#define PRNG_STATE_32_T GLUE(PRNG_NAME_32,_t)
68typedef struct PRNG_STATE_32_T { uint32_t a, b, c, d, counter; } PRNG_STATE_32_T;
[dd46fd3]69#endif // XOSHIRO128PP
70
71#define PRNG_SET_SEED_64 GLUE(PRNG_NAME_64,_set_seed)
72#define PRNG_SET_SEED_32 GLUE(PRNG_NAME_32,_set_seed)
73
74
75// Default PRNG used by runtime.
76#ifdef __x86_64__                                                                               // 64-bit architecture
77#define PRNG_NAME PRNG_NAME_64
78#define PRNG_STATE_T PRNG_STATE_64_T
79#else                                                                                                   // 32-bit architecture
80#define PRNG_NAME PRNG_NAME_32
81#define PRNG_STATE_T PRNG_STATE_32_T
82#endif // __x86_64__
83
84#define PRNG_SET_SEED GLUE(PRNG_NAME,_set_seed)
85
86
[9fce2572]87#ifdef __cforall                                                                                // don't include in C code (invoke.h)
88
[4020f09]89// https://prng.di.unimi.it/xoshiro256starstar.c
[dd46fd3]90//
91// This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.  It has excellent (sub-ns) speed, a state
92// (256 bits) that is large enough for any parallel application, and it passes all tests we are aware of.
93//
94// For generating just floating-point numbers, xoshiro256+ is even faster.
95//
96// The state must be seeded so that it is not everywhere zero. If you have a 64-bit seed, we suggest to seed a
97// splitmix64 generator and use its output to fill s.
98
99#ifndef XOSHIRO256PP
[4020f09]100typedef struct xoshiro256pp_t { uint64_t s[4]; } xoshiro256pp_t;
[dd46fd3]101#endif // ! XOSHIRO256PP
102
103static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) {
104        inline uint64_t rotl(const uint64_t x, int k) {
105                return (x << k) | (x >> (64 - k));
[4020f09]106        } // rotl
[dd46fd3]107
108        const uint64_t result = rotl( s[0] + s[3], 23 ) + s[0];
109        const uint64_t t = s[1] << 17;
110
111        s[2] ^= s[0];
112        s[3] ^= s[1];
113        s[1] ^= s[2];
114        s[0] ^= s[3];
115        s[2] ^= t;
116        s[3] = rotl( s[3], 45 );
117        return result;
[4020f09]118} // xoshiro256pp
[dd46fd3]119
120static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state,  uint64_t seed ) {
121        state = (xoshiro256pp_t){ {seed, seed, seed, seed} };
122} // xoshiro256pp_set_seed
123
[4020f09]124// https://prng.di.unimi.it/xoshiro128plusplus.c
125//
126// This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid generators. It has excellent speed, a state size
127// (128 bits) that is large enough for mild parallelism, and it passes all tests we are aware of.
128//
129// For generating just single-precision (i.e., 32-bit) floating-point numbers, xoshiro128+ is even faster.
130//
131// The state must be seeded so that it is not everywhere zero.
132
133#ifndef XOSHIRO128PP
134typedef struct xoshiro128pp_t { uint32_t s[4]; } xoshiro128pp_t;
135#endif // ! XOSHIRO128PP
136
137static inline uint32_t xoshiro128pp( xoshiro128pp_t & rs ) with(rs) {
138        inline uint32_t rotl( const uint32_t x, int k ) {
139                return (x << k) | (x >> (32 - k));
140        } // rotl
141
142        const uint32_t result = rotl( s[0] + s[3], 7 ) + s[0];
143        const uint32_t t = s[1] << 9;
144
145        s[2] ^= s[0];
146        s[3] ^= s[1];
147        s[1] ^= s[2];
148        s[0] ^= s[3];
149        s[2] ^= t;
150        s[3] = rotl( s[3], 11 );
151        return result;
152} // xoshiro128pp
153
154static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) {
155        state = (xoshiro128pp_t){ {seed, seed, seed, seed} };
156} // xoshiro128pp_set_seed
157
[dd46fd3]158#ifdef __SIZEOF_INT128__
159        // Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is
160        // returned (copied), and then compute and store the next random value.
161        //--------------------------------------------------
[611f29d]162        static inline uint64_t lehmer64( __uint128_t & state ) {
163                __uint128_t ret = state;
[7812a7b5]164                state *= 0xda942042e4dd58b5;
[611f29d]165                return ret >> 64;
[dd46fd3]166        } // lehmer64
[13c5e19]167
[dd46fd3]168        static inline void lehmer64_set_seed( __uint128_t & state, uint64_t seed ) {
169                state = seed;
170        } // lehmer64_set_seed
171
172        //--------------------------------------------------
[611f29d]173        static inline uint64_t wyhash64( uint64_t & state ) {
[7812a7b5]174                state += 0x60bee2bee120fc15;
175                __uint128_t tmp;
176                tmp = (__uint128_t) state * 0xa3b195354a39b70d;
177                uint64_t m1 = (tmp >> 64) ^ tmp;
178                tmp = (__uint128_t)m1 * 0x1b03738712fad5c9;
179                uint64_t m2 = (tmp >> 64) ^ tmp;
180                return m2;
181        }
[dd46fd3]182
183        static inline void wyhash64_set_seed( __uint128_t & state, uint64_t seed ) {
184                state = seed;
185        } // lehmer64_set_seed
186#endif // __SIZEOF_INT128__
[13c5e19]187
188//--------------------------------------------------
[611f29d]189static inline uint64_t xorshift_13_7_17( uint64_t & state ) {
190        uint64_t ret = state;
191        state ^= state << 13;
192        state ^= state >> 7;
193        state ^= state << 17;
194        return ret;
[4020f09]195} // xorshift_13_7_17
[13c5e19]196
[dd46fd3]197static inline void xorshift_13_7_17_set_seed( uint64_t & state, uint32_t seed ) {
198        state = seed;
[4020f09]199} // xorshift_13_7_17_set_seed
[dd46fd3]200
[611f29d]201//--------------------------------------------------
[4020f09]202// Marsaglia shift-XOR PRNG with thread-local state
203// Period is 4G-1
204// 0 is absorbing and must be avoided
205// Low-order bits are not particularly random
[611f29d]206static inline uint32_t xorshift_6_21_7( uint32_t & state ) {
207        uint32_t ret = state;
208        state ^= state << 6;
209        state ^= state >> 21;
210        state ^= state << 7;
211        return ret;
212} // xorshift_6_21_7
213
[dd46fd3]214static inline void xorshift_6_21_7_set_seed( uint32_t & state, uint32_t seed ) {
215        state = seed;
[4020f09]216} // xorshift_6_21_7_set_seed
[dd46fd3]217
[13c5e19]218//--------------------------------------------------
[4020f09]219// The state array must be initialized to non-zero in the first four words.
220#ifndef XORWOW
221typedef struct xorwow_t { uint32_t a, b, c, d, counter; } xorwow_t;
222#endif // ! XORWOW
[13c5e19]223
[4020f09]224static inline uint32_t xorwow( xorwow_t & state ) {
[e57de69]225        // Algorithm "xorwow" from p. 5 of Marsaglia, "Xorshift RNGs".
[611f29d]226        uint32_t ret = state.a + state.counter;
[13c5e19]227        uint32_t t = state.d;
228
229        uint32_t const s = state.a;
230        state.d = state.c;
231        state.c = state.b;
232        state.b = s;
233
234        t ^= t >> 2;
235        t ^= t << 1;
236        t ^= s ^ (s << 4);
237        state.a = t;
238
239        state.counter += 362437;
[611f29d]240        return ret;
[4020f09]241} // xorwow
242
243static inline void xorwow_set_seed( xorwow_t & state, uint32_t seed ) {
244        state = (xorwow_t){ seed, seed, seed, seed, 0 };
245} // xorwow_set_seed
[611f29d]246
247//--------------------------------------------------
[4020f09]248// Used in __tls_rand_fwd
[611f29d]249#define M  (1_l64u << 48_l64u)
250#define A  (25214903917_l64u)
251#define AI (18446708753438544741_l64u)
252#define C  (11_l64u)
253#define D  (16_l64u)
254
[e57de69]255// Bi-directional LCG random-number generator
[611f29d]256static inline uint32_t LCGBI_fwd( uint64_t & state ) {
257        state = (A * state + C) & (M - 1);
258        return state >> D;
[4020f09]259} // LCGBI_fwd
[611f29d]260
261static inline uint32_t LCGBI_bck( uint64_t & state ) {
262        unsigned int r = state >> D;
263        state = AI * (state - C) & (M - 1);
264        return r;
[4020f09]265} // LCGBI_bck
[611f29d]266
267#undef M
268#undef A
269#undef AI
270#undef C
271#undef D
[9fce2572]272
273#endif // __cforall
Note: See TracBrowser for help on using the repository browser.