Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/bits/random.hfa

    rc2dfa56a ra6bb5fc  
    3232        //#define XORSHIFT_6_21_7
    3333        #define XOSHIRO128PP
     34    // #define SPLITMIX_32
    3435#else                                                                                                   // 32-bit architecture
    3536        // 64-bit generators
     
    4041        //#define XORSHIFT_6_21_7
    4142        #define XOSHIRO128PP
     43    // #define SPLITMIX_32
    4244#endif // __x86_64__
    4345
     
    7577#define PRNG_STATE_32_T uint32_t
    7678#endif // XORSHIFT_6_21_7
     79
     80#ifdef SPLITMIX_32
     81#define PRNG_NAME_32 splitmix
     82#define PRNG_STATE_32_T uint32_t
     83#endif // SPLITMIX_32
    7784
    7885#ifdef XORSHIFT_12_25_27
     
    117124#ifdef __cforall                                                                                // don't include in C code (invoke.h)
    118125
    119 // https://prng.di.unimi.it/xoshiro256starstar.c
    120 //
    121 // This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.  It has excellent (sub-ns) speed, a state
    122 // (256 bits) that is large enough for any parallel application, and it passes all tests we are aware of.
    123 //
    124 // For generating just floating-point numbers, xoshiro256+ is even faster.
    125 //
    126 // The state must be seeded so that it is not everywhere zero. If you have a 64-bit seed, we suggest to seed a
    127 // splitmix64 generator and use its output to fill s.
    128 
    129 #ifndef XOSHIRO256PP
    130 typedef struct { uint64_t s0, s1, s2, s3; } xoshiro256pp_t;
    131 #endif // ! XOSHIRO256PP
    132 
    133 static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) {
    134         inline uint64_t rotl( const uint64_t x, int k ) {
    135                 return (x << k) | (x >> (64 - k));
    136         } // rotl
    137 
    138         const uint64_t result = rotl( s0 + s3, 23 ) + s0;
    139         const uint64_t t = s1 << 17;
    140 
    141         s2 ^= s0;
    142         s3 ^= s1;
    143         s1 ^= s2;
    144         s0 ^= s3;
    145         s2 ^= t;
    146         s3 = rotl( s3, 45 );
    147         return result;
    148 } // xoshiro256pp
    149 
    150 static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state, uint64_t seed ) {
    151         state = (xoshiro256pp_t){ seed, seed, seed, seed };
    152         xoshiro256pp( state );                                                          // prime
    153 } // xoshiro256pp_set_seed
    154 
    155 // https://prng.di.unimi.it/xoshiro128plusplus.c
    156 //
    157 // This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid generators. It has excellent speed, a state size
    158 // (128 bits) that is large enough for mild parallelism, and it passes all tests we are aware of.
    159 //
    160 // For generating just single-precision (i.e., 32-bit) floating-point numbers, xoshiro128+ is even faster.
    161 //
    162 // The state must be seeded so that it is not everywhere zero.
    163 
    164 #ifndef XOSHIRO128PP
    165 typedef struct { uint32_t s0, s1, s2, s3; } xoshiro128pp_t;
    166 #endif // ! XOSHIRO128PP
    167 
    168 static inline uint32_t xoshiro128pp( xoshiro128pp_t & rs ) with(rs) {
    169         inline uint32_t rotl( const uint32_t x, int k ) {
    170                 return (x << k) | (x >> (32 - k));
    171         } // rotl
    172 
    173         const uint32_t result = rotl( s0 + s3, 7 ) + s0;
    174         const uint32_t t = s1 << 9;
    175 
    176         s2 ^= s0;
    177         s3 ^= s1;
    178         s1 ^= s2;
    179         s0 ^= s3;
    180         s2 ^= t;
    181         s3 = rotl( s3, 11 );
    182         return result;
    183 } // xoshiro128pp
    184 
    185 static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) {
    186         state = (xoshiro128pp_t){ seed, seed, seed, seed };
    187         xoshiro128pp( state );                                                          // prime
    188 } // xoshiro128pp_set_seed
     126// Splitmix32
     127// https://github.com/bryc/code/blob/master/jshash/PRNGs.md#splitmix32
     128// Splitmix32 is not recommended for demanding random number requirements,
     129// but is often used to calculate initial states for other more complex
     130// pseudo-random number generators.
     131// SplitMix32 is a 32 bit variant of Splitmix64
     132
     133static inline uint32_t splitmix32( uint32_t & state ) {
     134    state += 0x9e3779b9;
     135    uint64_t z = state;
     136    z = (z ^ (z >> 15)) * 0x85ebca6b;
     137    z = (z ^ (z >> 13)) * 0xc2b2ae35;
     138    return z ^ (z >> 16);
     139}
     140
     141static inline void splitmix32_set_seed( uint32_t & state , uint64_t seed ) {
     142    state = seed;
     143    splitmix32( state );                                                                // prime
     144} // splitmix32_set_seed
    189145
    190146#ifdef __SIZEOF_INT128__
     
    220176#endif // __SIZEOF_INT128__
    221177
     178// https://prng.di.unimi.it/xoshiro256plusplus.c
     179//
     180// This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.  It has excellent (sub-ns) speed, a state
     181// (256 bits) that is large enough for any parallel application, and it passes all tests we are aware of.
     182//
     183// For generating just floating-point numbers, xoshiro256+ is even faster.
     184//
     185// The state must be seeded so that it is not everywhere zero. If you have a 64-bit seed, we suggest to seed a
     186// splitmix64 generator and use its output to fill s.
     187
     188#ifndef XOSHIRO256PP
     189typedef struct { uint64_t s0, s1, s2, s3; } xoshiro256pp_t;
     190#endif // ! XOSHIRO256PP
     191
     192static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) {
     193        inline uint64_t rotl( const uint64_t x, int k ) {
     194                return (x << k) | (x >> (64 - k));
     195        } // rotl
     196
     197        const uint64_t result = rotl( s0 + s3, 23 ) + s0;
     198        const uint64_t t = s1 << 17;
     199
     200        s2 ^= s0;
     201        s3 ^= s1;
     202        s1 ^= s2;
     203        s0 ^= s3;
     204        s2 ^= t;
     205        s3 = rotl( s3, 45 );
     206        return result;
     207} // xoshiro256pp
     208
     209static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state, uint64_t seed ) {
     210    uint64_t state;
     211    wyhash64_set_seed( state, seed );
     212    // these are done explicitly in this order to attain repeatable seeding.
     213    // do not call wyhash64 directly in the state init since order of argument evaluation
     214    // may not be consistent, leading to irreproducible seeding
     215    uint64_t seed1 = wyhash64( state );
     216    uint64_t seed2 = wyhash64( state );
     217    uint64_t seed3 = wyhash64( state );
     218    uint64_t seed4 = wyhash64( state );
     219        state = (xoshiro256pp_t){ seed1, seed2, seed3, seed4 };
     220        xoshiro256pp( state );                                                          // prime
     221} // xoshiro256pp_set_seed
     222
     223// https://prng.di.unimi.it/xoshiro128plusplus.c
     224//
     225// This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid generators. It has excellent speed, a state size
     226// (128 bits) that is large enough for mild parallelism, and it passes all tests we are aware of.
     227//
     228// For generating just single-precision (i.e., 32-bit) floating-point numbers, xoshiro128+ is even faster.
     229//
     230// The state must be seeded so that it is not everywhere zero.
     231
     232#ifndef XOSHIRO128PP
     233typedef struct { uint32_t s0, s1, s2, s3; } xoshiro128pp_t;
     234#endif // ! XOSHIRO128PP
     235
     236static inline uint32_t xoshiro128pp( xoshiro128pp_t & rs ) with(rs) {
     237        inline uint32_t rotl( const uint32_t x, int k ) {
     238                return (x << k) | (x >> (32 - k));
     239        } // rotl
     240
     241        const uint32_t result = rotl( s0 + s3, 7 ) + s0;
     242        const uint32_t t = s1 << 9;
     243
     244        s2 ^= s0;
     245        s3 ^= s1;
     246        s1 ^= s2;
     247        s0 ^= s3;
     248        s2 ^= t;
     249        s3 = rotl( s3, 11 );
     250        return result;
     251} // xoshiro128pp
     252
     253static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) {
     254    // these are done explicitly in this order to attain repeatable seeding.
     255    // do not call splitmix32 directly in the state init since order of argument evaluation
     256    // may not be consistent leading to irreproducible seeding
     257    uint32_t seed1 = splitmix32( seed );
     258    uint32_t seed2 = splitmix32( seed );
     259    uint32_t seed3 = splitmix32( seed );
     260    uint32_t seed4 = splitmix32( seed );
     261        state = (xoshiro128pp_t){ seed1, seed2, seed3, seed4 };
     262        xoshiro128pp( state );                                                          // prime
     263} // xoshiro128pp_set_seed
     264
    222265//--------------------------------------------------
    223266static inline uint64_t xorshift_13_7_17( uint64_t & state ) {
     
    314357
    315358static inline void xorwow_set_seed( xorwow_t & rs, uint32_t seed ) {
    316         rs = (xorwow_t){ seed, seed, seed, seed, 0 };
     359    // these are done explicitly in this order to attain repeatable seeding.
     360    // do not call splitmix32 directly in the state init since order of argument evaluation
     361    // may not be consistent leading to irreproducible seeding
     362    uint32_t seed1 = splitmix32( seed );
     363    uint32_t seed2 = splitmix32( seed );
     364    uint32_t seed3 = splitmix32( seed );
     365    uint32_t seed4 = splitmix32( seed );
     366        rs = (xorwow_t){ seed1, seed2, seed3, seed4, 0 };
    317367        xorwow( rs );                                                                           // prime
    318368} // xorwow_set_seed
Note: See TracChangeset for help on using the changeset viewer.