Changeset 4c6ba5a for libcfa/src


Ignore:
Timestamp:
Mar 20, 2023, 3:42:28 PM (21 months ago)
Author:
caparsons <caparson@…>
Branches:
ADT, ast-experimental, master
Children:
a6bb5fc
Parents:
eac318a
Message:

refactored to use generators for seeding state, added splitmix32 for 32 bit seeding

File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/bits/random.hfa

    reac318a r4c6ba5a  
    3232        //#define XORSHIFT_6_21_7
    3333        #define XOSHIRO128PP
     34    // #define SPLITMIX_32
    3435#else                                                                                                   // 32-bit architecture
    3536        // 64-bit generators
     
    4041        //#define XORSHIFT_6_21_7
    4142        #define XOSHIRO128PP
     43    // #define SPLITMIX_32
    4244#endif // __x86_64__
    4345
     
    7779#define PRNG_STATE_32_T uint32_t
    7880#endif // XORSHIFT_6_21_7
     81
     82#ifdef SPLITMIX_32
     83#define PRNG_NAME_32 splitmix // NOTE(review): the generator functions are named splitmix32 / splitmix32_set_seed -- confirm this base name is what users of PRNG_NAME_32 expect
     84#define PRNG_STATE_32_T uint32_t
     85#endif // SPLITMIX_32
    7986
    8087#ifdef XORSHIFT_12_25_27
     
    119126#ifdef __cforall                                                                                // don't include in C code (invoke.h)
    120127
    121 // https://prng.di.unimi.it/xoshiro256starstar.c
    122 //
    123 // This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.  It has excellent (sub-ns) speed, a state
    124 // (256 bits) that is large enough for any parallel application, and it passes all tests we are aware of.
    125 //
    126 // For generating just floating-point numbers, xoshiro256+ is even faster.
    127 //
    128 // The state must be seeded so that it is not everywhere zero. If you have a 64-bit seed, we suggest to seed a
    129 // splitmix64 generator and use its output to fill s.
    130 
    131 #ifndef XOSHIRO256PP
    132 typedef struct xoshiro256pp_t { uint64_t s0, s1, s2, s3; } xoshiro256pp_t;
    133 #endif // ! XOSHIRO256PP
    134 
    135 static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) {
    136         inline uint64_t rotl( const uint64_t x, int k ) {
    137                 return (x << k) | (x >> (64 - k));
    138         } // rotl
    139 
    140         const uint64_t result = rotl( s0 + s3, 23 ) + s0;
    141         const uint64_t t = s1 << 17;
    142 
    143         s2 ^= s0;
    144         s3 ^= s1;
    145         s1 ^= s2;
    146         s0 ^= s3;
    147         s2 ^= t;
    148         s3 = rotl( s3, 45 );
    149         return result;
    150 } // xoshiro256pp
    151 
    152 static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state, uint64_t seed ) {
    153         state = (xoshiro256pp_t){ seed, seed, seed, seed };
    154         xoshiro256pp( state );
    155 } // xoshiro256pp_set_seed
    156 
    157 // https://prng.di.unimi.it/xoshiro128plusplus.c
    158 //
    159 // This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid generators. It has excellent speed, a state size
    160 // (128 bits) that is large enough for mild parallelism, and it passes all tests we are aware of.
    161 //
    162 // For generating just single-precision (i.e., 32-bit) floating-point numbers, xoshiro128+ is even faster.
    163 //
    164 // The state must be seeded so that it is not everywhere zero.
    165 
    166 #ifndef XOSHIRO128PP
    167 typedef struct xoshiro128pp_t { uint32_t s0, s1, s2, s3; } xoshiro128pp_t;
    168 #endif // ! XOSHIRO128PP
    169 
    170 static inline uint32_t xoshiro128pp( xoshiro128pp_t & rs ) with(rs) {
    171         inline uint32_t rotl( const uint32_t x, int k ) {
    172                 return (x << k) | (x >> (32 - k));
    173         } // rotl
    174 
    175         const uint32_t result = rotl( s0 + s3, 7 ) + s0;
    176         const uint32_t t = s1 << 9;
    177 
    178         s2 ^= s0;
    179         s3 ^= s1;
    180         s1 ^= s2;
    181         s0 ^= s3;
    182         s2 ^= t;
    183         s3 = rotl( s3, 11 );
    184         return result;
    185 } // xoshiro128pp
    186 
    187 static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) {
    188         state = (xoshiro128pp_t){ seed, seed, seed, seed };
    189         xoshiro128pp( state );                                                          // prime
    190 } // xoshiro128pp_set_seed
     128// Splitmix32
     129// https://github.com/bryc/code/blob/master/jshash/PRNGs.md#splitmix32
     130// Splitmix32 is not recommended for demanding random number requirements,
     131// but is often used to calculate initial states for other more complex
     132// pseudo-random number generators.
     133// SplitMix32 is a 32 bit variant of Splitmix64
     134
     135static inline uint32_t splitmix32( uint32_t & state ) {
     136    state += 0x9e3779b9;                        // advance the caller's state in place (uint32_t, so the add wraps mod 2^32)
     137    uint64_t z = state;                         // NOTE(review): 64-bit intermediate, so the multiplies below do not wrap at 32 bits; the referenced 32-bit variant presumably does -- confirm this is intended
     138    z = (z ^ (z >> 15)) * 0x85ebca6b;
     139    z = (z ^ (z >> 13)) * 0xc2b2ae35;
     140    return z ^ (z >> 16);                       // narrowed to the low 32 bits by the uint32_t return type
     141} // splitmix32
     142
     143static inline void splitmix32_set_seed( uint32_t & state , uint64_t seed ) {
     144    state = seed;                               // state is uint32_t: only the low 32 bits of the 64-bit seed are kept
     145    splitmix32( state );                                                                // prime: advance once, output is discarded
     146} // splitmix32_set_seed
    191147
    192148#ifdef __SIZEOF_INT128__
     
    222178#endif // __SIZEOF_INT128__
    223179
     180// https://prng.di.unimi.it/xoshiro256plusplus.c
     181//
     182// This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.  It has excellent (sub-ns) speed, a state
     183// (256 bits) that is large enough for any parallel application, and it passes all tests we are aware of.
     184//
     185// For generating just floating-point numbers, xoshiro256+ is even faster.
     186//
     187// The state must be seeded so that it is not everywhere zero. If you have a 64-bit seed, we suggest to seed a
     188// splitmix64 generator and use its output to fill s.
     189
     190#ifndef XOSHIRO256PP
     191typedef struct xoshiro256pp_t { uint64_t s0, s1, s2, s3; } xoshiro256pp_t;
     192#endif // ! XOSHIRO256PP
     193
     194static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) { // s0..s3 below are the fields of rs, updated in place
     195        inline uint64_t rotl( const uint64_t x, int k ) { // rotate x left by k bits; needs 0 < k < 64 (k == 0 would shift right by 64)
     196                return (x << k) | (x >> (64 - k));
     197        } // rotl
     198
     199        const uint64_t result = rotl( s0 + s3, 23 ) + s0; // return value is computed from the state before it is advanced
     200        const uint64_t t = s1 << 17;                    // taken from s1 before s1 is modified below
     201
     202        s2 ^= s0;                                       // state update: each statement reads values written by the ones above it, so order matters
     203        s3 ^= s1;
     204        s1 ^= s2;
     205        s0 ^= s3;
     206        s2 ^= t;
     207        s3 = rotl( s3, 45 );
     208        return result;
     209} // xoshiro256pp
     210
     211static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state, uint64_t seed ) {
     212    uint64_t state;
     213    wyhash64_set_seed( state, seed );
     214    // these are done explicitly in this order to attain repeatable seeding.
     215    // do not call splitmix32 directly in the state init since order of argument evaluation
     216    // may not be consistent leading to irreproducible seeding
     217    uint64_t seed1 = wyhash64( state );
     218    uint64_t seed2 = wyhash64( state );
     219    uint64_t seed3 = wyhash64( state );
     220    uint64_t seed4 = wyhash64( state );
     221        state = (xoshiro256pp_t){ seed1, seed2, seed3, seed4 };
     222        xoshiro256pp( state );
     223} // xoshiro256pp_set_seed
     224
     225// https://prng.di.unimi.it/xoshiro128plusplus.c
     226//
     227// This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid generators. It has excellent speed, a state size
     228// (128 bits) that is large enough for mild parallelism, and it passes all tests we are aware of.
     229//
     230// For generating just single-precision (i.e., 32-bit) floating-point numbers, xoshiro128+ is even faster.
     231//
     232// The state must be seeded so that it is not everywhere zero.
     233
     234#ifndef XOSHIRO128PP
     235typedef struct xoshiro128pp_t { uint32_t s0, s1, s2, s3; } xoshiro128pp_t;
     236#endif // ! XOSHIRO128PP
     237
     238static inline uint32_t xoshiro128pp( xoshiro128pp_t & rs ) with(rs) { // s0..s3 below are the fields of rs, updated in place
     239        inline uint32_t rotl( const uint32_t x, int k ) { // rotate x left by k bits; needs 0 < k < 32 (k == 0 would shift right by 32)
     240                return (x << k) | (x >> (32 - k));
     241        } // rotl
     242
     243        const uint32_t result = rotl( s0 + s3, 7 ) + s0; // return value is computed from the state before it is advanced
     244        const uint32_t t = s1 << 9;                     // taken from s1 before s1 is modified below
     245
     246        s2 ^= s0;                                       // state update: each statement reads values written by the ones above it, so order matters
     247        s3 ^= s1;
     248        s1 ^= s2;
     249        s0 ^= s3;
     250        s2 ^= t;
     251        s3 = rotl( s3, 11 );
     252        return result;
     253} // xoshiro128pp
     254
     255static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) {
     256    // Expand the single 32-bit seed into the four state words with splitmix32. The draws are done
     257    // explicitly, in this order, to attain repeatable seeding. Do not call splitmix32 directly in the state
     258    // init since order of argument evaluation may not be consistent, leading to irreproducible seeding.
     259    uint32_t seed1 = splitmix32( seed );        // seed is a by-value copy; splitmix32 takes it by reference and advances it as its state
     260    uint32_t seed2 = splitmix32( seed );
     261    uint32_t seed3 = splitmix32( seed );
     262    uint32_t seed4 = splitmix32( seed );
     263        state = (xoshiro128pp_t){ seed1, seed2, seed3, seed4 };
     264        xoshiro128pp( state );                                                          // prime: advance once, output is discarded
     265} // xoshiro128pp_set_seed
     266
    224267//--------------------------------------------------
    225268static inline uint64_t xorshift_13_7_17( uint64_t & state ) {
     
    316359
    317360static inline void xorwow_set_seed( xorwow_t & rs, uint32_t seed ) {
    318         rs = (xorwow_t){ seed, seed, seed, seed, 0 };
     361    // these are done explicitly in this order to attain repeatable seeding.
     362    // do not call splitmix32 directly in the state init since order of argument evaluation
     363    // may not be consistent leading to irreproducible seeding
     364    uint32_t seed1 = splitmix32( seed );
     365    uint32_t seed2 = splitmix32( seed );
     366    uint32_t seed3 = splitmix32( seed );
     367    uint32_t seed4 = splitmix32( seed );
     368        rs = (xorwow_t){ seed1, seed2, seed3, seed4, 0 };
    319369        xorwow( rs );                                                                           // prime
    320370} // xorwow_set_seed
Note: See TracChangeset for help on using the changeset viewer.