Changeset dd46fd3 for libcfa/src/bits


Ignore:
Timestamp:
Nov 30, 2022, 10:36:25 PM (22 months ago)
Author:
Peter A. Buhr <pabuhr@…>
Branches:
ADT, ast-experimental, master
Children:
5657de9, c8238c0
Parents:
be1d00c
Message:

generalization of PRNG

File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/bits/random.hfa

    rbe1d00c rdd46fd3  
    1010// Created On       : Fri Jan 14 07:18:11 2022
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Nov 21 17:50:12 2022
    13 // Update Count     : 15
     12// Last Modified On : Wed Nov 30 18:32:25 2022
     13// Update Count     : 111
    1414//
    1515
     
    1717
    1818#include <stdint.h>
     19
     20#define GLUE2( x, y ) x##y
     21#define GLUE( x, y ) GLUE2( x, y )
    1922
    2023// Set default PRNG for architecture size.
    2124#ifdef __x86_64__                                                                               // 64-bit architecture
    2225#define LEHMER64
     26#define XORSHIFT_6_21_7
     27//#define XOSHIRO256PP
     28//#define XOSHIRO128PP
    2329#else                                                                                                   // 32-bit architecture
     30#define LEHMER64
    2431#define XORSHIFT_6_21_7
    2532#endif // __x86_64__
    2633
    2734// C/CFA PRNG name and random-state.
     35
    2836#ifdef LEHMER64
    29 #define PRNG_NAME lehmer64
    30 #define PRNG_ARG_T __uint128_t
     37#define PRNG_NAME_64 lehmer64
     38#define PRNG_STATE_64_T __uint128_t
    3139#endif // LEHMER64
    3240
    3341#ifdef XORSHIFT_6_21_7
    34 #define PRNG_NAME xorshift_6_21_7
    35 #define PRNG_ARG_T uint32_t
     42#define PRNG_NAME_32 xorshift_6_21_7
     43#define PRNG_STATE_32_T uint32_t
    3644#endif // XORSHIFT_6_21_7
    3745
     46#ifdef XOSHIRO256PP
     47#define PRNG_NAME_64 xoshiro256pp
     48#define PRNG_STATE_64_T struct GLUE(PRNG_NAME_64,_t)
     49PRNG_STATE_64_T { uint64_t s[4]; };
     50#endif // XOSHIRO256PP
     51
     52#ifdef XOSHIRO128PP
     53#define PRNG_NAME_32 xoshiro128pp
     54#define PRNG_STATE_32_T struct GLUE(PRNG_NAME_32,_t)
     55PRNG_STATE_32_T { uint32_t s[4]; };
     56#endif // XOSHIRO128PP
     57
     58#define PRNG_SET_SEED_64 GLUE(PRNG_NAME_64,_set_seed)
     59#define PRNG_SET_SEED_32 GLUE(PRNG_NAME_32,_set_seed)
     60
     61
     62// Default PRNG used by runtime.
     63#ifdef __x86_64__                                                                               // 64-bit architecture
     64#define PRNG_NAME PRNG_NAME_64
     65#define PRNG_STATE_T PRNG_STATE_64_T
     66#else                                                                                                   // 32-bit architecture
     67#define PRNG_NAME PRNG_NAME_32
     68#define PRNG_STATE_T PRNG_STATE_32_T
     69#endif // __x86_64__
     70
     71#define PRNG_SET_SEED GLUE(PRNG_NAME,_set_seed)
     72
     73
    3874#ifdef __cforall                                                                                // don't include in C code (invoke.h)
    3975
    40 // Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is returned
    41 // (copied), and then compute and store the next random value.
    42 
    43 #if defined(__SIZEOF_INT128__)
    44 //--------------------------------------------------
     76// https://prng.di.unimi.it/xoshiro128plusplus.c
     77//
     78// This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid generators. It has excellent speed, a state size
     79// (128 bits) that is large enough for mild parallelism, and it passes all tests we are aware of.
     80//
     81// For generating just single-precision (i.e., 32-bit) floating-point numbers, xoshiro128+ is even faster.
     82//
     83// The state must be seeded so that it is not everywhere zero.
     84
     85#ifndef XOSHIRO128PP
     86struct xoshiro128pp_t { uint32_t s[4]; };
     87#endif // ! XOSHIRO128PP
     88
     89static inline uint32_t xoshiro128pp( xoshiro128pp_t & rs ) with(rs) {	// returns the next 32-bit value and advances rs in place; with(rs) lets the body name rs.s as s
     90        inline uint32_t rotl( const uint32_t x, int k ) {	// local helper: rotate x left by k bits (called below with k = 7 and k = 11)
     91                return (x << k) | (x >> (32 - k));
     92        }
     93
     94        const uint32_t result = rotl( s[0] + s[3], 7 ) + s[0];	// output is computed from the state as it was on entry
     95        const uint32_t t = s[1] << 9;	// captured before s[1] is modified below
     96
     97        s[2] ^= s[0];	// state transition: each line reads words written by earlier lines, so the order matters
     98        s[3] ^= s[1];
     99        s[1] ^= s[2];
     100        s[0] ^= s[3];
     101        s[2] ^= t;
     102        s[3] = rotl( s[3], 11 );
     103        return result;
     104} // xoshiro128pp
     105
     106static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) {
     107        state = (xoshiro128pp_t){ {seed, seed, seed, seed} };
     108} // xoshiro128pp_set_seed
     109
     110// This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.  It has excellent (sub-ns) speed, a state
     111// (256 bits) that is large enough for any parallel application, and it passes all tests we are aware of.
     112//
     113// For generating just floating-point numbers, xoshiro256+ is even faster.
     114//
     115// The state must be seeded so that it is not everywhere zero. If you have a 64-bit seed, we suggest to seed a
     116// splitmix64 generator and use its output to fill s.
     117
     118#ifndef XOSHIRO256PP
     119struct xoshiro256pp_t { uint64_t s[4]; };
     120#endif // ! XOSHIRO256PP
     121
     122static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) {	// returns the next 64-bit value and advances rs in place; with(rs) lets the body name rs.s as s
     123        inline uint64_t rotl(const uint64_t x, int k) {	// local helper: rotate x left by k bits (called below with k = 23 and k = 45)
     124                return (x << k) | (x >> (64 - k));
     125        }
     126
     127        const uint64_t result = rotl( s[0] + s[3], 23 ) + s[0];	// output is computed from the state as it was on entry
     128        const uint64_t t = s[1] << 17;	// captured before s[1] is modified below
     129
     130        s[2] ^= s[0];	// state transition: each line reads words written by earlier lines, so the order matters
     131        s[3] ^= s[1];
     132        s[1] ^= s[2];
     133        s[0] ^= s[3];
     134        s[2] ^= t;
     135        s[3] = rotl( s[3], 45 );
     136        return result;
     137} // xoshiro256pp
     138
     139static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state,  uint64_t seed ) {
     140        state = (xoshiro256pp_t){ {seed, seed, seed, seed} };
     141} // xoshiro256pp_set_seed
     142
     143#ifdef __SIZEOF_INT128__
     144        // Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is
     145        // returned (copied) first, and only then is the next random value computed and stored.
     146        //--------------------------------------------------
    45147        static inline uint64_t lehmer64( __uint128_t & state ) {
    46148                __uint128_t ret = state;
    47149                state *= 0xda942042e4dd58b5;
    48150                return ret >> 64;
    49         }
    50 
    51 //--------------------------------------------------
     151        } // lehmer64
     152
     153        static inline void lehmer64_set_seed( __uint128_t & state, uint64_t seed ) {	// initialize the 128-bit lehmer64 state from a 64-bit seed
     154                state = seed;	// seed lands in the low 64 bits, the upper 64 bits become zero; NOTE(review): seed 0 stays 0 under lehmer64's "state *= ..." -- confirm callers never pass 0
     155        } // lehmer64_set_seed
     156
     157        //--------------------------------------------------
    52158        static inline uint64_t wyhash64( uint64_t & state ) {
    53159                state += 0x60bee2bee120fc15;
     
    59165                return m2;
    60166        }
    61 #endif
     167
     168        static inline void wyhash64_set_seed( __uint128_t & state, uint64_t seed ) {
     169                state = seed;
     170        } // lehmer64_set_seed
     171#endif // __SIZEOF_INT128__
    62172
    63173//--------------------------------------------------
     
    68178        state ^= state << 17;
    69179        return ret;
     180}
     181
     182static inline void xorshift_13_7_17_set_seed( uint64_t & state, uint32_t seed ) {
     183        state = seed;
    70184}
    71185
     
    79193} // xorshift_6_21_7
    80194
     195static inline void xorshift_6_21_7_set_seed( uint32_t & state, uint32_t seed ) {
     196        state = seed;
     197}
     198
    81199//--------------------------------------------------
    82200typedef struct {
     
    105223}
    106224
    107 //--------------------------------------------------
    108 static inline uint32_t LCG( uint32_t & state ) {                // linear congruential generator
    109         uint32_t ret = state;
    110         state = 36969 * (state & 65535) + (state >> 16);        // 36969 is NOT prime! Do not change it!
    111         return ret;
    112 } // LCG
    113 
     225// Used in __tls_rand_fwd
    114226//--------------------------------------------------
    115227#define M  (1_l64u << 48_l64u)
Note: See TracChangeset for help on using the changeset viewer.