Changeset dd46fd3


Ignore:
Timestamp:
Nov 30, 2022, 10:36:25 PM (2 months ago)
Author:
Peter A. Buhr <pabuhr@…>
Branches:
master
Children:
5657de9, c8238c0
Parents:
be1d00c
Message:

generalization of PRNG

Files:
10 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/bits/random.hfa

    rbe1d00c rdd46fd3  
    1010// Created On       : Fri Jan 14 07:18:11 2022
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Nov 21 17:50:12 2022
    13 // Update Count     : 15
     12// Last Modified On : Wed Nov 30 18:32:25 2022
     13// Update Count     : 111
    1414//
    1515
     
    1717
    1818#include <stdint.h>
     19
     20#define GLUE2( x, y ) x##y
     21#define GLUE( x, y ) GLUE2( x, y )
    1922
    2023// Set default PRNG for architecture size.
    2124#ifdef __x86_64__                                                                               // 64-bit architecture
    2225#define LEHMER64
     26#define XORSHIFT_6_21_7
     27//#define XOSHIRO256PP
     28//#define XOSHIRO128PP
    2329#else                                                                                                   // 32-bit architecture
     30#define LEHMER64
    2431#define XORSHIFT_6_21_7
    2532#endif // __x86_64__
    2633
    2734// C/CFA PRNG name and random-state.
     35
    2836#ifdef LEHMER64
    29 #define PRNG_NAME lehmer64
    30 #define PRNG_ARG_T __uint128_t
     37#define PRNG_NAME_64 lehmer64
     38#define PRNG_STATE_64_T __uint128_t
    3139#endif // LEHMER64
    3240
    3341#ifdef XORSHIFT_6_21_7
    34 #define PRNG_NAME xorshift_6_21_7
    35 #define PRNG_ARG_T uint32_t
     42#define PRNG_NAME_32 xorshift_6_21_7
     43#define PRNG_STATE_32_T uint32_t
    3644#endif // XORSHIFT_6_21_7
    3745
     46#ifdef XOSHIRO256PP
     47#define PRNG_NAME_64 xoshiro256pp
     48#define PRNG_STATE_64_T struct GLUE(PRNG_NAME_64,_t)
     49PRNG_STATE_64_T { uint64_t s[4]; };
     50#endif // XOSHIRO256PP
     51
     52#ifdef XOSHIRO128PP
     53#define PRNG_NAME_32 xoshiro128pp
     54#define PRNG_STATE_32_T struct GLUE(PRNG_NAME_32,_t)
     55PRNG_STATE_32_T { uint32_t s[4]; };
     56#endif // XOSHIRO128PP
     57
     58#define PRNG_SET_SEED_64 GLUE(PRNG_NAME_64,_set_seed)
     59#define PRNG_SET_SEED_32 GLUE(PRNG_NAME_32,_set_seed)
     60
     61
     62// Default PRNG used by runtime.
     63#ifdef __x86_64__                                                                               // 64-bit architecture
     64#define PRNG_NAME PRNG_NAME_64
     65#define PRNG_STATE_T PRNG_STATE_64_T
     66#else                                                                                                   // 32-bit architecture
     67#define PRNG_NAME PRNG_NAME_32
     68#define PRNG_STATE_T PRNG_STATE_32_T
     69#endif // __x86_64__
     70
     71#define PRNG_SET_SEED GLUE(PRNG_NAME,_set_seed)
     72
     73
    3874#ifdef __cforall                                                                                // don't include in C code (invoke.h)
    3975
    40 // Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is returned
    41 // (copied), and then compute and store the next random value.
    42 
    43 #if defined(__SIZEOF_INT128__)
    44 //--------------------------------------------------
     76// https://prng.di.unimi.it/xoshiro128plusplus.c
     77//
     78// This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid generators. It has excellent speed, a state size
     79// (128 bits) that is large enough for mild parallelism, and it passes all tests we are aware of.
     80//
     81// For generating just single-precision (i.e., 32-bit) floating-point numbers, xoshiro128+ is even faster.
     82//
     83// The state must be seeded so that it is not everywhere zero.
     84
     85#ifndef XOSHIRO128PP
     86struct xoshiro128pp_t { uint32_t s[4]; };
     87#endif // ! XOSHIRO128PP
     88
     89static inline uint32_t xoshiro128pp( xoshiro128pp_t & rs ) with(rs) {
     90        inline uint32_t rotl( const uint32_t x, int k ) {
     91                return (x << k) | (x >> (32 - k));
     92        }
     93
     94        const uint32_t result = rotl( s[0] + s[3], 7 ) + s[0];
     95        const uint32_t t = s[1] << 9;
     96
     97        s[2] ^= s[0];
     98        s[3] ^= s[1];
     99        s[1] ^= s[2];
     100        s[0] ^= s[3];
     101        s[2] ^= t;
     102        s[3] = rotl( s[3], 11 );
     103        return result;
     104}
     105
     106static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) {
     107        state = (xoshiro128pp_t){ {seed, seed, seed, seed} };
     108} // xoshiro128pp_set_seed
     109
     110// This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.  It has excellent (sub-ns) speed, a state
     111// (256 bits) that is large enough for any parallel application, and it passes all tests we are aware of.
     112//
     113// For generating just floating-point numbers, xoshiro256+ is even faster.
     114//
     115// The state must be seeded so that it is not everywhere zero. If you have a 64-bit seed, we suggest to seed a
     116// splitmix64 generator and use its output to fill s.
     117
     118#ifndef XOSHIRO256PP
     119struct xoshiro256pp_t { uint64_t s[4]; };
     120#endif // ! XOSHIRO256PP
     121
     122static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) {
     123        inline uint64_t rotl(const uint64_t x, int k) {
     124                return (x << k) | (x >> (64 - k));
     125        }
     126
     127        const uint64_t result = rotl( s[0] + s[3], 23 ) + s[0];
     128        const uint64_t t = s[1] << 17;
     129
     130        s[2] ^= s[0];
     131        s[3] ^= s[1];
     132        s[1] ^= s[2];
     133        s[0] ^= s[3];
     134        s[2] ^= t;
     135        s[3] = rotl( s[3], 45 );
     136        return result;
     137}
     138
     139static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state,  uint64_t seed ) {
     140        state = (xoshiro256pp_t){ {seed, seed, seed, seed} };
     141} // xoshiro256pp_set_seed
     142
     143#ifdef __SIZEOF_INT128__
     144        // Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is
     145        // returned (copied), and then compute and store the next random value.
     146        //--------------------------------------------------
    45147        static inline uint64_t lehmer64( __uint128_t & state ) {
    46148                __uint128_t ret = state;
    47149                state *= 0xda942042e4dd58b5;
    48150                return ret >> 64;
    49         }
    50 
    51 //--------------------------------------------------
     151        } // lehmer64
     152
     153        static inline void lehmer64_set_seed( __uint128_t & state, uint64_t seed ) {
     154                state = seed;
     155        } // lehmer64_set_seed
     156
     157        //--------------------------------------------------
    52158        static inline uint64_t wyhash64( uint64_t & state ) {
    53159                state += 0x60bee2bee120fc15;
     
    59165                return m2;
    60166        }
    61 #endif
     167
     168        static inline void wyhash64_set_seed( __uint128_t & state, uint64_t seed ) {
     169                state = seed;
     170        } // lehmer64_set_seed
     171#endif // __SIZEOF_INT128__
    62172
    63173//--------------------------------------------------
     
    68178        state ^= state << 17;
    69179        return ret;
     180}
     181
     182static inline void xorshift_13_7_17_set_seed( uint64_t & state, uint32_t seed ) {
     183        state = seed;
    70184}
    71185
     
    79193} // xorshift_6_21_7
    80194
     195static inline void xorshift_6_21_7_set_seed( uint32_t & state, uint32_t seed ) {
     196        state = seed;
     197}
     198
    81199//--------------------------------------------------
    82200typedef struct {
     
    105223}
    106224
    107 //--------------------------------------------------
    108 static inline uint32_t LCG( uint32_t & state ) {                // linear congruential generator
    109         uint32_t ret = state;
    110         state = 36969 * (state & 65535) + (state >> 16);        // 36969 is NOT prime! No not change it!
    111         return ret;
    112 } // LCG
    113 
     225// Used in __tls_rand_fwd
    114226//--------------------------------------------------
    115227#define M  (1_l64u << 48_l64u)
  • libcfa/src/concurrency/invoke.h

    rbe1d00c rdd46fd3  
    1010// Created On       : Tue Jan 17 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Nov 21 17:40:24 2022
    13 // Update Count     : 55
     12// Last Modified On : Tue Nov 29 20:42:21 2022
     13// Update Count     : 56
    1414//
    1515
     
    223223                struct processor * last_proc;
    224224
    225                 PRNG_ARG_T random_state;                                                // fast random numbers
     225                PRNG_STATE_T random_state;                                              // fast random numbers
    226226
    227227                #if defined( __CFA_WITH_VERIFY__ )
  • libcfa/src/concurrency/kernel.cfa

    rbe1d00c rdd46fd3  
    1010// Created On       : Tue Jan 17 12:27:26 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Aug 31 07:08:20 2020
    13 // Update Count     : 71
     12// Last Modified On : Wed Nov 30 18:14:08 2022
     13// Update Count     : 76
    1414//
    1515
     
    151151        // Because of a bug, we couldn't initialized the seed on construction
    152152        // Do it here
    153         __cfaabi_tls.rand_seed ^= rdtscl();
     153        PRNG_SET_SEED( __cfaabi_tls.random_state, rdtscl() );
    154154        __cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
    155155        __tls_rand_advance_bck();
  • libcfa/src/concurrency/kernel/fwd.hfa

    rbe1d00c rdd46fd3  
    4646                        } preemption_state;
    4747
    48                         #if defined(__SIZEOF_INT128__)
    49                                 __uint128_t rand_seed;
    50                         #else
    51                                 uint64_t rand_seed;
    52                         #endif
     48                        PRNG_STATE_T random_state;
     49
    5350                        struct {
    5451                                uint64_t fwd_seed;
     
    5754
    5855                        struct __stats_t        * volatile this_stats;
    59 
    6056
    6157                        #ifdef __CFA_WITH_VERIFY__
     
    7672                #define publicTLS_get( member ) ((typeof(__cfaabi_tls.member))__cfatls_get( __builtin_offsetof(KernelThreadData, member) ))
    7773
    78                 static inline uint64_t __tls_rand() {
    79                         return
    80                         #if defined(__SIZEOF_INT128__)
    81                                 lehmer64( kernelTLS().rand_seed );
    82                         #else
    83                                 xorshift_13_7_17( kernelTLS().rand_seed );
    84                         #endif
     74                static inline
     75                        #ifdef __x86_64__                                                       // 64-bit architecture
     76                        uint64_t
     77                        #else                                                                           // 32-bit architecture
     78                        uint32_t
     79                        #endif // __x86_64__
     80                __tls_rand() {
     81                        return PRNG_NAME( kernelTLS().random_state );
    8582                }
    8683
  • libcfa/src/concurrency/kernel/startup.cfa

    rbe1d00c rdd46fd3  
    108108extern void __wake_proc(processor *);
    109109extern int cfa_main_returned;                                                   // from interpose.cfa
    110 PRNG_ARG_T __global_random_prime = 4_294_967_291u;
     110size_t __global_random_prime = 4_294_967_291u;
    111111bool __global_random_mask = false;
    112112
     
    135135// Global state
    136136__thread struct KernelThreadData __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" ))) @= {
    137         NULL,                                                                                           // cannot use 0p
    138         NULL,
    139         false,
    140         { 1, false, false },
    141         0,
    142         { 0, 0 },
    143         NULL,
     137        .this_thread : NULL,                                                            // cannot use 0p
     138        .this_processor : NULL,
     139        .sched_lock : false,
     140        .preemption_state : { .disable_count : 1, .enabled : false, .in_progress : false },
     141        // random_state uninitialized
     142        .ready_rng : { .fwd_seed : 0, .bck_seed : 0 },
     143        .this_stats : NULL,
    144144        #ifdef __CFA_WITH_VERIFY__
    145                 false,
    146                 0,
     145                .in_sched_lock : false,
     146                .sched_id : 0,
    147147        #endif
    148148};
     
    521521        preferred = ready_queue_new_preferred();
    522522        last_proc = 0p;
    523         random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl();
     523        PRNG_SET_SEED( random_state,  __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl() );
    524524        #if defined( __CFA_WITH_VERIFY__ )
    525525                executing = 0p;
  • libcfa/src/concurrency/thread.cfa

    rbe1d00c rdd46fd3  
    1010// Created On       : Tue Jan 17 12:27:26 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Nov 22 22:18:37 2022
    13 // Update Count     : 81
     12// Last Modified On : Wed Nov 30 18:14:07 2022
     13// Update Count     : 95
    1414//
    1515
     
    2626#include "invoke.h"
    2727
    28 extern PRNG_ARG_T __global_random_seed, __global_random_prime;
     28extern size_t __global_random_seed;
     29extern size_t __global_random_prime;
    2930extern bool __global_random_mask;
    3031
     
    5354        preferred = ready_queue_new_preferred();
    5455        last_proc = 0p;
    55         random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl();
     56        PRNG_SET_SEED( random_state, __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl() );
    5657        #if defined( __CFA_WITH_VERIFY__ )
    5758                executing = 0p;
     
    228229
    229230void set_seed( size_t seed ) {
    230         PRNG_ARG_T & state = active_thread()->random_state;
    231         state = __global_random_seed = seed;
     231        PRNG_STATE_T & state = active_thread()->random_state;
     232        __global_random_seed = seed;
     233        PRNG_SET_SEED( state, seed );
    232234        (void)PRNG_NAME( state );                                                       // prime PRNG
    233         __global_random_prime = state;
     235        __global_random_prime = seed;
    234236        __global_random_mask = true;
    235237} // set_seed
    236238
    237239size_t prng( void ) {                                                                   // [0,UINT_MAX]
    238         PRNG_ARG_T & state = active_thread()->random_state;
     240        PRNG_STATE_T( & state ) = active_thread()->random_state;
    239241        return PRNG_NAME( state );
    240242} // prng
  • libcfa/src/startup.cfa

    rbe1d00c rdd46fd3  
    1010// Created On       : Tue Jul 24 16:21:57 2018
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sun Nov 20 21:26:40 2022
    13 // Update Count     : 59
     12// Last Modified On : Wed Nov 30 18:14:06 2022
     13// Update Count     : 68
    1414//
    1515
     
    2121#include "startup.hfa"
    2222
    23 extern PRNG_ARG_T __global_random_seed;                                 // sequential/concurrent
    24 extern PRNG_ARG_T __global_random_state;                                // sequential
     23extern size_t __global_random_seed;                                             // sequential/concurrent
     24extern PRNG_STATE_T __global_random_state;                              // sequential
    2525
    2626extern "C" {
     
    6969        void __cfaabi_core_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_CORE ) ));
    7070        void __cfaabi_core_startup( void ) {
    71                 __global_random_state = __global_random_seed = rdtscl();
     71                __global_random_seed = rdtscl();
     72                PRNG_SET_SEED( __global_random_state, __global_random_seed );
    7273                __cfaabi_interpose_startup();
    7374                __cfaabi_device_startup();
  • libcfa/src/stdlib.cfa

    rbe1d00c rdd46fd3  
    1010// Created On       : Thu Jan 28 17:10:29 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Nov 22 22:18:36 2022
    13 // Update Count     : 613
     12// Last Modified On : Wed Nov 30 18:14:05 2022
     13// Update Count     : 622
    1414//
    1515
     
    226226
    227227// would be cool to make hidden but it's needed for libcfathread
    228 __attribute__((visibility("default"))) PRNG_ARG_T __global_random_seed; // sequential/concurrent
    229 __attribute__((visibility("hidden"))) PRNG_ARG_T __global_random_state; // sequential only
    230 
    231 void set_seed( size_t seed ) { __global_random_state = __global_random_seed = seed; PRNG_NAME( __global_random_state ); }
     228__attribute__((visibility("default"))) size_t __global_random_seed; // sequential/concurrent
     229__attribute__((visibility("hidden"))) PRNG_STATE_T __global_random_state; // sequential only
     230
     231void set_seed( size_t seed ) { __global_random_seed = seed; PRNG_SET_SEED( __global_random_state, seed ); PRNG_NAME( __global_random_state ); }
    232232size_t get_seed() { return __global_random_seed; }
    233233size_t prng( void ) { return PRNG_NAME( __global_random_state ); } // [0,UINT_MAX]
  • libcfa/src/stdlib.hfa

    rbe1d00c rdd46fd3  
    1010// Created On       : Thu Jan 28 17:12:35 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Nov 22 22:48:59 2022
    13 // Update Count     : 736
     12// Last Modified On : Wed Nov 30 18:18:26 2022
     13// Update Count     : 760
    1414//
    1515
     
    422422        uint32_t callcnt;                                                                       // call count
    423423        uint32_t seed;                                                                          // current seed
    424         PRNG_ARG_T state;                                                                       // random state
     424        PRNG_STATE_32_T state;                                                          // random state
    425425}; // PRNG
    426426
    427427static inline {
    428         void set_seed( PRNG32 & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; PRNG_NAME( state ); } // set seed
     428        void set_seed( PRNG32 & prng, uint32_t seed_ ) with( prng ) { seed = seed_; PRNG_SET_SEED_32( state, seed ); PRNG_NAME_32( state ); } // set seed
    429429        uint32_t get_seed( PRNG32 & prng ) __attribute__(( warn_unused_result )) with( prng ) { return seed; } // get seed
    430         uint32_t prng( PRNG32 & prng ) __attribute__(( warn_unused_result )) with( prng ) { callcnt += 1; return PRNG_NAME( state ); } // [0,UINT_MAX]
     430        uint32_t prng( PRNG32 & prng ) __attribute__(( warn_unused_result )) with( prng ) { callcnt += 1; return PRNG_NAME_32( state ); } // [0,UINT_MAX]
    431431        uint32_t prng( PRNG32 & prng, size_t u ) __attribute__(( warn_unused_result )) { return prng( prng ) % u; } // [0,u)
    432432        uint32_t prng( PRNG32 & prng, size_t l, size_t u ) __attribute__(( warn_unused_result )) { return prng( prng, u - l + 1 ) + l; } // [l,u]
     
    439439        uint64_t callcnt;                                                                       // call count
    440440        uint64_t seed;                                                                          // current seed
    441         PRNG_ARG_T state;                                                                       // random state
     441        PRNG_STATE_64_T state;                                                          // random state
    442442}; // PRNG
    443443
    444444static inline {
    445         void set_seed( PRNG64 & prng, uint64_t seed_ ) with( prng ) { state = seed = seed_; PRNG_NAME( state ); } // set seed
     445        void set_seed( PRNG64 & prng, uint64_t seed_ ) with( prng ) { seed = seed_; PRNG_SET_SEED_64( state, seed ); PRNG_NAME_64( state ); } // set seed
    446446        uint64_t get_seed( PRNG64 & prng ) __attribute__(( warn_unused_result )) with( prng ) { return seed; } // get seed
    447         uint64_t prng( PRNG64 & prng ) __attribute__(( warn_unused_result )) with( prng ) { callcnt += 1; return PRNG_NAME( state ); } // [0,UINT_MAX]
     447        uint64_t prng( PRNG64 & prng ) __attribute__(( warn_unused_result )) with( prng ) { callcnt += 1; return PRNG_NAME_64( state ); } // [0,UINT_MAX]
    448448        uint64_t prng( PRNG64 & prng, size_t u ) __attribute__(( warn_unused_result )) { return prng( prng ) % u; } // [0,u)
    449449        uint64_t prng( PRNG64 & prng, size_t l, size_t u ) __attribute__(( warn_unused_result )) { return prng( prng, u - l + 1 ) + l; } // [l,u]
  • tests/.expect/nested_function.x64.txt

    rbe1d00c rdd46fd3  
    1 total 80
     1total 75
Note: See TracChangeset for help on using the changeset viewer.