Changeset d800676 for libcfa


Ignore:
Timestamp:
Mar 23, 2023, 12:18:39 PM (2 years ago)
Author:
Peter A. Buhr <pabuhr@…>
Branches:
ADT, ast-experimental, master
Children:
c94b1f0
Parents:
1afd9ccb (diff), 18ea270 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Location:
libcfa/src
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • TabularUnified libcfa/src/Makefile.am

    r1afd9ccb rd800676  
    4848        math.hfa \
    4949        time_t.hfa \
    50     virtual_dtor.hfa \
     50        virtual_dtor.hfa \
    5151        bits/algorithm.hfa \
    5252        bits/align.hfa \
     
    6969        vec/vec2.hfa \
    7070        vec/vec3.hfa \
    71         vec/vec4.hfa 
     71        vec/vec4.hfa
    7272
    7373inst_headers_src = \
  • TabularUnified libcfa/src/bits/random.hfa

    r1afd9ccb rd800676  
    1010// Created On       : Fri Jan 14 07:18:11 2022
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Dec 22 20:54:22 2022
    13 // Update Count     : 178
     12// Last Modified On : Mon Mar 20 21:45:24 2023
     13// Update Count     : 186
    1414//
    1515
     
    2828        #define XOSHIRO256PP
    2929        //#define KISS_64
     30    // #define SPLITMIX_64
    3031
    3132        // 32-bit generators
    3233        //#define XORSHIFT_6_21_7
    3334        #define XOSHIRO128PP
     35    // #define SPLITMIX_32
    3436#else                                                                                                   // 32-bit architecture
    3537        // 64-bit generators
    3638        //#define XORSHIFT_13_7_17
    3739        #define XOSHIRO256PP
     40    // #define SPLITMIX_64
    3841
    3942        // 32-bit generators
    4043        //#define XORSHIFT_6_21_7
    4144        #define XOSHIRO128PP
     45    // #define SPLITMIX_32
    4246#endif // __x86_64__
    4347
    4448// Define C/CFA PRNG name and random-state.
    45 
    46 // SKULLDUGGERY: typedefs name struct and typedef with the same name to deal with CFA typedef numbering problem.
    4749
    4850#ifdef XOSHIRO256PP
    4951#define PRNG_NAME_64 xoshiro256pp
    5052#define PRNG_STATE_64_T GLUE(PRNG_NAME_64,_t)
    51 typedef struct PRNG_STATE_64_T { uint64_t s0, s1, s2, s3; } PRNG_STATE_64_T;
     53typedef struct { uint64_t s0, s1, s2, s3; } PRNG_STATE_64_T;
    5254#endif // XOSHIRO256PP
    5355
     
    5557#define PRNG_NAME_32 xoshiro128pp
    5658#define PRNG_STATE_32_T GLUE(PRNG_NAME_32,_t)
    57 typedef struct PRNG_STATE_32_T { uint32_t s0, s1, s2, s3; } PRNG_STATE_32_T;
     59typedef struct { uint32_t s0, s1, s2, s3; } PRNG_STATE_32_T;
    5860#endif // XOSHIRO128PP
    5961
     
    8385#endif // XORSHIFT_12_25_27
    8486
     87#ifdef SPLITMIX_64
     88#define PRNG_NAME_64 splitmix64
     89#define PRNG_STATE_64_T uint64_t
     90#endif // SPLITMIX_64
     91
     92#ifdef SPLITMIX_32
     93#define PRNG_NAME_32 splitmix32
     94#define PRNG_STATE_32_T uint32_t
     95#endif // SPLITMIX_32
     96
    8597#ifdef KISS_64
    8698#define PRNG_NAME_64 kiss_64
    8799#define PRNG_STATE_64_T GLUE(PRNG_NAME_64,_t)
    88 typedef struct PRNG_STATE_64_T { uint64_t z, w, jsr, jcong; } PRNG_STATE_64_T;
     100typedef struct { uint64_t z, w, jsr, jcong; } PRNG_STATE_64_T;
    89101#endif // KISS_64
    90102
     
    92104#define PRNG_NAME_32 xorwow
    93105#define PRNG_STATE_32_T GLUE(PRNG_NAME_32,_t)
    94 typedef struct PRNG_STATE_32_T { uint32_t a, b, c, d, counter; } PRNG_STATE_32_T;
     106typedef struct { uint32_t a, b, c, d, counter; } PRNG_STATE_32_T;
    95107#endif // XORWOW
    96108
     
    119131#ifdef __cforall                                                                                // don't include in C code (invoke.h)
    120132
     133// https://rosettacode.org/wiki/Pseudo-random_numbers/Splitmix64
     134//
     135// Splitmix64 is not recommended for demanding random number requirements, but is often used to calculate initial states
     136// for other more complex pseudo-random number generators (see https://prng.di.unimi.it).
     137// Also https://rosettacode.org/wiki/Pseudo-random_numbers/Splitmix64.
     138static inline uint64_t splitmix64( uint64_t & state ) {
     139    state += 0x9e3779b97f4a7c15;
     140    uint64_t z = state;
     141    z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
     142    z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
     143    return z ^ (z >> 31);
     144} // splitmix64
     145
     146static inline void splitmix64_set_seed( uint64_t & state , uint64_t seed ) {
     147    state = seed;
     148    splitmix64( state );                                                                // prime
     149} // splitmix64_set_seed
     150
     151// https://github.com/bryc/code/blob/master/jshash/PRNGs.md#splitmix32
     152//
     153// Splitmix32 is not recommended for demanding random number requirements, but is often used to calculate initial states
     154// for other more complex pseudo-random number generators (see https://prng.di.unimi.it).
     155
     156static inline uint32_t splitmix32( uint32_t & state ) {
     157    state += 0x9e3779b9;
     158    uint64_t z = state;
     159    z = (z ^ (z >> 15)) * 0x85ebca6b;
     160    z = (z ^ (z >> 13)) * 0xc2b2ae35;
     161    return z ^ (z >> 16);
     162} // splitmix32
     163
     164static inline void splitmix32_set_seed( uint32_t & state, uint64_t seed ) {
     165    state = seed;
     166    splitmix32( state );                                                                // prime
     167} // splitmix32_set_seed
     168
     169#ifdef __SIZEOF_INT128__
     170//--------------------------------------------------
     171static inline uint64_t lehmer64( __uint128_t & state ) {
     172        __uint128_t ret = state;
     173        state *= 0x_da94_2042_e4dd_58b5;
     174        return ret >> 64;
     175} // lehmer64
     176
     177static inline void lehmer64_set_seed( __uint128_t & state, uint64_t seed ) {
     178        // The seed needs to be coprime with the 2^64 modulus to get the largest period, so no factors of 2 in the seed.
     179        state = splitmix64( seed );                                                     // prime
     180} // lehmer64_set_seed
     181
     182//--------------------------------------------------
     183static inline uint64_t wyhash64( uint64_t & state ) {
     184        uint64_t ret = state;
     185        state += 0x_60be_e2be_e120_fc15;
     186        __uint128_t tmp;
     187        tmp = (__uint128_t) ret * 0x_a3b1_9535_4a39_b70d;
     188        uint64_t m1 = (tmp >> 64) ^ tmp;
     189        tmp = (__uint128_t)m1 * 0x_1b03_7387_12fa_d5c9;
     190        uint64_t m2 = (tmp >> 64) ^ tmp;
     191        return m2;
     192} // wyhash64
     193
     194static inline void wyhash64_set_seed( uint64_t & state, uint64_t seed ) {
     195        state = splitmix64( seed );                                                     // prime
     196} // wyhash64_set_seed
     197#endif // __SIZEOF_INT128__
     198
    121199// https://prng.di.unimi.it/xoshiro256starstar.c
    122200//
     
    130208
    131209#ifndef XOSHIRO256PP
    132 typedef struct xoshiro256pp_t { uint64_t s0, s1, s2, s3; } xoshiro256pp_t;
     210typedef struct { uint64_t s0, s1, s2, s3; } xoshiro256pp_t;
    133211#endif // ! XOSHIRO256PP
    134212
     
    151229
    152230static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state, uint64_t seed ) {
    153         state = (xoshiro256pp_t){ seed, seed, seed, seed };
    154         xoshiro256pp( state );
     231    // To attain repeatable seeding, compute seeds separately because the order of argument evaluation is undefined.
     232    uint64_t seed1 = splitmix64( seed );                                // prime
     233    uint64_t seed2 = splitmix64( seed );
     234    uint64_t seed3 = splitmix64( seed );
     235    uint64_t seed4 = splitmix64( seed );
     236        state = (xoshiro256pp_t){ seed1, seed2, seed3, seed4 };
    155237} // xoshiro256pp_set_seed
    156238
     
    165247
    166248#ifndef XOSHIRO128PP
    167 typedef struct xoshiro128pp_t { uint32_t s0, s1, s2, s3; } xoshiro128pp_t;
     249typedef struct { uint32_t s0, s1, s2, s3; } xoshiro128pp_t;
    168250#endif // ! XOSHIRO128PP
    169251
     
    186268
    187269static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) {
    188         state = (xoshiro128pp_t){ seed, seed, seed, seed };
    189         xoshiro128pp( state );                                                          // prime
     270    // To attain repeatable seeding, compute seeds separately because the order of argument evaluation is undefined.
     271    uint32_t seed1 = splitmix32( seed );                                // prime
     272    uint32_t seed2 = splitmix32( seed );
     273    uint32_t seed3 = splitmix32( seed );
     274    uint32_t seed4 = splitmix32( seed );
     275        state = (xoshiro128pp_t){ seed1, seed2, seed3, seed4 };
    190276} // xoshiro128pp_set_seed
    191 
    192 #ifdef __SIZEOF_INT128__
    193         //--------------------------------------------------
    194         static inline uint64_t lehmer64( __uint128_t & state ) {
    195                 __uint128_t ret = state;
    196                 state *= 0x_da94_2042_e4dd_58b5;
    197                 return ret >> 64;
    198         } // lehmer64
    199 
    200         static inline void lehmer64_set_seed( __uint128_t & state, uint64_t seed ) {
    201                 // The seed needs to be coprime with the 2^64 modulus to get the largest period, so no factors of 2 in the seed.
    202                 state = seed;
    203                 lehmer64( state );                                                              // prime
    204         } // lehmer64_set_seed
    205 
    206         //--------------------------------------------------
    207         static inline uint64_t wyhash64( uint64_t & state ) {
    208                 uint64_t ret = state;
    209                 state += 0x_60be_e2be_e120_fc15;
    210                 __uint128_t tmp;
    211                 tmp = (__uint128_t) ret * 0x_a3b1_9535_4a39_b70d;
    212                 uint64_t m1 = (tmp >> 64) ^ tmp;
    213                 tmp = (__uint128_t)m1 * 0x_1b03_7387_12fa_d5c9;
    214                 uint64_t m2 = (tmp >> 64) ^ tmp;
    215                 return m2;
    216         } // wyhash64
    217 
    218         static inline void wyhash64_set_seed( uint64_t & state, uint64_t seed ) {
    219                 state = seed;
    220                 wyhash64( state );                                                              // prime
    221         } // wyhash64_set_seed
    222 #endif // __SIZEOF_INT128__
    223277
    224278//--------------------------------------------------
     
    232286
    233287static inline void xorshift_13_7_17_set_seed( uint64_t & state, uint64_t seed ) {
    234         state = seed;
    235         xorshift_13_7_17( state );                                                      // prime
     288        state = splitmix64( seed );                                                     // prime
    236289} // xorshift_13_7_17_set_seed
    237290
     
    250303
    251304static inline void xorshift_6_21_7_set_seed( uint32_t & state, uint32_t seed ) {
    252         state = seed;
    253         xorshift_6_21_7( state );                                                       // prime
     305    state = splitmix32( seed );                                                 // prime
    254306} // xorshift_6_21_7_set_seed
    255307
     
    265317
    266318static inline void xorshift_12_25_27_set_seed( uint64_t & state, uint64_t seed ) {
    267         state = seed;
    268         xorshift_12_25_27( state );                                                     // prime
     319        state = splitmix64( seed );                                                     // prime
    269320} // xorshift_12_25_27_set_seed
    270321
     
    272323// The state must be seeded with a nonzero value.
    273324#ifndef KISS_64
    274 typedef struct kiss_64_t { uint64_t z, w, jsr, jcong; } kiss_64_t;
     325typedef struct { uint64_t z, w, jsr, jcong; } kiss_64_t;
    275326#endif // ! KISS_64
    276327
     
    287338
    288339static inline void kiss_64_set_seed( kiss_64_t & rs, uint64_t seed ) with(rs) {
    289         z = 1; w = 1; jsr = 4; jcong = seed;
    290         kiss_64( rs );                                                                          // prime
     340        z = 1; w = 1; jsr = 4; jcong = splitmix64( seed );      // prime
    291341} // kiss_64_set_seed
    292342
     
    294344// The state array must be initialized to non-zero in the first four words.
    295345#ifndef XORWOW
    296 typedef struct xorwow_t { uint32_t a, b, c, d, counter; } xorwow_t;
     346typedef struct { uint32_t a, b, c, d, counter; } xorwow_t;
    297347#endif // ! XORWOW
    298348
     
    316366
    317367static inline void xorwow_set_seed( xorwow_t & rs, uint32_t seed ) {
    318         rs = (xorwow_t){ seed, seed, seed, seed, 0 };
    319         xorwow( rs );                                                                           // prime
     368    // To attain repeatable seeding, compute seeds separately because the order of argument evaluation is undefined.
     369    uint32_t seed1 = splitmix32( seed );                                // prime
     370    uint32_t seed2 = splitmix32( seed );
     371    uint32_t seed3 = splitmix32( seed );
     372    uint32_t seed4 = splitmix32( seed );
     373        rs = (xorwow_t){ seed1, seed2, seed3, seed4, 0 };
    320374} // xorwow_set_seed
    321375
     
    323377// Used in __tls_rand_fwd
    324378#define M  (1_l64u << 48_l64u)
    325 #define A  (25214903917_l64u)
    326 #define AI (18446708753438544741_l64u)
     379#define A  (25_214_903_917_l64u)
     380#define AI (18_446_708_753_438_544_741_l64u)
    327381#define C  (11_l64u)
    328382#define D  (16_l64u)
  • TabularUnified libcfa/src/concurrency/channel.hfa

    r1afd9ccb rd800676  
    2828    exp_backoff_then_block_lock c_lock, p_lock;
    2929    __spinlock_t mutex_lock;
     30    char __padding[64]; // avoid false sharing in arrays
    3031};
    3132
  • TabularUnified libcfa/src/concurrency/mutex_stmt.hfa

    r1afd9ccb rd800676  
    2727    // Sort locks based on address
    2828    __libcfa_small_sort(this.lockarr, count);
    29 
    30     // acquire locks in order
    31     // for ( size_t i = 0; i < count; i++ ) {
    32     //     lock(*this.lockarr[i]);
    33     // }
    34 }
    35 
    36 static inline void ^?{}( __mutex_stmt_lock_guard & this ) with(this) {
    37     // for ( size_t i = count; i > 0; i-- ) {
    38     //     unlock(*lockarr[i - 1]);
    39     // }
    4029}
    4130
  • TabularUnified libcfa/src/containers/list.hfa

    r1afd9ccb rd800676  
    3232static inline tytagref(void, T) ?`inner ( T & this ) { tytagref( void, T ) ret = {this}; return ret; }
    3333
    34 // use this on every case of plan-9 inheritance, to make embedded a closure of plan-9 inheritance
    35 #define P9_EMBEDDED( derived, immedBase ) \
    36 forall( Tbase &, TdiscardPath & | { tytagref( TdiscardPath, Tbase ) ?`inner( immedBase & ); } ) \
    37     static inline tytagref(immedBase, Tbase) ?`inner( derived & this ) { \
     34
     35//
     36// P9_EMBEDDED: Use on every case of plan-9 inheritance, to make "implements embedded" be a closure of plan-9 inheritance.
     37//
     38// struct foo {
     39//    int a, b, c;
     40//    inline (bar);
     41// };
     42// P9_EMBEDDED( foo, bar )
     43//
     44
     45// usual version, for structs that are top-level declarations
     46#define P9_EMBEDDED(        derived, immedBase ) P9_EMBEDDED_DECL_( derived, immedBase, static ) P9_EMBEDDED_BDY_( immedBase )
     47
     48// special version, for structs that are declared in functions
     49#define P9_EMBEDDED_INFUNC( derived, immedBase ) P9_EMBEDDED_DECL_( derived, immedBase,        ) P9_EMBEDDED_BDY_( immedBase )
     50
     51// forward declarations of both the above; generally not needed
     52// may help you control where the P9_EMBEDDED cruft goes, in case "right after the struct" isn't where you want it
     53#define P9_EMBEDDED_FWD(        derived, immedBase )      P9_EMBEDDED_DECL_( derived, immedBase, static ) ;
     54#define P9_EMBEDDED_FWD_INFUNC( derived, immedBase ) auto P9_EMBEDDED_DECL_( derived, immedBase,        ) ;
     55
     56// private helpers
     57#define P9_EMBEDDED_DECL_( derived, immedBase, STORAGE ) \
     58    forall( Tbase &, TdiscardPath & | { tytagref( TdiscardPath, Tbase ) ?`inner( immedBase & ); } ) \
     59    STORAGE inline tytagref(immedBase, Tbase) ?`inner( derived & this )
     60   
     61#define P9_EMBEDDED_BDY_( immedBase ) { \
    3862        immedBase & ib = this; \
    3963        Tbase & b = ib`inner; \
Note: See TracChangeset for help on using the changeset viewer.