Changeset 42daeb4 for libcfa/src


Ignore:
Timestamp:
Jan 13, 2022, 10:53:25 AM (4 years ago)
Author:
caparsons <caparson@…>
Branches:
ADT, ast-experimental, enum, forall-pointer-decay, master, pthread-emulation, qualifiedEnum
Children:
3eaa689
Parents:
3bb12921 (diff), 00f5fde (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Location:
libcfa/src
Files:
14 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/bits/locks.hfa

    r3bb12921 r42daeb4  
    3131                // previous thread to acquire the lock
    3232                void* prev_thrd;
     33                // keep track of number of times we had to spin, just in case the number is unexpectedly huge
     34                size_t spin_count;
    3335        #endif
    3436};
     
    4850        static inline void ?{}( __spinlock_t & this ) {
    4951                this.lock = 0;
     52                #ifdef __CFA_DEBUG__
     53                        this.spin_count = 0;
     54                #endif
    5055        }
    5156
     
    7277                for ( unsigned int i = 1;; i += 1 ) {
    7378                        if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break;
     79                        #ifdef __CFA_DEBUG__
     80                                this.spin_count++;
     81                        #endif
    7482                        #ifndef NOEXPBACK
    7583                                // exponential spin
  • libcfa/src/concurrency/invoke.h

    r3bb12921 r42daeb4  
    1010// Created On       : Tue Jan 17 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Jan  6 16:37:40 2022
    13 // Update Count     : 47
     12// Last Modified On : Sun Jan  9 19:06:45 2022
     13// Update Count     : 48
    1414//
    1515
     
    211211                struct processor * last_proc;
    212212
     213                uint32_t random_state;                                                  // fast random numbers
     214
    213215                #if defined( __CFA_WITH_VERIFY__ )
    214216                        void * canary;
  • libcfa/src/concurrency/io.cfa

    r3bb12921 r42daeb4  
    548548                        /* paranoid */ verify( proc == __cfaabi_tls.this_processor );
    549549                        /* paranoid */ verify( ! __preemption_enabled() );
     550
     551                        return true;
    550552                }
    551553        #endif
  • libcfa/src/concurrency/kernel.cfa

    r3bb12921 r42daeb4  
    554554        /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
    555555
    556         const bool local = thrd->state != Start;
    557556        if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
    558557
     
    737736
    738737        // Check if there is a sleeping processor
    739         int fd = __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST);
     738        // int fd = __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST);
     739        int fd = 0;
     740        if( __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST) != 0 ) {
     741                fd = __atomic_exchange_n(&this->procs.fd, 0, __ATOMIC_RELAXED);
     742        }
    740743
    741744        // If no one is sleeping, we are done
  • libcfa/src/concurrency/kernel/fwd.hfa

    r3bb12921 r42daeb4  
    7777
    7878                static inline uint64_t __tls_rand() {
     79                        return
    7980                        #if defined(__SIZEOF_INT128__)
    80                                 return __lehmer64( kernelTLS().rand_seed );
     81                                __lehmer64( kernelTLS().rand_seed );
    8182                        #else
    82                                 return __xorshift64( kernelTLS().rand_seed );
     83                                __xorshift64( kernelTLS().rand_seed );
    8384                        #endif
    8485                }
     
    9192
    9293                static inline unsigned __tls_rand_fwd() {
    93 
    9494                        kernelTLS().ready_rng.fwd_seed = (A * kernelTLS().ready_rng.fwd_seed + C) & (M - 1);
    9595                        return kernelTLS().ready_rng.fwd_seed >> D;
     
    112112                }
    113113        }
    114 
    115 
    116114
    117115        extern void disable_interrupts();
  • libcfa/src/concurrency/kernel/startup.cfa

    r3bb12921 r42daeb4  
    101101extern void __wake_proc(processor *);
    102102extern int cfa_main_returned;                                                   // from interpose.cfa
     103extern uint32_t __global_random_seed;
    103104
    104105//-----------------------------------------------------------------------------
     
    489490        preferred = ready_queue_new_preferred();
    490491        last_proc = 0p;
     492        random_state = __global_random_seed;
    491493        #if defined( __CFA_WITH_VERIFY__ )
    492494                canary = 0x0D15EA5E0D15EA5Ep;
  • libcfa/src/concurrency/ready_queue.cfa

    r3bb12921 r42daeb4  
    681681        // Actually pop the list
    682682        struct thread$ * thrd;
    683         unsigned long long tsc_before = ts(lane);
     683        #if defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING)
     684                unsigned long long tsc_before = ts(lane);
     685        #endif
    684686        unsigned long long tsv;
    685687        [thrd, tsv] = pop(lane);
  • libcfa/src/concurrency/thread.cfa

    r3bb12921 r42daeb4  
    1010// Created On       : Tue Jan 17 12:27:26 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Wed Dec  4 09:17:49 2019
    13 // Update Count     : 9
     12// Last Modified On : Wed Jan 12 18:46:48 2022
     13// Update Count     : 36
    1414//
    1515
     
    2727uint64_t thread_rand();
    2828
     29extern uint32_t __global_random_seed;
     30
    2931//-----------------------------------------------------------------------------
    3032// Thread ctors and dtors
    31 void ?{}(thread$ & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
     33void ?{}( thread$ & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
    3234        context{ 0p, 0p };
    3335        self_cor{ name, storage, storageSize };
     
    4547        preferred = ready_queue_new_preferred();
    4648        last_proc = 0p;
     49        random_state = __global_random_seed;
    4750        #if defined( __CFA_WITH_VERIFY__ )
    4851                canary = 0x0D15EA5E0D15EA5Ep;
     
    177180        return ret;
    178181}
     182 
     183#define GENERATOR LCG
     184
     185static inline uint32_t MarsagliaXor( uint32_t & state ) {
     186        uint32_t ret = state;
     187        state ^= state << 6;
     188        state ^= state >> 21;
     189        state ^= state << 7;
     190        return ret;
     191} // MarsagliaXor
     192
     193static inline uint32_t LCG( uint32_t & state ) {                // linear congruential generator
     194        uint32_t ret = state;
     195        state = 36969 * (state & 65535) + (state >> 16);        // 36969 is NOT prime! Do not change it!
     196        return ret;
     197} // LCG
     198
     199void set_seed( uint32_t seed ) {
     200        active_thread()->random_state = __global_random_seed = seed;
     201        GENERATOR( active_thread()->random_state );
     202} // set_seed
     203uint32_t prng( void ) { return GENERATOR( active_thread()->random_state ); } // [0,UINT_MAX]
    179204
    180205// Local Variables: //
  • libcfa/src/device/cpu.cfa

    r3bb12921 r42daeb4  
    427427                        unsigned c = pairings[i].cpu;
    428428                        unsigned llc_id = pairings[i].id;
    429                         unsigned width = maps[llc_id].raw->width;
    430429                        unsigned start = maps[llc_id].start;
    431                         unsigned self  = start + (maps[llc_id].count++);
    432                         entries[c].count = width;
     430                        entries[c].count = maps[llc_id].raw->width;
    433431                        entries[c].start = start;
    434                         entries[c].self  = self;
     432                        entries[c].self  = start + (maps[llc_id].count++);
     433                        entries[c].cache = llc_id;
    435434                }
    436435
  • libcfa/src/device/cpu.hfa

    r3bb12921 r42daeb4  
    1616#include <stddef.h>
    1717
     18// Map from cpu entry to a structure detailing cpus with common topologies
     19// Note that the cpu-groups are contiguous so the indexing is different from
     20// the cpu indexing
    1821struct cpu_map_entry_t {
     22        // Where this particular cpu is in the group
    1923        unsigned self;
     24
     25        // Starting index of the cpus with the same topology
    2026        unsigned start;
     27
     28        // Number of cpus with the same topology
    2129        unsigned count;
     30
     31        // Index of the cache this entry describes
     32        unsigned cache;
    2233};
    2334
  • libcfa/src/fstream.cfa

    r3bb12921 r42daeb4  
    1010// Created On       : Wed May 27 17:56:53 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sun Oct 10 11:23:05 2021
    13 // Update Count     : 512
     12// Last Modified On : Mon Jan 10 08:45:05 2022
     13// Update Count     : 513
    1414//
    1515
     
    5252inline void setPrt$( ofstream & os, bool state ) { os.prt$ = state; }
    5353
    54 inline void lock( ofstream & os ) with( os ) {  lock( os.lock$ ); }
     54inline void lock( ofstream & os ) with( os ) { lock( os.lock$ ); }
    5555inline void unlock( ofstream & os ) { unlock( os.lock$ ); }
    5656
  • libcfa/src/startup.cfa

    r3bb12921 r42daeb4  
    1010// Created On       : Tue Jul 24 16:21:57 2018
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sat Jan  9 23:18:23 2021
    13 // Update Count     : 34
     12// Last Modified On : Wed Jan 12 18:51:24 2022
     13// Update Count     : 51
    1414//
    1515
     
    1818#include <stdlib.h>                                                                             // getenv
    1919#include "startup.hfa"
     20#include "bits/defs.hfa"
     21
     22extern uint32_t __global_random_seed, __global_random_state;
    2023
    2124extern "C" {
     
    4851        void __cfaabi_core_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_CORE ) ));
    4952        void __cfaabi_core_startup( void ) {
     53                __global_random_state = __global_random_seed = rdtscl();
    5054                __cfaabi_interpose_startup();
    5155                __cfaabi_device_startup();
  • libcfa/src/stdlib.cfa

    r3bb12921 r42daeb4  
    1010// Created On       : Thu Jan 28 17:10:29 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Jan  3 09:36:27 2022
    13 // Update Count     : 519
     12// Last Modified On : Wed Jan 12 18:52:41 2022
     13// Update Count     : 582
    1414//
    1515
    1616#include "stdlib.hfa"
     17//#include "concurrency/kernel/fwd.hfa"
     18#include "concurrency/invoke.h"                                                 // random_state
    1719
    1820//---------------------------------------
     
    221223//---------------------------------------
    222224
    223 static uint32_t seed = 0;                                                               // current seed
    224 static thread_local uint32_t state;                                             // random state
    225 
    226 void set_seed( uint32_t seed_ ) { state = seed = seed_; }
    227 uint32_t get_seed() { return seed; }
     225// Pipelined to allow OoO overlap with reduced dependencies. Critically, return the current value, and compute and store
     226// the next value.
    228227
    229228#define GENERATOR LCG
    230229
    231 inline uint32_t MarsagliaXor( uint32_t & state ) {
    232         if ( unlikely( seed == 0 ) ) set_seed( rdtscl() );
    233         else if ( unlikely( state == 0 ) ) state = seed;
     230static inline uint32_t MarsagliaXor( uint32_t & state ) {
     231        uint32_t ret = state;
    234232        state ^= state << 6;
    235233        state ^= state >> 21;
    236234        state ^= state << 7;
    237         return state;
     235        return ret;
    238236} // MarsagliaXor
    239237
    240 inline uint32_t LCG( uint32_t & state ) {                               // linear congruential generator
    241         if ( unlikely( seed == 0 ) ) set_seed( rdtscl() );
    242         else if ( unlikely( state == 0 ) ) state = seed;
    243         return state = 36969 * (state & 65535) + (state >> 16); // 36969 is NOT prime!
     238static inline uint32_t LCG( uint32_t & state ) {                // linear congruential generator
     239        uint32_t ret = state;
     240        state = 36969 * (state & 65535) + (state >> 16);        // 36969 is NOT prime! Do not change it!
     241        return ret;
    244242} // LCG
    245243
     244uint32_t __global_random_seed;                                                  // sequential/concurrent
     245uint32_t __global_random_state;                                                 // sequential only
     246
     247void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; GENERATOR( state ); } // set seed
    246248uint32_t prng( PRNG & prng ) with( prng ) { callcnt += 1; return GENERATOR( state ); }
    247249
    248 uint32_t prng( void ) { return GENERATOR( state ); }
     250void set_seed( uint32_t seed ) { __global_random_seed = seed; GENERATOR( __global_random_state ); }
     251uint32_t get_seed() { return __global_random_seed; }
     252uint32_t prng( void ) { return GENERATOR( __global_random_state ); } // [0,UINT_MAX]
    249253
    250254//---------------------------------------
  • libcfa/src/stdlib.hfa

    r3bb12921 r42daeb4  
    1010// Created On       : Thu Jan 28 17:12:35 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sun Jan  2 22:53:57 2022
    13 // Update Count     : 594
     12// Last Modified On : Wed Jan 12 18:56:13 2022
     13// Update Count     : 621
    1414//
    1515
     
    2121#include <stdlib.h>                                                                             // *alloc, strto*, ato*
    2222#include <heap.hfa>
     23
    2324
    2425// Reduce includes by explicitly defining these routines.
     
    385386//---------------------------------------
    386387
     388// Sequential Pseudo Random-Number Generator : generate repeatable sequence of values that appear random.
     389//
     390// Declaration :
     391//   PRNG sprng = { 1009 } - set starting seed versus random seed
     392//   
     393// Interface :
     394//   set_seed( sprng, 1009 ) - set starting seed for ALL kernel threads versus random seed
     395//   get_seed( sprng ) - read seed
     396//   prng( sprng ) - generate random value in range [0,UINT_MAX]
     397//   prng( sprng, u ) - generate random value in range [0,u)
     398//   prng( sprng, l, u ) - generate random value in range [l,u]
     399//   calls( sprng ) - number of generated random values so far
     400//
     401// Examples : generate random number between 5-21
     402//   prng( sprng ) % 17 + 5;    values 0-16 + 5 = 5-21
     403//   prng( sprng, 16 + 1 ) + 5;
     404//   prng( sprng, 5, 21 );
     405//   calls( sprng );
     406
    387407struct PRNG {
    388408        uint32_t callcnt;                                                                       // call count
     
    391411}; // PRNG
    392412
    393 extern uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX]
     413void set_seed( PRNG & prng, uint32_t seed_ );
     414uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX]
    394415static inline {
    395         void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; } // set seed
    396416        void ?{}( PRNG & prng ) { set_seed( prng, rdtscl() ); } // random seed
    397417        void ?{}( PRNG & prng, uint32_t seed ) { set_seed( prng, seed ); } // fixed seed
     
    402422} // distribution
    403423
    404 extern void set_seed( uint32_t seed );                                  // set per thread seed
    405 extern uint32_t get_seed();                                                             // get seed
    406 extern uint32_t prng( void ) __attribute__(( warn_unused_result )); // [0,UINT_MAX]
     424// Concurrent Pseudo Random-Number Generator : generate repeatable sequence of values that appear random.
     425//
     426// Interface :
     427//   set_seed( 1009 ) - fixed seed for all kernel threads versus random seed
     428//   get_seed() - read seed
     429//   prng() - generate random value in range [0,UINT_MAX]
     430//   prng( u ) - generate random value in range [0,u)
     431//   prng( l, u ) - generate random value in range [l,u]
     432//
     433// Examples : generate random number between 5-21
     434//   prng() % 17 + 5;   values 0-16 + 5 = 5-21
     435//   prng( 16 + 1 ) + 5;
     436//   prng( 5, 21 );
     437
     438void set_seed( uint32_t seed_ ) OPTIONAL_THREAD;
     439uint32_t get_seed() __attribute__(( warn_unused_result ));
     440uint32_t prng( void ) __attribute__(( warn_unused_result )) OPTIONAL_THREAD; // [0,UINT_MAX]
    407441static inline {
    408         uint32_t prng( uint32_t u ) __attribute__(( warn_unused_result ));
    409         uint32_t prng( uint32_t u ) { return prng() % u; }      // [0,u)
    410         uint32_t prng( uint32_t l, uint32_t u ) __attribute__(( warn_unused_result ));
    411         uint32_t prng( uint32_t l, uint32_t u ) { return prng( u - l + 1 ) + l; } // [l,u]
     442        uint32_t prng( uint32_t u ) __attribute__(( warn_unused_result )) { return prng() % u; } // [0,u)
     443        uint32_t prng( uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( u - l + 1 ) + l; } // [l,u]
    412444} // distribution
    413445
Note: See TracChangeset for help on using the changeset viewer.