Changeset 8cfa4ef


Timestamp: Apr 15, 2021, 12:05:16 PM
Author:    Peter A. Buhr <pabuhr@…>
Branches:  ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
Children:  8590328
Parents:   2f5ea69 (diff), a4b0aa4 (diff)
Message:   Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc
Location:  libcfa/src/concurrency
Files:     6 edited

Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.

Legend: lines common to both revisions carry no marker; lines removed in r8cfa4ef are prefixed with -, lines added are prefixed with +, and … marks elided context between hunks.

  • libcfa/src/concurrency/kernel.cfa  (r2f5ea69 → r8cfa4ef)

     static void __wake_one(cluster * cltr);

    -static void push  (__cluster_idles & idles, processor & proc);
    -static void remove(__cluster_idles & idles, processor & proc);
    -static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );
    +static void mark_idle (__cluster_proc_list & idles, processor & proc);
    +static void mark_awake(__cluster_proc_list & idles, processor & proc);
    +static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list idles );

     extern void __cfa_io_start( processor * );
    …

                                     // Push self to idle stack
    -                                push(this->cltr->idles, * this);
    +                                mark_idle(this->cltr->procs, * this);

                                     // Confirm the ready-queue is empty
    …
                                     if( readyThread ) {
                                             // A thread was found, cancel the halt
    -                                        remove(this->cltr->idles, * this);
    +                                        mark_awake(this->cltr->procs, * this);

                                             #if !defined(__CFA_NO_STATISTICS__)
    …

                                     // We were woken up, remove self from idle
    -                                remove(this->cltr->idles, * this);
    +                                mark_awake(this->cltr->procs, * this);

                                     // DON'T just proceed, start looking again
    …
             unsigned idle;
             unsigned total;
    -        [idle, total, p] = query(this->idles);
    +        [idle, total, p] = query_idles(this->procs);

             // If no one is sleeping, we are done
    …
     }

    -static void push  (__cluster_idles & this, processor & proc) {
    +static void mark_idle(__cluster_proc_list & this, processor & proc) {
             /* paranoid */ verify( ! __preemption_enabled() );
             lock( this );
                     this.idle++;
                     /* paranoid */ verify( this.idle <= this.total );
    -
    -                insert_first(this.list, proc);
    +                remove(proc);
    +                insert_first(this.idles, proc);
             unlock( this );
             /* paranoid */ verify( ! __preemption_enabled() );
     }

    -static void remove(__cluster_idles & this, processor & proc) {
    +static void mark_awake(__cluster_proc_list & this, processor & proc) {
             /* paranoid */ verify( ! __preemption_enabled() );
             lock( this );
                     this.idle--;
                     /* paranoid */ verify( this.idle >= 0 );
    -
                     remove(proc);
    +                insert_last(this.actives, proc);
             unlock( this );
             /* paranoid */ verify( ! __preemption_enabled() );
     }

    -static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) {
    +static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list this ) {
    +        /* paranoid */ verify( ! __preemption_enabled() );
    +        /* paranoid */ verify( ready_schedule_islocked() );
    +
             for() {
                     uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);
    …
                     unsigned idle    = this.idle;
                     unsigned total   = this.total;
    -                processor * proc = &this.list`first;
    +                processor * proc = &this.idles`first;
                     // Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it
                     asm volatile("": : :"memory");
    …
                     return [idle, total, proc];
             }
    +
    +        /* paranoid */ verify( ready_schedule_islocked() );
    +        /* paranoid */ verify( ! __preemption_enabled() );
     }
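
The processor main-loop hunks above follow a publish-then-re-check idiom: a processor calls mark_idle before taking one final look at the ready queue, and cancels the halt with mark_awake if a thread appeared in that window, so a wake-up cannot be lost between "the queue looked empty" and "halt". Below is a toy, single-threaded plain-C sketch of just that idiom; the names (ready_count, idle_count, try_halt) are illustrative stand-ins, not the CFA runtime API.

    #include <stdbool.h>
    #include <stdio.h>

    static int ready_count = 0;        /* pending work items               */
    static int idle_count  = 0;        /* processors currently marked idle */

    static void mark_idle (void) { idle_count++; }
    static void mark_awake(void) { idle_count--; }

    /* Returns true if the processor really halted, false if the halt was cancelled. */
    static bool try_halt(void) {
            mark_idle();                       /* 1. publish: wakers can now see us        */

            if(ready_count > 0) {              /* 2. re-check the queue after publishing   */
                    mark_awake();              /* 3a. a thread was found, cancel the halt  */
                    return false;
            }

            /* 3b. ...the real runtime would block on its idle fd here... */
            mark_awake();                      /* 4. woken up, remove self from idle;      */
            return true;                       /*    the caller loops and looks again      */
    }

    int main(void) {
            ready_count = 1;
            printf("halted: %d (halt cancelled, work was pending)\n", try_halt());
            ready_count = 0;
            printf("halted: %d (nothing to do)\n", try_halt());
            return 0;
    }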

  • libcfa/src/concurrency/kernel.hfa  (r2f5ea69 → r8cfa4ef)


     // Idle Sleep
    -struct __cluster_idles {
    +struct __cluster_proc_list {
             // Spin lock protecting the queue
             volatile uint64_t lock;
    …

             // List of idle processors
    -        dlist(processor, processor) list;
    +        dlist(processor, processor) idles;
    +
    +        // List of active processors
    +        dlist(processor, processor) actives;
     };

    …

             // List of idle processors
    -        __cluster_idles idles;
    +        __cluster_proc_list procs;

             // List of threads

  • libcfa/src/concurrency/kernel/startup.cfa  (r2f5ea69 → r8cfa4ef)

             this.name = name;
             this.cltr = &_cltr;
    +        this.cltr_id = -1u;
             do_terminate = false;
             preemption_alarm = 0p;
    …
             #endif

    -        lock( this.cltr->idles );
    -                int target = this.cltr->idles.total += 1u;
    -        unlock( this.cltr->idles );
    -
    -        id = doregister((__processor_id_t*)&this);
    -
    +        // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
    +        uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);
    +                this.cltr->procs.total += 1u;
    +                insert_last(this.cltr->procs.actives, this);
    +
    +                // Adjust the ready queue size
    +                ready_queue_grow( cltr );
    +
    +        // Unlock the RWlock
    +        ready_mutate_unlock( last_size );
    +
    +        __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
    +}
    +
    +// Not a ctor, it just preps the destruction but should not destroy members
    +static void deinit(processor & this) {
             // Lock the RWlock so no-one pushes/pops while we are changing the queue
             uint_fast32_t last_size = ready_mutate_lock();
    +                this.cltr->procs.total -= 1u;
    +                remove(this);

                     // Adjust the ready queue size
    -                this.cltr_id = ready_queue_grow( cltr, target );
    -
    -        // Unlock the RWlock
    -        ready_mutate_unlock( last_size );
    -
    -        __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
    -}
    -
    -// Not a ctor, it just preps the destruction but should not destroy members
    -static void deinit(processor & this) {
    -        lock( this.cltr->idles );
    -                int target = this.cltr->idles.total -= 1u;
    -        unlock( this.cltr->idles );
    -
    -        // Lock the RWlock so no-one pushes/pops while we are changing the queue
    -        uint_fast32_t last_size = ready_mutate_lock();
    -
    -                // Adjust the ready queue size
    -                ready_queue_shrink( this.cltr, target );
    -
    -        // Unlock the RWlock
    -        ready_mutate_unlock( last_size );
    -
    -        // Finally we don't need the read_lock any more
    -        unregister((__processor_id_t*)&this);
    +                ready_queue_shrink( this.cltr );
    +
    +        // Unlock the RWlock and unregister: we don't need the read_lock any more
    +        ready_mutate_unregister((__processor_id_t*)&this, last_size );

             close(this.idle);
    …
     //-----------------------------------------------------------------------------
     // Cluster
    -static void ?{}(__cluster_idles & this) {
    +static void ?{}(__cluster_proc_list & this) {
             this.lock  = 0;
             this.idle  = 0;
             this.total = 0;
    -        (this.list){};
     }

    …

                     // Adjust the ready queue size
    -                ready_queue_grow( &this, 0 );
    +                ready_queue_grow( &this );

             // Unlock the RWlock
    …

                     // Adjust the ready queue size
    -                ready_queue_shrink( &this, 0 );
    +                ready_queue_shrink( &this );

             // Unlock the RWlock

  • libcfa/src/concurrency/kernel_private.hfa  (r2f5ea69 → r8cfa4ef)

     // Cluster lock API
     //=======================================================================
    -// Cells use by the reader writer lock
    -// while not generic it only relies on a opaque pointer
    -struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    -        // Spin lock used as the underlying lock
    -        volatile bool lock;
    -
    -        // Handle pointing to the proc owning this cell
    -        // Used for allocating cells and debugging
    -        __processor_id_t * volatile handle;
    -
    -        #ifdef __CFA_WITH_VERIFY__
    -                // Debug, check if this is owned for reading
    -                bool owned;
    -        #endif
    -};
    -
    -static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
    -
     // Lock-Free registering/unregistering of threads
     // Register a processor to a given cluster and get its unique id in return
    -unsigned doregister( struct __processor_id_t * proc );
    +void register_proc_id( struct __processor_id_t * );

     // Unregister a processor from a given cluster using its id, getting back the original pointer
    -void     unregister( struct __processor_id_t * proc );
    -
    -//-----------------------------------------------------------------------
    -// Cluster idle lock/unlock
    -static inline void lock(__cluster_idles & this) {
    -        for() {
    -                uint64_t l = this.lock;
    -                if(
    -                        (0 == (l % 2))
    -                        && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
    -                ) return;
    -                Pause();
    -        }
    -}
    -
    -static inline void unlock(__cluster_idles & this) {
    -        /* paranoid */ verify( 1 == (this.lock % 2) );
    -        __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
    -}
    +void unregister_proc_id( struct __processor_id_t * proc );

     //=======================================================================
    …
             __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
     }
    +
    +// Cells use by the reader writer lock
    +// while not generic it only relies on a opaque pointer
    +struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    +        // Spin lock used as the underlying lock
    +        volatile bool lock;
    +
    +        // Handle pointing to the proc owning this cell
    +        // Used for allocating cells and debugging
    +        __processor_id_t * volatile handle;
    +
    +        #ifdef __CFA_WITH_VERIFY__
    +                // Debug, check if this is owned for reading
    +                bool owned;
    +        #endif
    +};
    +
    +static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));

     //-----------------------------------------------------------------------
    …
     void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );

    +//-----------------------------------------------------------------------
    +// Lock-Free registering/unregistering of threads
    +// Register a processor to a given cluster and get its unique id in return
    +// For convenience, also acquires the lock
    +static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
    +        register_proc_id( proc );
    +        return ready_mutate_lock();
    +}
    +
    +// Unregister a processor from a given cluster using its id, getting back the original pointer
    +// assumes the lock is acquired
    +static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
    +        ready_mutate_unlock( last_s );
    +        unregister_proc_id( proc );
    +}
    +
    +//-----------------------------------------------------------------------
    +// Cluster idle lock/unlock
    +static inline void lock(__cluster_proc_list & this) {
    +        /* paranoid */ verify( ! __preemption_enabled() );
    +
    +        // Start by locking the global RWlock so that we know no-one is
    +        // adding/removing processors while we mess with the idle lock
    +        ready_schedule_lock();
    +
    +        // Simple counting lock, acquired, acquired by incrementing the counter
    +        // to an odd number
    +        for() {
    +                uint64_t l = this.lock;
    +                if(
    +                        (0 == (l % 2))
    +                        && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
    +                ) return;
    +                Pause();
    +        }
    +
    +        /* paranoid */ verify( ! __preemption_enabled() );
    +}
    +
    +static inline void unlock(__cluster_proc_list & this) {
    +        /* paranoid */ verify( ! __preemption_enabled() );
    +
    +        /* paranoid */ verify( 1 == (this.lock % 2) );
    +        // Simple couting lock, release by incrementing to an even number
    +        __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
    +
    +        // Release the global lock, which we acquired when locking
    +        ready_schedule_unlock();
    +
    +        /* paranoid */ verify( ! __preemption_enabled() );
    +}
    +
     //=======================================================================
     // Ready-Queue API
    …
     //-----------------------------------------------------------------------
     // Increase the width of the ready queue (number of lanes) by 4
    -unsigned ready_queue_grow  (struct cluster * cltr, int target);
    +void ready_queue_grow  (struct cluster * cltr);

     //-----------------------------------------------------------------------
     // Decrease the width of the ready queue (number of lanes) by 4
    -void ready_queue_shrink(struct cluster * cltr, int target);
    +void ready_queue_shrink(struct cluster * cltr);

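
The lock/unlock pair added above is a simple counting lock: the counter is odd while a writer holds it and even otherwise, and query_idles in kernel.cfa snapshots idle/total optimistically against that same counter instead of taking the lock. Below is a self-contained plain-C sketch of the pattern using the GCC/Clang __atomic builtins; struct counts and the counts_* names are placeholders, and the interaction with the global scheduler RWlock (ready_schedule_lock/unlock) is deliberately omitted.

    #include <stdint.h>
    #include <stdbool.h>

    struct counts {
            volatile uint64_t lock;            /* even = free, odd = a writer is inside */
            unsigned idle, total;
    };

    /* Writer: spin until the counter is even, then CAS it to the next odd value. */
    static void counts_lock(struct counts * this) {
            for(;;) {
                    uint64_t l = this->lock;
                    if(0 == (l % 2)
                       && __atomic_compare_exchange_n(&this->lock, &l, l + 1, false,
                                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
                            return;
                    /* the real code calls Pause() here */
            }
    }

    /* Writer: release by bumping the counter back to an even value. */
    static void counts_unlock(struct counts * this) {
            __atomic_fetch_add(&this->lock, 1, __ATOMIC_SEQ_CST);
    }

    /* Reader: optimistic snapshot in the spirit of query_idles(): read the counter,
     * read the fields, read the counter again, and retry if a writer was active
     * (odd value) or slipped in between the two reads. */
    static void counts_snapshot(struct counts * this, unsigned * idle, unsigned * total) {
            for(;;) {
                    uint64_t before = __atomic_load_n(&this->lock, __ATOMIC_SEQ_CST);
                    if(before % 2) continue;                 /* writer in progress, retry */
                    unsigned i = this->idle;
                    unsigned t = this->total;
                    asm volatile("" : : : "memory");         /* keep the loads ordered    */
                    uint64_t after = __atomic_load_n(&this->lock, __ATOMIC_SEQ_CST);
                    if(before == after) { *idle = i; *total = t; return; }
            }
    }

    int main(void) {
            struct counts c = { .lock = 0, .idle = 0, .total = 4 };
            counts_lock(&c);   c.idle = 1;   counts_unlock(&c);

            unsigned idle, total;
            counts_snapshot(&c, &idle, &total);
            return (int)(total - idle - 3);                  /* 0 on success */
    }

The payoff of this shape is that readers such as query_idles never acquire the counting lock; they only retry when a writer happens to be active, which suits the wake-up path where the caller already holds the scheduler read lock.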

  • libcfa/src/concurrency/preemption.cfa  (r2f5ea69 → r8cfa4ef)

     static void * alarm_loop( __attribute__((unused)) void * args ) {
             __processor_id_t id;
    -        id.id = doregister(&id);
    +        register_proc_id(&id);
             __cfaabi_tls.this_proc_id = &id;

    …
     EXIT:
             __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
    -        unregister(&id);
    +        register_proc_id(&id);

             return 0p;

  • libcfa/src/concurrency/ready_queue.cfa  (r2f5ea69 → r8cfa4ef)

     //=======================================================================
     // Lock-Free registering/unregistering of threads
    -unsigned doregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
    +void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
             __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);

    …
                             /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
                             /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
    -                        return i;
    +                        proc->id = i;
                     }
             }
    …
             /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size));
             /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0);
    -        return n;
    -}
    -
    -void unregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
    +        proc->id = n;
    +}
    +
    +void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
             unsigned id = proc->id;
             /*paranoid*/ verify(id < ready);
    …
             __attribute__((unused)) int preferred;
             #if defined(BIAS)
    +                /* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );
                     preferred =
                             //*
    …
             int preferred;
             #if defined(BIAS)
    -                // Don't bother trying locally too much
    +                /* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
                     preferred = kernelTLS().this_processor->cltr_id;
             #endif
    …
     }

    +static void assign_list(unsigned & value, const int inc, dlist(processor, processor) & list, unsigned count) {
    +        processor * it = &list`first;
    +        for(unsigned i = 0; i < count; i++) {
    +                /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
    +                it->cltr_id = value;
    +                value += inc;
    +                it = &(*it)`next;
    +        }
    +}
    +
    +static void reassign_cltr_id(struct cluster * cltr, const int inc) {
    +        unsigned preferred = 0;
    +        assign_list(preferred, inc, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
    +        assign_list(preferred, inc, cltr->procs.idles  , cltr->procs.idle );
    +}
    +
     // Grow the ready queue
    -unsigned ready_queue_grow(struct cluster * cltr, int target) {
    -        unsigned preferred;
    +void ready_queue_grow(struct cluster * cltr) {
             size_t ncount;
    +        int target = cltr->procs.total;

             /* paranoid */ verify( ready_mutate_islocked() );
    …
                     if(target >= 2) {
                             ncount = target * 4;
    -                        preferred = ncount - 4;
                     } else {
                             ncount = 1;
    -                        preferred = 0;
                     }

    …
             }

    +        reassign_cltr_id(cltr, 4);
    +
             // Make sure that everything is consistent
             /* paranoid */ check( cltr->ready_queue );
    …

             /* paranoid */ verify( ready_mutate_islocked() );
    -        return preferred;
     }

     // Shrink the ready queue
    -void ready_queue_shrink(struct cluster * cltr, int target) {
    +void ready_queue_shrink(struct cluster * cltr) {
             /* paranoid */ verify( ready_mutate_islocked() );
             __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
    …
             // Make sure that everything is consistent
             /* paranoid */ check( cltr->ready_queue );
    +
    +        int target = cltr->procs.total;

             with( cltr->ready_queue ) {
    …
             }

    +        reassign_cltr_id(cltr, 4);
    +
             // Make sure that everything is consistent
             /* paranoid */ check( cltr->ready_queue );
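
The new reassign_cltr_id above re-derives every processor's preferred ready-queue lane whenever the queue grows or shrinks: active processors are numbered first, then idle ones, in steps of inc. With inc = 4 and lanes.count = target * 4, each processor lands on the first lane of its own group of four, which is what the added verify( ... cltr_id < lanes.count ) checks rely on. A tiny plain-C model of that numbering follows; it is array-based and the names are illustrative only.

    #include <stdio.h>

    /* Toy model of assign_list()/reassign_cltr_id(): hand out preferred lane ids
     * in steps of `inc`, active processors first, then idle ones. */
    static void assign_list(unsigned * value, int inc, int * cltr_id, unsigned count) {
            for(unsigned i = 0; i < count; i++) {
                    cltr_id[i] = (int)*value;
                    *value += inc;
            }
    }

    int main(void) {
            /* 3 processors on the cluster: 2 active, 1 idle, 4 ready-queue lanes each. */
            int active_id[2], idle_id[1];
            unsigned preferred = 0;

            assign_list(&preferred, 4, active_id, 2);    /* actives get 0 and 4 */
            assign_list(&preferred, 4, idle_id,   1);    /* the idle one gets 8 */

            printf("actives: %d %d, idle: %d, lanes.count: %d\n",
                   active_id[0], active_id[1], idle_id[0], 3 * 4);
            return 0;
    }

Because the preferred id is now recomputed from the processor lists, ready_queue_grow and ready_queue_shrink no longer need the target parameter or the returned preferred value that the old signatures carried.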