Changes in / [8cfa4ef:2f5ea69]


Location: libcfa/src/concurrency
Files: 6 edited

Legend:
  (no prefix)  unmodified in both revisions
  -            removed (present only in r8cfa4ef)
  +            added (present only in r2f5ea69)
  • libcfa/src/concurrency/kernel.cfa

    r8cfa4ef → r2f5ea69

      static void __wake_one(cluster * cltr);

    - static void mark_idle (__cluster_proc_list & idles, processor & proc);
    - static void mark_awake(__cluster_proc_list & idles, processor & proc);
    - static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list idles );
    + static void push  (__cluster_idles & idles, processor & proc);
    + static void remove(__cluster_idles & idles, processor & proc);
    + static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );

      extern void __cfa_io_start( processor * );

    …

      // Push self to idle stack
    - mark_idle(this->cltr->procs, * this);
    + push(this->cltr->idles, * this);

      // Confirm the ready-queue is empty

    …

      if( readyThread ) {
              // A thread was found, cancel the halt
    -         mark_awake(this->cltr->procs, * this);
    +         remove(this->cltr->idles, * this);

              #if !defined(__CFA_NO_STATISTICS__)

    …

      // We were woken up, remove self from idle
    - mark_awake(this->cltr->procs, * this);
    + remove(this->cltr->idles, * this);

      // DON'T just proceed, start looking again

    …

      unsigned idle;
      unsigned total;
    - [idle, total, p] = query_idles(this->procs);
    + [idle, total, p] = query(this->idles);

      // If no one is sleeping, we are done

    …

      }

    - static void mark_idle(__cluster_proc_list & this, processor & proc) {
    + static void push  (__cluster_idles & this, processor & proc) {
              /* paranoid */ verify( ! __preemption_enabled() );
              lock( this );
                      this.idle++;
                      /* paranoid */ verify( this.idle <= this.total );
    -                 remove(proc);
    -                 insert_first(this.idles, proc);
    +
    +                 insert_first(this.list, proc);
              unlock( this );
              /* paranoid */ verify( ! __preemption_enabled() );
      }

    - static void mark_awake(__cluster_proc_list & this, processor & proc) {
    + static void remove(__cluster_idles & this, processor & proc) {
              /* paranoid */ verify( ! __preemption_enabled() );
              lock( this );
                      this.idle--;
                      /* paranoid */ verify( this.idle >= 0 );
    +
                      remove(proc);
    -                 insert_last(this.actives, proc);
              unlock( this );
              /* paranoid */ verify( ! __preemption_enabled() );
      }

    - static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list this ) {
    -         /* paranoid */ verify( ! __preemption_enabled() );
    -         /* paranoid */ verify( ready_schedule_islocked() );
    -
    + static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) {
              for() {
                      uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);
                      …
                      unsigned idle    = this.idle;
                      unsigned total   = this.total;
    -                 processor * proc = &this.idles`first;
    +                 processor * proc = &this.list`first;
                      // Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it
                      asm volatile("": : :"memory");
                      …
                      return [idle, total, proc];
              }
    -
    -         /* paranoid */ verify( ready_schedule_islocked() );
    -         /* paranoid */ verify( ! __preemption_enabled() );
      }
  • libcfa/src/concurrency/kernel.hfa

    r8cfa4ef → r2f5ea69

      // Idle Sleep
    - struct __cluster_proc_list {
    + struct __cluster_idles {
              // Spin lock protecting the queue
              volatile uint64_t lock;
              …
              // List of idle processors
    -         dlist(processor, processor) idles;
    -
    -         // List of active processors
    -         dlist(processor, processor) actives;
    +         dlist(processor, processor) list;
      };

    …

      // List of idle processors
    - __cluster_proc_list procs;
    + __cluster_idles idles;

      // List of threads
  • libcfa/src/concurrency/kernel/startup.cfa

    r8cfa4ef → r2f5ea69

      this.name = name;
      this.cltr = &_cltr;
    - this.cltr_id = -1u;
      do_terminate = false;
      preemption_alarm = 0p;

    …

      #endif

    - // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
    - uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);
    -         this.cltr->procs.total += 1u;
    -         insert_last(this.cltr->procs.actives, this);
    + lock( this.cltr->idles );
    +         int target = this.cltr->idles.total += 1u;
    + unlock( this.cltr->idles );
    +
    + id = doregister((__processor_id_t*)&this);
    +
    + // Lock the RWlock so no-one pushes/pops while we are changing the queue
    + uint_fast32_t last_size = ready_mutate_lock();

              // Adjust the ready queue size
    -         ready_queue_grow( cltr );
    +         this.cltr_id = ready_queue_grow( cltr, target );

      // Unlock the RWlock

    …

      // Not a ctor, it just preps the destruction but should not destroy members
      static void deinit(processor & this) {
    +         lock( this.cltr->idles );
    +                 int target = this.cltr->idles.total -= 1u;
    +         unlock( this.cltr->idles );
    +
              // Lock the RWlock so no-one pushes/pops while we are changing the queue
              uint_fast32_t last_size = ready_mutate_lock();
    -                 this.cltr->procs.total -= 1u;
    -                 remove(this);

                      // Adjust the ready queue size
    -                 ready_queue_shrink( this.cltr );
    -
    -         // Unlock the RWlock and unregister: we don't need the read_lock any more
    -         ready_mutate_unregister((__processor_id_t*)&this, last_size );
    +                 ready_queue_shrink( this.cltr, target );
    +
    +         // Unlock the RWlock
    +         ready_mutate_unlock( last_size );
    +
    +         // Finally we don't need the read_lock any more
    +         unregister((__processor_id_t*)&this);

              close(this.idle);

    …

      //-----------------------------------------------------------------------------
      // Cluster
    - static void ?{}(__cluster_proc_list & this) {
    + static void ?{}(__cluster_idles & this) {
              this.lock  = 0;
              this.idle  = 0;
              this.total = 0;
    +         (this.list){};
      }

    …

              // Adjust the ready queue size
    -         ready_queue_grow( &this );
    +         ready_queue_grow( &this, 0 );

      // Unlock the RWlock

    …

              // Adjust the ready queue size
    -         ready_queue_shrink( &this );
    +         ready_queue_shrink( &this, 0 );

      // Unlock the RWlock
  • libcfa/src/concurrency/kernel_private.hfa

    r8cfa4ef → r2f5ea69

      // Cluster lock API
      //=======================================================================
    + // Cells use by the reader writer lock
    + // while not generic it only relies on a opaque pointer
    + struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    +         // Spin lock used as the underlying lock
    +         volatile bool lock;
    +
    +         // Handle pointing to the proc owning this cell
    +         // Used for allocating cells and debugging
    +         __processor_id_t * volatile handle;
    +
    +         #ifdef __CFA_WITH_VERIFY__
    +                 // Debug, check if this is owned for reading
    +                 bool owned;
    +         #endif
    + };
    +
    + static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
    +
      // Lock-Free registering/unregistering of threads
      // Register a processor to a given cluster and get its unique id in return
    - void register_proc_id( struct __processor_id_t * );
    + unsigned doregister( struct __processor_id_t * proc );

      // Unregister a processor from a given cluster using its id, getting back the original pointer
    - void unregister_proc_id( struct __processor_id_t * proc );
    + void     unregister( struct __processor_id_t * proc );
    +
    + //-----------------------------------------------------------------------
    + // Cluster idle lock/unlock
    + static inline void lock(__cluster_idles & this) {
    +         for() {
    +                 uint64_t l = this.lock;
    +                 if(
    +                         (0 == (l % 2))
    +                         && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
    +                 ) return;
    +                 Pause();
    +         }
    + }
    +
    + static inline void unlock(__cluster_idles & this) {
    +         /* paranoid */ verify( 1 == (this.lock % 2) );
    +         __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
    + }

      //=======================================================================

    …

      __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
      }
    -
    - // Cells use by the reader writer lock
    - // while not generic it only relies on a opaque pointer
    - struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    -         // Spin lock used as the underlying lock
    -         volatile bool lock;
    -
    -         // Handle pointing to the proc owning this cell
    -         // Used for allocating cells and debugging
    -         __processor_id_t * volatile handle;
    -
    -         #ifdef __CFA_WITH_VERIFY__
    -                 // Debug, check if this is owned for reading
    -                 bool owned;
    -         #endif
    - };
    -
    - static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));

      //-----------------------------------------------------------------------

    …

      void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );

    - //-----------------------------------------------------------------------
    - // Lock-Free registering/unregistering of threads
    - // Register a processor to a given cluster and get its unique id in return
    - // For convenience, also acquires the lock
    - static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
    -         register_proc_id( proc );
    -         return ready_mutate_lock();
    - }
    -
    - // Unregister a processor from a given cluster using its id, getting back the original pointer
    - // assumes the lock is acquired
    - static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
    -         ready_mutate_unlock( last_s );
    -         unregister_proc_id( proc );
    - }
    -
    - //-----------------------------------------------------------------------
    - // Cluster idle lock/unlock
    - static inline void lock(__cluster_proc_list & this) {
    -         /* paranoid */ verify( ! __preemption_enabled() );
    -
    -         // Start by locking the global RWlock so that we know no-one is
    -         // adding/removing processors while we mess with the idle lock
    -         ready_schedule_lock();
    -
    -         // Simple counting lock, acquired, acquired by incrementing the counter
    -         // to an odd number
    -         for() {
    -                 uint64_t l = this.lock;
    -                 if(
    -                         (0 == (l % 2))
    -                         && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
    -                 ) return;
    -                 Pause();
    -         }
    -
    -         /* paranoid */ verify( ! __preemption_enabled() );
    - }
    -
    - static inline void unlock(__cluster_proc_list & this) {
    -         /* paranoid */ verify( ! __preemption_enabled() );
    -
    -         /* paranoid */ verify( 1 == (this.lock % 2) );
    -         // Simple couting lock, release by incrementing to an even number
    -         __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
    -
    -         // Release the global lock, which we acquired when locking
    -         ready_schedule_unlock();
    -
    -         /* paranoid */ verify( ! __preemption_enabled() );
    - }
    -
      //=======================================================================
      // Ready-Queue API

    …

      //-----------------------------------------------------------------------
      // Increase the width of the ready queue (number of lanes) by 4
    - void ready_queue_grow  (struct cluster * cltr);
    + unsigned ready_queue_grow  (struct cluster * cltr, int target);

      //-----------------------------------------------------------------------
      // Decrease the width of the ready queue (number of lanes) by 4
    - void ready_queue_shrink(struct cluster * cltr);
    + void ready_queue_shrink(struct cluster * cltr, int target);
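The kernel_private.hfa hunks above replace the old RWlock-coupled idle lock with a self-contained counting lock on __cluster_idles: a writer bumps the counter to an odd value to lock and back to an even value to unlock, so readers can detect a concurrent writer from the counter alone. Below is a minimal stand-alone C sketch of that protocol. The struct, the function names, and the reader's retry logic are illustrative assumptions for this note (the corresponding checks in query() are elided in the kernel.cfa hunk); only the writer side mirrors the lock()/unlock() code shown above.

    // counting_lock_sketch.c -- illustrative only, not part of the changeset
    #include <stdint.h>
    #include <stdbool.h>

    struct idles_sketch {
            volatile uint64_t lock;          // even = unlocked, odd = held by a writer
            unsigned idle, total;            // fields a reader wants as a consistent pair
    };

    // Writer side: same shape as the new lock()/unlock() in kernel_private.hfa.
    static void lock_sketch(struct idles_sketch * this) {
            for(;;) {
                    uint64_t l = this->lock;
                    if( (0 == (l % 2))       // only attempt the CAS when unlocked (even)
                        && __atomic_compare_exchange_n(&this->lock, &l, l + 1, false,
                                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) )
                            return;          // counter is now odd: locked
                    // spin / Pause() before retrying
            }
    }

    static void unlock_sketch(struct idles_sketch * this) {
            __atomic_fetch_add(&this->lock, 1, __ATOMIC_SEQ_CST);   // back to even: unlocked
    }

    // Reader side (assumption): one way query() could take a consistent snapshot
    // without blocking writers -- read the counter, copy the fields, and retry if
    // a writer was active before or during the copy.
    static void snapshot_sketch(struct idles_sketch * this, unsigned * idle, unsigned * total) {
            for(;;) {
                    uint64_t l = __atomic_load_n(&this->lock, __ATOMIC_SEQ_CST);
                    if( l % 2 != 0 ) continue;                       // a writer holds the lock
                    unsigned i = this->idle;
                    unsigned t = this->total;
                    __atomic_thread_fence(__ATOMIC_SEQ_CST);         // order the copies before the re-check
                    if( l == __atomic_load_n(&this->lock, __ATOMIC_SEQ_CST) ) {
                            *idle = i; *total = t;                   // no writer ran: the pair is consistent
                            return;
                    }
            }
    }

One consequence visible in the startup.cfa hunks is that this lock no longer nests inside the scheduler RWlock, so the processor constructor counts itself under the idles lock first and only then takes the ready_mutate lock.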
  • libcfa/src/concurrency/preemption.cfa

    r8cfa4ef → r2f5ea69

      static void * alarm_loop( __attribute__((unused)) void * args ) {
              __processor_id_t id;
    -         register_proc_id(&id);
    +         id.id = doregister(&id);
              __cfaabi_tls.this_proc_id = &id;

    …

      EXIT:
              __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
    -         register_proc_id(&id);
    +         unregister(&id);

              return 0p;
  • libcfa/src/concurrency/ready_queue.cfa

    r8cfa4ef → r2f5ea69

      //=======================================================================
      // Lock-Free registering/unregistering of threads
    - void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
    + unsigned doregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
              __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);

    …

                              /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
                              /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
    -                         proc->id = i;
    +                         return i;
                      }
              }

    …

              /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size));
              /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0);
    -         proc->id = n;
    - }
    -
    - void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
    +         return n;
    + }
    +
    + void unregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
              unsigned id = proc->id;
              /*paranoid*/ verify(id < ready);

    …

              __attribute__((unused)) int preferred;
              #if defined(BIAS)
    -                 /* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );
                      preferred =
                              //*

    …

              int preferred;
              #if defined(BIAS)
    -                 /* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
    +                 // Don't bother trying locally too much
                      preferred = kernelTLS().this_processor->cltr_id;
              #endif

    …

      }

    - static void assign_list(unsigned & value, const int inc, dlist(processor, processor) & list, unsigned count) {
    -         processor * it = &list`first;
    -         for(unsigned i = 0; i < count; i++) {
    -                 /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
    -                 it->cltr_id = value;
    -                 value += inc;
    -                 it = &(*it)`next;
    -         }
    - }
    -
    - static void reassign_cltr_id(struct cluster * cltr, const int inc) {
    -         unsigned preferred = 0;
    -         assign_list(preferred, inc, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
    -         assign_list(preferred, inc, cltr->procs.idles  , cltr->procs.idle );
    - }
    -
      // Grow the ready queue
    - void ready_queue_grow(struct cluster * cltr) {
    + unsigned ready_queue_grow(struct cluster * cltr, int target) {
    +         unsigned preferred;
              size_t ncount;
    -         int target = cltr->procs.total;

              /* paranoid */ verify( ready_mutate_islocked() );

    …

                      if(target >= 2) {
                              ncount = target * 4;
    +                         preferred = ncount - 4;
                      } else {
                              ncount = 1;
    +                         preferred = 0;
                      }

    …

              }

    -         reassign_cltr_id(cltr, 4);
    -
              // Make sure that everything is consistent
              /* paranoid */ check( cltr->ready_queue );

    …

              /* paranoid */ verify( ready_mutate_islocked() );
    +         return preferred;
      }

      // Shrink the ready queue
    - void ready_queue_shrink(struct cluster * cltr) {
    + void ready_queue_shrink(struct cluster * cltr, int target) {
              /* paranoid */ verify( ready_mutate_islocked() );
              __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");

    …

              // Make sure that everything is consistent
              /* paranoid */ check( cltr->ready_queue );
    -
    -         int target = cltr->procs.total;

              with( cltr->ready_queue ) {

    …

              }

    -         reassign_cltr_id(cltr, 4);
    -
              // Make sure that everything is consistent
              /* paranoid */ check( cltr->ready_queue );
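Compared with r8cfa4ef, the grow path no longer derives the processor count itself nor walks the processor lists with reassign_cltr_id: the caller passes target in, and ready_queue_grow hands back the preferred value that the processor constructor stores as cltr_id. A small self-contained check of that arithmetic, using a hypothetical helper grow_math that mirrors the sizing branch shown above:

    // grow_math_example.c -- illustrative only; grow_math is a made-up helper
    // repeating the sizing branch from the ready_queue_grow hunk above.
    #include <assert.h>
    #include <stddef.h>

    static void grow_math(int target, size_t * ncount, unsigned * preferred) {
            if(target >= 2) { *ncount = target * 4; *preferred = *ncount - 4; }
            else            { *ncount = 1;          *preferred = 0;           }
    }

    int main(void) {
            size_t n; unsigned p;
            grow_math(3, &n, &p);  assert(n == 12 && p == 8);   // 3 procs: 12 lanes, new proc gets cltr_id 8
            grow_math(1, &n, &p);  assert(n ==  1 && p == 0);   // 1 proc : 1 lane, cltr_id 0
            return 0;
    }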