Timestamp: Apr 29, 2021, 4:26:25 PM (3 years ago)
Author: Thierry Delisle <tdelisle@…>
Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
Children: 3eb55f98
Parents: b2fc7ad9
Message: Changed RW lock to avoid hitting the global array on schedule.
Location: libcfa/src/concurrency
Files: 7 edited
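
Background for the diffs below: the scheduler's ready-queue reader-writer lock previously kept one lock cell per processor in a global array (__scheduler_lock_id_t, indexed through kernelTLS().this_proc_id->id), so every reader-side acquire on the schedule path read and wrote that shared array. After this change each processor spins on a thread-local sched_lock flag, and the global array only holds pointers to those flags for the (rare) writer to walk. What follows is a minimal, stand-alone C sketch of that before/after shape, not the CFA sources; MAX_PROCS, cells, my_id, handles and the bare spin loops are assumptions of the sketch.

    #include <stdbool.h>
    #include <stddef.h>

    #define MAX_PROCS 128                               /* hypothetical capacity */

    volatile bool write_lock;                           /* writer gate, present in both versions */

    /* Before: one lock cell per processor in a shared, global array. */
    volatile bool cells[MAX_PROCS];
    __thread unsigned my_id;                            /* index assigned at registration */

    void reader_lock_old(void) {
            while (__atomic_load_n(&write_lock, __ATOMIC_RELAXED)) ;              /* wait out writers */
            while (__atomic_exchange_n(&cells[my_id], true, __ATOMIC_ACQUIRE)) ;  /* hits the global array */
    }

    /* After: each processor spins on its own thread-local flag; the global
     * array only stores pointers to those flags, for the writer to walk. */
    __thread volatile bool sched_lock;
    volatile bool * volatile handles[MAX_PROCS];

    void reader_lock_new(void) {
            while (__atomic_load_n(&write_lock, __ATOMIC_RELAXED)) ;              /* wait out writers */
            while (__atomic_exchange_n(&sched_lock, true, __ATOMIC_ACQUIRE)) ;    /* thread-local only */
    }

    void writer_lock_all(void) {
            while (__atomic_exchange_n(&write_lock, true, __ATOMIC_ACQUIRE)) ;    /* take the gate */
            for (size_t i = 0; i < MAX_PROCS; i++) {                              /* then every published reader flag */
                    volatile bool * h = handles[i];
                    if (h) while (__atomic_exchange_n(h, true, __ATOMIC_ACQUIRE)) ;
            }
    }

    int main(void) {
            handles[0] = &sched_lock;                   /* registration publishes the TLS flag */
            reader_lock_new();                          /* schedule-path fast case: no shared-array access */
            __atomic_store_n(&sched_lock, false, __ATOMIC_RELEASE);
            return 0;
    }

The point of the "after" shape is that all shared-array traffic moves to the writer and the register/unregister path, while the hot schedule path only touches memory owned by the current processor.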

  • libcfa/src/concurrency/kernel.cfa

    rb2fc7ad9 rc993b15  
    163163        #if !defined(__CFA_NO_STATISTICS__)
    164164                if( this->print_halts ) {
    165                         __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->id, this->name, (void*)this);
     165                        __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->unique_id, this->name, (void*)this);
    166166                }
    167167        #endif
     
    223223                                #if !defined(__CFA_NO_STATISTICS__)
    224224                                        if(this->print_halts) {
    225                                                 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl());
     225                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl());
    226226                                        }
    227227                                #endif
     
    236236                                #if !defined(__CFA_NO_STATISTICS__)
    237237                                        if(this->print_halts) {
    238                                                 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl());
     238                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl());
    239239                                        }
    240240                                #endif
     
    390390
    391391        post( this->terminated );
    392 
    393392
    394393        if(this == mainProcessor) {
     
    553552static void __schedule_thread( $thread * thrd ) {
    554553        /* paranoid */ verify( ! __preemption_enabled() );
    555         /* paranoid */ verify( kernelTLS().this_proc_id );
    556554        /* paranoid */ verify( ready_schedule_islocked());
    557555        /* paranoid */ verify( thrd );
     
    611609static inline $thread * __next_thread(cluster * this) with( *this ) {
    612610        /* paranoid */ verify( ! __preemption_enabled() );
    613         /* paranoid */ verify( kernelTLS().this_proc_id );
    614611
    615612        ready_schedule_lock();
     
    617614        ready_schedule_unlock();
    618615
    619         /* paranoid */ verify( kernelTLS().this_proc_id );
    620616        /* paranoid */ verify( ! __preemption_enabled() );
    621617        return thrd;
     
    625621static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
    626622        /* paranoid */ verify( ! __preemption_enabled() );
    627         /* paranoid */ verify( kernelTLS().this_proc_id );
    628623
    629624        ready_schedule_lock();
     
    638633        ready_schedule_unlock();
    639634
    640         /* paranoid */ verify( kernelTLS().this_proc_id );
    641635        /* paranoid */ verify( ! __preemption_enabled() );
    642636        return thrd;
  • libcfa/src/concurrency/kernel.hfa

    rb2fc7ad9 rc993b15  
    4949
    5050// Processor id, required for scheduling threads
    51 struct __processor_id_t {
    52         unsigned id:24;
    53 
    54         #if !defined(__CFA_NO_STATISTICS__)
    55                 struct __stats_t * stats;
    56         #endif
    57 };
     51
    5852
    5953coroutine processorCtx_t {
     
    6357// Wrapper around kernel threads
    6458struct __attribute__((aligned(128))) processor {
    65         // Main state
    66         inline __processor_id_t;
    67 
    6859        // Cluster from which to get threads
    6960        struct cluster * cltr;
     
    8980        // Handle to pthreads
    9081        pthread_t kernel_thread;
     82
     83        // Unique id for the processor (not per cluster)
     84        unsigned unique_id;
    9185
    9286        struct {
  • libcfa/src/concurrency/kernel/fwd.hfa

    rb2fc7ad9 rc993b15  
    3838                        struct $thread          * volatile this_thread;
    3939                        struct processor        * volatile this_processor;
    40                         struct __processor_id_t * volatile this_proc_id;
    41                         struct __stats_t        * volatile this_stats;
     40                        volatile bool sched_lock;
    4241
    4342                        struct {
     
    5655                                uint64_t bck_seed;
    5756                        } ready_rng;
     57
     58                        struct __stats_t        * volatile this_stats;
     59
     60
     61                        #ifdef __CFA_WITH_VERIFY__
     62                                // Debug, check if the rwlock is owned for reading
     63                                bool in_sched_lock;
     64                                unsigned sched_id;
     65                        #endif
    5866                } __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" )));
    5967
  • libcfa/src/concurrency/kernel/startup.cfa

    rb2fc7ad9 rc993b15  
    7777static void doregister( struct cluster & cltr );
    7878static void unregister( struct cluster & cltr );
     79static void register_tls( processor * this );
     80static void unregister_tls( processor * this );
    7981static void ?{}( $coroutine & this, current_stack_info_t * info);
    8082static void ?{}( $thread & this, current_stack_info_t * info);
     
    123125        NULL,                                                                                           // cannot use 0p
    124126        NULL,
     127        false,
     128        { 1, false, false },
     129        0,
     130        { 0, 0 },
    125131        NULL,
    126         NULL,
    127         { 1, false, false },
     132        #ifdef __CFA_WITH_VERIFY__
     133                false,
     134                0,
     135        #endif
    128136};
    129137
     
    210218        (*mainProcessor){};
    211219
     220        register_tls( mainProcessor );
     221
    212222        //initialize the global state variables
    213223        __cfaabi_tls.this_processor = mainProcessor;
    214         __cfaabi_tls.this_proc_id   = (__processor_id_t*)mainProcessor;
    215224        __cfaabi_tls.this_thread    = mainThread;
    216225
     
    273282        #endif
    274283
     284        unregister_tls( mainProcessor );
     285
    275286        // Destroy the main processor and its context in reverse order of construction
    276287        // These were manually constructed so we need manually destroy them
     
    316327        processor * proc = (processor *) arg;
    317328        __cfaabi_tls.this_processor = proc;
    318         __cfaabi_tls.this_proc_id   = (__processor_id_t*)proc;
    319329        __cfaabi_tls.this_thread    = 0p;
    320330        __cfaabi_tls.preemption_state.[enabled, disable_count] = [false, 1];
     331
     332        register_tls( proc );
     333
    321334        // SKULLDUGGERY: We want to create a context for the processor coroutine
    322335        // which is needed for the 2-step context switch. However, there is no reason
     
    355368                #endif
    356369        #endif
     370
     371        unregister_tls( proc );
    357372
    358373        return 0p;
     
    496511        #endif
    497512
    498         // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
    499         uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);
    500                 this.cltr->procs.total += 1u;
    501                 insert_last(this.cltr->procs.actives, this);
    502 
    503                 // Adjust the ready queue size
    504                 ready_queue_grow( cltr );
    505 
    506         // Unlock the RWlock
    507         ready_mutate_unlock( last_size );
    508 
    509513        __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
    510514}
     
    512516// Not a ctor, it just preps the destruction but should not destroy members
    513517static void deinit(processor & this) {
    514         // Lock the RWlock so no-one pushes/pops while we are changing the queue
    515         uint_fast32_t last_size = ready_mutate_lock();
    516                 this.cltr->procs.total -= 1u;
    517                 remove(this);
    518 
    519                 // Adjust the ready queue size
    520                 ready_queue_shrink( this.cltr );
    521 
    522         // Unlock the RWlock and unregister: we don't need the read_lock any more
    523         ready_mutate_unregister((__processor_id_t*)&this, last_size );
    524 
    525518        close(this.idle);
    526519}
     
    656649        cltr->nthreads -= 1;
    657650        unlock(cltr->thread_list_lock);
     651}
     652
     653static void register_tls( processor * this ) {
     654        // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
     655        uint_fast32_t last_size;
     656        [this->unique_id, last_size] = ready_mutate_register();
     657
     658                this->cltr->procs.total += 1u;
     659                insert_last(this->cltr->procs.actives, *this);
     660
     661                // Adjust the ready queue size
     662                ready_queue_grow( this->cltr );
     663
     664        // Unlock the RWlock
     665        ready_mutate_unlock( last_size );
     666}
     667
     668
     669static void unregister_tls( processor * this ) {
     670        // Lock the RWlock so no-one pushes/pops while we are changing the queue
     671        uint_fast32_t last_size = ready_mutate_lock();
     672                this->cltr->procs.total -= 1u;
     673                remove(*this);
     674
     675                // clear the cluster so nothing gets pushed to local queues
     676                cluster * cltr = this->cltr;
     677                this->cltr = 0p;
     678
     679                // Adjust the ready queue size
     680                ready_queue_shrink( cltr );
     681
     682        // Unlock the RWlock and unregister: we don't need the read_lock any more
     683        ready_mutate_unregister( this->unique_id, last_size );
    658684}
    659685
  • libcfa/src/concurrency/kernel_private.hfa

    rb2fc7ad9 rc993b15  
    2525// Scheduler
    2626
    27 struct __attribute__((aligned(128))) __scheduler_lock_id_t;
    2827
    2928extern "C" {
     
    8079// Lock-Free registering/unregistering of threads
    8180// Register a processor to a given cluster and get its unique id in return
    82 void register_proc_id( struct __processor_id_t * );
     81unsigned register_proc_id( void );
    8382
    8483// Unregister a processor from a given cluster using its id, getting back the original pointer
    85 void unregister_proc_id( struct __processor_id_t * proc );
     84void unregister_proc_id( unsigned );
    8685
    8786//=======================================================================
     
    112111}
    113112
    114 // Cells use by the reader writer lock
    115 // while not generic it only relies on a opaque pointer
    116 struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    117         // Spin lock used as the underlying lock
    118         volatile bool lock;
    119 
    120         // Handle pointing to the proc owning this cell
    121         // Used for allocating cells and debugging
    122         __processor_id_t * volatile handle;
    123 
    124         #ifdef __CFA_WITH_VERIFY__
    125                 // Debug, check if this is owned for reading
    126                 bool owned;
    127         #endif
    128 };
    129 
    130 static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
     113
     114
     115
    131116
    132117//-----------------------------------------------------------------------
     
    147132
    148133        // writer lock
    149         volatile bool lock;
     134        volatile bool write_lock;
    150135
    151136        // data pointer
    152         __scheduler_lock_id_t * data;
     137        volatile bool * volatile * data;
    153138};
    154139
     
    163148static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
    164149        /* paranoid */ verify( ! __preemption_enabled() );
    165         /* paranoid */ verify( kernelTLS().this_proc_id );
    166 
    167         unsigned iproc = kernelTLS().this_proc_id->id;
    168         /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
    169         /*paranoid*/ verify(iproc < ready);
     150        /* paranoid */ verify( ! kernelTLS().in_sched_lock );
     151        /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
     152        /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );
    170153
    171154        // Step 1 : make sure no writer are in the middle of the critical section
    172         while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
     155        while(__atomic_load_n(&write_lock, (int)__ATOMIC_RELAXED))
    173156                Pause();
    174157
     
    179162
    180163        // Step 2 : acquire our local lock
    181         __atomic_acquire( &data[iproc].lock );
    182         /*paranoid*/ verify(data[iproc].lock);
     164        __atomic_acquire( &kernelTLS().sched_lock );
     165        /*paranoid*/ verify(kernelTLS().sched_lock);
    183166
    184167        #ifdef __CFA_WITH_VERIFY__
    185168                // Debug, check if this is owned for reading
    186                 data[iproc].owned = true;
     169                kernelTLS().in_sched_lock = true;
    187170        #endif
    188171}
     
    190173static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
    191174        /* paranoid */ verify( ! __preemption_enabled() );
    192         /* paranoid */ verify( kernelTLS().this_proc_id );
    193 
    194         unsigned iproc = kernelTLS().this_proc_id->id;
    195         /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
    196         /*paranoid*/ verify(iproc < ready);
    197         /*paranoid*/ verify(data[iproc].lock);
    198         /*paranoid*/ verify(data[iproc].owned);
     175        /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
     176        /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );
     177        /* paranoid */ verify( kernelTLS().sched_lock );
     178        /* paranoid */ verify( kernelTLS().in_sched_lock );
    199179        #ifdef __CFA_WITH_VERIFY__
    200180                // Debug, check if this is owned for reading
    201                 data[iproc].owned = false;
     181                kernelTLS().in_sched_lock = false;
    202182        #endif
    203         __atomic_unlock(&data[iproc].lock);
     183        __atomic_unlock(&kernelTLS().sched_lock);
    204184}
    205185
     
    207187        static inline bool ready_schedule_islocked(void) {
    208188                /* paranoid */ verify( ! __preemption_enabled() );
    209                 /*paranoid*/ verify( kernelTLS().this_proc_id );
    210                 __processor_id_t * proc = kernelTLS().this_proc_id;
    211                 return __scheduler_lock->data[proc->id].owned;
     189                /* paranoid */ verify( (!kernelTLS().in_sched_lock) || kernelTLS().sched_lock );
     190                return kernelTLS().sched_lock;
    212191        }
    213192
    214193        static inline bool ready_mutate_islocked() {
    215                 return __scheduler_lock->lock;
     194                return __scheduler_lock->write_lock;
    216195        }
    217196#endif
     
    228207// Register a processor to a given cluster and get its unique id in return
    229208// For convenience, also acquires the lock
    230 static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
    231         register_proc_id( proc );
    232         return ready_mutate_lock();
     209static inline [unsigned, uint_fast32_t] ready_mutate_register() {
     210        unsigned id = register_proc_id();
     211        uint_fast32_t last = ready_mutate_lock();
     212        return [id, last];
    233213}
    234214
    235215// Unregister a processor from a given cluster using its id, getting back the original pointer
    236216// assumes the lock is acquired
    237 static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
     217static inline void ready_mutate_unregister( unsigned id, uint_fast32_t last_s ) {
    238218        ready_mutate_unlock( last_s );
    239         unregister_proc_id( proc );
     219        unregister_proc_id( id );
    240220}
    241221
  • libcfa/src/concurrency/preemption.cfa

    rb2fc7ad9 rc993b15  
    687687// Waits on SIGALRM and send SIGUSR1 to whom ever needs it
    688688static void * alarm_loop( __attribute__((unused)) void * args ) {
    689         __processor_id_t id;
    690         register_proc_id(&id);
    691         __cfaabi_tls.this_proc_id = &id;
    692 
     689        unsigned id = register_proc_id();
    693690
    694691        // Block sigalrms to control when they arrive
     
    749746EXIT:
    750747        __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
    751         register_proc_id(&id);
     748        unregister_proc_id(id);
    752749
    753750        return 0p;
  • libcfa/src/concurrency/ready_queue.cfa

    rb2fc7ad9 rc993b15  
    9393        this.alloc = 0;
    9494        this.ready = 0;
    95         this.lock  = false;
    9695        this.data  = alloc(this.max);
    97 
    98         /*paranoid*/ verify( 0 == (((uintptr_t)(this.data    )) % 64) );
    99         /*paranoid*/ verify( 0 == (((uintptr_t)(this.data + 1)) % 64) );
     96        this.write_lock  = false;
     97
    10098        /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.alloc), &this.alloc));
    10199        /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.ready), &this.ready));
     
    106104}
    107105
    108 void ?{}( __scheduler_lock_id_t & this, __processor_id_t * proc ) {
    109         this.handle = proc;
    110         this.lock   = false;
    111         #ifdef __CFA_WITH_VERIFY__
    112                 this.owned  = false;
    113         #endif
    114 }
    115106
    116107//=======================================================================
    117108// Lock-Free registering/unregistering of threads
    118 void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
     109unsigned register_proc_id( void ) with(*__scheduler_lock) {
    119110        __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);
     111        bool * handle = (bool *)&kernelTLS().sched_lock;
    120112
    121113        // Step - 1 : check if there is already space in the data
     
    124116        // Check among all the ready
    125117        for(uint_fast32_t i = 0; i < s; i++) {
    126                 __processor_id_t * null = 0p; // Re-write every loop since compare thrashes it
    127                 if( __atomic_load_n(&data[i].handle, (int)__ATOMIC_RELAXED) == null
    128                         && __atomic_compare_exchange_n( &data[i].handle, &null, proc, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
    129                         /*paranoid*/ verify(i < ready);
    130                         /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
    131                         /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
    132                         proc->id = i;
    133                         return;
     118                bool * volatile * cell = (bool * volatile *)&data[i]; // Cforall is bugged and the double volatiles causes problems
     119                /* paranoid */ verify( handle != *cell );
     120
     121                bool * null = 0p; // Re-write every loop since compare thrashes it
     122                if( __atomic_load_n(cell, (int)__ATOMIC_RELAXED) == null
     123                        && __atomic_compare_exchange_n( cell, &null, handle, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     124                        /* paranoid */ verify(i < ready);
     125                        /* paranoid */ verify( (kernelTLS().sched_id = i, true) );
     126                        return i;
    134127                }
    135128        }
     
    142135
    143136        // Step - 3 : Mark space as used and then publish it.
    144         __scheduler_lock_id_t * storage = (__scheduler_lock_id_t *)&data[n];
    145         (*storage){ proc };
     137        data[n] = handle;
    146138        while() {
    147139                unsigned copy = n;
     
    155147
    156148        // Return new spot.
    157         /*paranoid*/ verify(n < ready);
    158         /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size));
    159         /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0);
    160         proc->id = n;
    161 }
    162 
    163 void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
    164         unsigned id = proc->id;
    165         /*paranoid*/ verify(id < ready);
    166         /*paranoid*/ verify(proc == __atomic_load_n(&data[id].handle, __ATOMIC_RELAXED));
    167         __atomic_store_n(&data[id].handle, 0p, __ATOMIC_RELEASE);
     149        /* paranoid */ verify(n < ready);
     150        /* paranoid */ verify( (kernelTLS().sched_id = n, true) );
     151        return n;
     152}
     153
     154void unregister_proc_id( unsigned id ) with(*__scheduler_lock) {
     155        /* paranoid */ verify(id < ready);
     156        /* paranoid */ verify(id == kernelTLS().sched_id);
     157        /* paranoid */ verify(data[id] == &kernelTLS().sched_lock);
     158
     159        bool * volatile * cell = (bool * volatile *)&data[id]; // Cforall is bugged and the double volatiles causes problems
     160
     161        __atomic_store_n(cell, 0p, __ATOMIC_RELEASE);
    168162
    169163        __cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc);
     
    175169uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {
    176170        /* paranoid */ verify( ! __preemption_enabled() );
     171        /* paranoid */ verify( ! kernelTLS().sched_lock );
    177172
    178173        // Step 1 : lock global lock
    179174        // It is needed to avoid processors that register mid Critical-Section
    180175        //   to simply lock their own lock and enter.
    181         __atomic_acquire( &lock );
     176        __atomic_acquire( &write_lock );
    182177
    183178        // Step 2 : lock per-proc lock
     
    187182        uint_fast32_t s = ready;
    188183        for(uint_fast32_t i = 0; i < s; i++) {
    189                 __atomic_acquire( &data[i].lock );
     184                volatile bool * llock = data[i];
     185                if(llock) __atomic_acquire( llock );
    190186        }
    191187
     
    204200        // Alternative solution : return s in write_lock and pass it to write_unlock
    205201        for(uint_fast32_t i = 0; i < last_s; i++) {
    206                 verify(data[i].lock);
    207                 __atomic_store_n(&data[i].lock, (bool)false, __ATOMIC_RELEASE);
     202                volatile bool * llock = data[i];
     203                if(llock) __atomic_store_n(llock, (bool)false, __ATOMIC_RELEASE);
    208204        }
    209205
    210206        // Step 2 : release global lock
    211         /*paranoid*/ assert(true == lock);
    212         __atomic_store_n(&lock, (bool)false, __ATOMIC_RELEASE);
     207        /*paranoid*/ assert(true == write_lock);
     208        __atomic_store_n(&write_lock, (bool)false, __ATOMIC_RELEASE);
    213209
    214210        /* paranoid */ verify( ! __preemption_enabled() );
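
For context on the registration half shown above: register_proc_id() now returns an index and publishes the address of the caller's thread-local sched_lock into the data array, instead of storing a __processor_id_t pointer into dedicated lock cells. Below is a simplified, stand-alone C sketch of that lock-free protocol (reuse a freed slot by CAS, otherwise claim a new index and publish it by advancing ready); the fixed CAP, the *_sketch names and the GCC builtins are assumptions of the sketch, not the CFA implementation.

    #include <stdbool.h>
    #include <stddef.h>

    enum { CAP = 128 };                                 /* assumed fixed capacity */

    volatile bool * volatile data[CAP];                 /* published reader-flag handles */
    volatile unsigned alloc;                            /* next never-used index         */
    volatile unsigned ready;                            /* published prefix of data[]    */
    __thread volatile bool sched_lock;                  /* this thread's reader flag     */

    unsigned register_proc_id_sketch(void) {
            volatile bool * handle = &sched_lock;
            unsigned s = __atomic_load_n(&ready, __ATOMIC_ACQUIRE);

            /* Step 1: try to reuse a slot freed by an earlier unregister. */
            for (unsigned i = 0; i < s; i++) {
                    volatile bool * expected = NULL;    /* reset each iteration, CAS overwrites it */
                    if (__atomic_load_n(&data[i], __ATOMIC_RELAXED) == NULL
                        && __atomic_compare_exchange_n(&data[i], &expected, handle,
                                                       false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
                            return i;
            }

            /* Step 2: no free slot, claim a brand-new index. */
            unsigned n = __atomic_fetch_add(&alloc, 1, __ATOMIC_SEQ_CST);
            data[n] = handle;

            /* Step 3: publish it by advancing ready once all earlier slots are visible. */
            unsigned expect = n;
            while (!__atomic_compare_exchange_n(&ready, &expect, n + 1,
                                                false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
                    expect = n;                         /* another registration is still publishing; retry */
            return n;
    }

    void unregister_proc_id_sketch(unsigned id) {
            /* Clearing the slot lets a later registration reuse it; the writer
             * tolerates NULL entries when it walks data[]. */
            __atomic_store_n(&data[id], (volatile bool *)NULL, __ATOMIC_RELEASE);
    }

    int main(void) {
            unsigned id = register_proc_id_sketch();
            unregister_proc_id_sketch(id);
            return 0;
    }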