Changes in / [3eb55f98:2d8a770]


Ignore:
Location:
libcfa/src/concurrency
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/concurrency/kernel.cfa

    r3eb55f98 r2d8a770  
    163163        #if !defined(__CFA_NO_STATISTICS__)
    164164                if( this->print_halts ) {
    165                         __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->unique_id, this->name, (void*)this);
     165                        __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->id, this->name, (void*)this);
    166166                }
    167167        #endif
     
    223223                                #if !defined(__CFA_NO_STATISTICS__)
    224224                                        if(this->print_halts) {
    225                                                 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl());
     225                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl());
    226226                                        }
    227227                                #endif
     
    236236                                #if !defined(__CFA_NO_STATISTICS__)
    237237                                        if(this->print_halts) {
    238                                                 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl());
     238                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl());
    239239                                        }
    240240                                #endif
     
    390390
    391391        post( this->terminated );
     392
    392393
    393394        if(this == mainProcessor) {
     
    552553static void __schedule_thread( $thread * thrd ) {
    553554        /* paranoid */ verify( ! __preemption_enabled() );
     555        /* paranoid */ verify( kernelTLS().this_proc_id );
    554556        /* paranoid */ verify( ready_schedule_islocked());
    555557        /* paranoid */ verify( thrd );
     
    609611static inline $thread * __next_thread(cluster * this) with( *this ) {
    610612        /* paranoid */ verify( ! __preemption_enabled() );
     613        /* paranoid */ verify( kernelTLS().this_proc_id );
    611614
    612615        ready_schedule_lock();
     
    614617        ready_schedule_unlock();
    615618
     619        /* paranoid */ verify( kernelTLS().this_proc_id );
    616620        /* paranoid */ verify( ! __preemption_enabled() );
    617621        return thrd;
     
    621625static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
    622626        /* paranoid */ verify( ! __preemption_enabled() );
     627        /* paranoid */ verify( kernelTLS().this_proc_id );
    623628
    624629        ready_schedule_lock();
     
    633638        ready_schedule_unlock();
    634639
     640        /* paranoid */ verify( kernelTLS().this_proc_id );
    635641        /* paranoid */ verify( ! __preemption_enabled() );
    636642        return thrd;
  • libcfa/src/concurrency/kernel.hfa

    r3eb55f98 r2d8a770  
    4949
    5050// Processor id, required for scheduling threads
    51 
     51struct __processor_id_t {
     52        unsigned id:24;
     53
     54        #if !defined(__CFA_NO_STATISTICS__)
     55                struct __stats_t * stats;
     56        #endif
     57};
    5258
    5359coroutine processorCtx_t {
     
    5763// Wrapper around kernel threads
    5864struct __attribute__((aligned(128))) processor {
     65        // Main state
     66        inline __processor_id_t;
     67
    5968        // Cluster from which to get threads
    6069        struct cluster * cltr;
     
    8089        // Handle to pthreads
    8190        pthread_t kernel_thread;
    82 
    83         // Unique id for the processor (not per cluster)
    84         unsigned unique_id;
    8591
    8692        struct {
  • libcfa/src/concurrency/kernel/fwd.hfa

    r3eb55f98 r2d8a770  
    3838                        struct $thread          * volatile this_thread;
    3939                        struct processor        * volatile this_processor;
    40                         volatile bool sched_lock;
     40                        struct __processor_id_t * volatile this_proc_id;
     41                        struct __stats_t        * volatile this_stats;
    4142
    4243                        struct {
     
    5556                                uint64_t bck_seed;
    5657                        } ready_rng;
    57 
    58                         struct __stats_t        * volatile this_stats;
    59 
    60 
    61                         #ifdef __CFA_WITH_VERIFY__
    62                                 // Debug, check if the rwlock is owned for reading
    63                                 bool in_sched_lock;
    64                                 unsigned sched_id;
    65                         #endif
    6658                } __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" )));
    6759
  • libcfa/src/concurrency/kernel/startup.cfa

    r3eb55f98 r2d8a770  
    7777static void doregister( struct cluster & cltr );
    7878static void unregister( struct cluster & cltr );
    79 static void register_tls( processor * this );
    80 static void unregister_tls( processor * this );
    8179static void ?{}( $coroutine & this, current_stack_info_t * info);
    8280static void ?{}( $thread & this, current_stack_info_t * info);
     
    125123        NULL,                                                                                           // cannot use 0p
    126124        NULL,
    127         false,
     125        NULL,
     126        NULL,
    128127        { 1, false, false },
    129         0,
    130         { 0, 0 },
    131         NULL,
    132         #ifdef __CFA_WITH_VERIFY__
    133                 false,
    134                 0,
    135         #endif
    136128};
    137129
     
    218210        (*mainProcessor){};
    219211
    220         register_tls( mainProcessor );
    221 
    222212        //initialize the global state variables
    223213        __cfaabi_tls.this_processor = mainProcessor;
     214        __cfaabi_tls.this_proc_id   = (__processor_id_t*)mainProcessor;
    224215        __cfaabi_tls.this_thread    = mainThread;
    225216
     
    282273        #endif
    283274
    284         unregister_tls( mainProcessor );
    285 
    286275        // Destroy the main processor and its context in reverse order of construction
    287276        // These were manually constructed so we need manually destroy them
     
    327316        processor * proc = (processor *) arg;
    328317        __cfaabi_tls.this_processor = proc;
     318        __cfaabi_tls.this_proc_id   = (__processor_id_t*)proc;
    329319        __cfaabi_tls.this_thread    = 0p;
    330320        __cfaabi_tls.preemption_state.[enabled, disable_count] = [false, 1];
    331 
    332         register_tls( proc );
    333 
    334321        // SKULLDUGGERY: We want to create a context for the processor coroutine
    335322        // which is needed for the 2-step context switch. However, there is no reason
     
    368355                #endif
    369356        #endif
    370 
    371         unregister_tls( proc );
    372357
    373358        return 0p;
     
    511496        #endif
    512497
     498        // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
     499        uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);
     500                this.cltr->procs.total += 1u;
     501                insert_last(this.cltr->procs.actives, this);
     502
     503                // Adjust the ready queue size
     504                ready_queue_grow( cltr );
     505
     506        // Unlock the RWlock
     507        ready_mutate_unlock( last_size );
     508
    513509        __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
    514510}
     
    516512// Not a ctor, it just preps the destruction but should not destroy members
    517513static void deinit(processor & this) {
     514        // Lock the RWlock so no-one pushes/pops while we are changing the queue
     515        uint_fast32_t last_size = ready_mutate_lock();
     516                this.cltr->procs.total -= 1u;
     517                remove(this);
     518
     519                // Adjust the ready queue size
     520                ready_queue_shrink( this.cltr );
     521
     522        // Unlock the RWlock and unregister: we don't need the read_lock any more
     523        ready_mutate_unregister((__processor_id_t*)&this, last_size );
     524
    518525        close(this.idle);
    519526}
     
    649656        cltr->nthreads -= 1;
    650657        unlock(cltr->thread_list_lock);
    651 }
    652 
    653 static void register_tls( processor * this ) {
    654         // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
    655         uint_fast32_t last_size;
    656         [this->unique_id, last_size] = ready_mutate_register();
    657 
    658                 this->cltr->procs.total += 1u;
    659                 insert_last(this->cltr->procs.actives, *this);
    660 
    661                 // Adjust the ready queue size
    662                 ready_queue_grow( this->cltr );
    663 
    664         // Unlock the RWlock
    665         ready_mutate_unlock( last_size );
    666 }
    667 
    668 
    669 static void unregister_tls( processor * this ) {
    670         // Lock the RWlock so no-one pushes/pops while we are changing the queue
    671         uint_fast32_t last_size = ready_mutate_lock();
    672                 this->cltr->procs.total -= 1u;
    673                 remove(*this);
    674 
    675                 // clear the cluster so nothing gets pushed to local queues
    676                 cluster * cltr = this->cltr;
    677                 this->cltr = 0p;
    678 
    679                 // Adjust the ready queue size
    680                 ready_queue_shrink( cltr );
    681 
    682         // Unlock the RWlock and unregister: we don't need the read_lock any more
    683         ready_mutate_unregister( this->unique_id, last_size );
    684658}
    685659
  • libcfa/src/concurrency/kernel_private.hfa

    r3eb55f98 r2d8a770  
    2525// Scheduler
    2626
     27struct __attribute__((aligned(128))) __scheduler_lock_id_t;
    2728
    2829extern "C" {
     
    7980// Lock-Free registering/unregistering of threads
    8081// Register a processor to a given cluster and get its unique id in return
    81 unsigned register_proc_id( void );
     82void register_proc_id( struct __processor_id_t * );
    8283
    8384// Unregister a processor from a given cluster using its id, getting back the original pointer
    84 void unregister_proc_id( unsigned );
     85void unregister_proc_id( struct __processor_id_t * proc );
    8586
    8687//=======================================================================
     
    111112}
    112113
    113 
    114 
    115 
     114// Cells used by the reader-writer lock
     115// while not generic, it only relies on an opaque pointer
     116struct __attribute__((aligned(128))) __scheduler_lock_id_t {
     117        // Spin lock used as the underlying lock
     118        volatile bool lock;
     119
     120        // Handle pointing to the proc owning this cell
     121        // Used for allocating cells and debugging
     122        __processor_id_t * volatile handle;
     123
     124        #ifdef __CFA_WITH_VERIFY__
     125                // Debug, check if this is owned for reading
     126                bool owned;
     127        #endif
     128};
     129
     130static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
    116131
    117132//-----------------------------------------------------------------------
     
    132147
    133148        // writer lock
    134         volatile bool write_lock;
     149        volatile bool lock;
    135150
    136151        // data pointer
    137         volatile bool * volatile * data;
     152        __scheduler_lock_id_t * data;
    138153};
    139154
     
    148163static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
    149164        /* paranoid */ verify( ! __preemption_enabled() );
    150         /* paranoid */ verify( ! kernelTLS().in_sched_lock );
    151         /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
    152         /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );
     165        /* paranoid */ verify( kernelTLS().this_proc_id );
     166
     167        unsigned iproc = kernelTLS().this_proc_id->id;
     168        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
     169        /*paranoid*/ verify(iproc < ready);
    153170
    154171        // Step 1 : make sure no writer are in the middle of the critical section
    155         while(__atomic_load_n(&write_lock, (int)__ATOMIC_RELAXED))
     172        while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
    156173                Pause();
    157174
     
    162179
    163180        // Step 2 : acquire our local lock
    164         __atomic_acquire( &kernelTLS().sched_lock );
    165         /*paranoid*/ verify(kernelTLS().sched_lock);
     181        __atomic_acquire( &data[iproc].lock );
     182        /*paranoid*/ verify(data[iproc].lock);
    166183
    167184        #ifdef __CFA_WITH_VERIFY__
    168185                // Debug, check if this is owned for reading
    169                 kernelTLS().in_sched_lock = true;
     186                data[iproc].owned = true;
    170187        #endif
    171188}
     
    173190static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
    174191        /* paranoid */ verify( ! __preemption_enabled() );
    175         /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
    176         /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );
    177         /* paranoid */ verify( kernelTLS().sched_lock );
    178         /* paranoid */ verify( kernelTLS().in_sched_lock );
     192        /* paranoid */ verify( kernelTLS().this_proc_id );
     193
     194        unsigned iproc = kernelTLS().this_proc_id->id;
     195        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
     196        /*paranoid*/ verify(iproc < ready);
     197        /*paranoid*/ verify(data[iproc].lock);
     198        /*paranoid*/ verify(data[iproc].owned);
    179199        #ifdef __CFA_WITH_VERIFY__
    180200                // Debug, check if this is owned for reading
    181                 kernelTLS().in_sched_lock = false;
     201                data[iproc].owned = false;
    182202        #endif
    183         __atomic_unlock(&kernelTLS().sched_lock);
     203        __atomic_unlock(&data[iproc].lock);
    184204}
    185205
     
    187207        static inline bool ready_schedule_islocked(void) {
    188208                /* paranoid */ verify( ! __preemption_enabled() );
    189                 /* paranoid */ verify( (!kernelTLS().in_sched_lock) || kernelTLS().sched_lock );
    190                 return kernelTLS().sched_lock;
     209                /*paranoid*/ verify( kernelTLS().this_proc_id );
     210                __processor_id_t * proc = kernelTLS().this_proc_id;
     211                return __scheduler_lock->data[proc->id].owned;
    191212        }
    192213
    193214        static inline bool ready_mutate_islocked() {
    194                 return __scheduler_lock->write_lock;
     215                return __scheduler_lock->lock;
    195216        }
    196217#endif
     
    207228// Register a processor to a given cluster and get its unique id in return
    208229// For convenience, also acquires the lock
    209 static inline [unsigned, uint_fast32_t] ready_mutate_register() {
    210         unsigned id = register_proc_id();
    211         uint_fast32_t last = ready_mutate_lock();
    212         return [id, last];
     230static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
     231        register_proc_id( proc );
     232        return ready_mutate_lock();
    213233}
    214234
    215235// Unregister a processor from a given cluster using its id, getting back the original pointer
    216236// assumes the lock is acquired
    217 static inline void ready_mutate_unregister( unsigned id, uint_fast32_t last_s ) {
     237static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
    218238        ready_mutate_unlock( last_s );
    219         unregister_proc_id( id );
     239        unregister_proc_id( proc );
    220240}
    221241
  • libcfa/src/concurrency/preemption.cfa

    r3eb55f98 r2d8a770  
    687687// Waits on SIGALRM and sends SIGUSR1 to whoever needs it
    688688static void * alarm_loop( __attribute__((unused)) void * args ) {
    689         unsigned id = register_proc_id();
     689        __processor_id_t id;
     690        register_proc_id(&id);
     691        __cfaabi_tls.this_proc_id = &id;
     692
    690693
    691694        // Block sigalrms to control when they arrive
     
    746749EXIT:
    747750        __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
    748         unregister_proc_id(id);
     751        register_proc_id(&id);
    749752
    750753        return 0p;
  • libcfa/src/concurrency/ready_queue.cfa

    r3eb55f98 r2d8a770  
    9393        this.alloc = 0;
    9494        this.ready = 0;
     95        this.lock  = false;
    9596        this.data  = alloc(this.max);
    96         this.write_lock  = false;
    97 
     97
     98        /*paranoid*/ verify( 0 == (((uintptr_t)(this.data    )) % 64) );
     99        /*paranoid*/ verify( 0 == (((uintptr_t)(this.data + 1)) % 64) );
    98100        /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.alloc), &this.alloc));
    99101        /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.ready), &this.ready));
     
    104106}
    105107
     108void ?{}( __scheduler_lock_id_t & this, __processor_id_t * proc ) {
     109        this.handle = proc;
     110        this.lock   = false;
     111        #ifdef __CFA_WITH_VERIFY__
     112                this.owned  = false;
     113        #endif
     114}
    106115
    107116//=======================================================================
    108117// Lock-Free registering/unregistering of threads
    109 unsigned register_proc_id( void ) with(*__scheduler_lock) {
     118void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
    110119        __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);
    111         bool * handle = (bool *)&kernelTLS().sched_lock;
    112120
    113121        // Step - 1 : check if there is already space in the data
     
    116124        // Check among all the ready
    117125        for(uint_fast32_t i = 0; i < s; i++) {
    118                 bool * volatile * cell = (bool * volatile *)&data[i]; // Cforall is bugged and the double volatiles causes problems
    119                 /* paranoid */ verify( handle != *cell );
    120 
    121                 bool * null = 0p; // Re-write every loop since compare thrashes it
    122                 if( __atomic_load_n(cell, (int)__ATOMIC_RELAXED) == null
    123                         && __atomic_compare_exchange_n( cell, &null, handle, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
    124                         /* paranoid */ verify(i < ready);
    125                         /* paranoid */ verify( (kernelTLS().sched_id = i, true) );
    126                         return i;
     126                __processor_id_t * null = 0p; // Re-write every loop since compare thrashes it
     127                if( __atomic_load_n(&data[i].handle, (int)__ATOMIC_RELAXED) == null
     128                        && __atomic_compare_exchange_n( &data[i].handle, &null, proc, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     129                        /*paranoid*/ verify(i < ready);
     130                        /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
     131                        /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
     132                        proc->id = i;
    127133                }
    128134        }
     
    135141
    136142        // Step - 3 : Mark space as used and then publish it.
    137         data[n] = handle;
     143        __scheduler_lock_id_t * storage = (__scheduler_lock_id_t *)&data[n];
     144        (*storage){ proc };
    138145        while() {
    139146                unsigned copy = n;
     
    147154
    148155        // Return new spot.
    149         /* paranoid */ verify(n < ready);
    150         /* paranoid */ verify( (kernelTLS().sched_id = n, true) );
    151         return n;
    152 }
    153 
    154 void unregister_proc_id( unsigned id ) with(*__scheduler_lock) {
    155         /* paranoid */ verify(id < ready);
    156         /* paranoid */ verify(id == kernelTLS().sched_id);
    157         /* paranoid */ verify(data[id] == &kernelTLS().sched_lock);
    158 
    159         bool * volatile * cell = (bool * volatile *)&data[id]; // Cforall is bugged and the double volatiles causes problems
    160 
    161         __atomic_store_n(cell, 0p, __ATOMIC_RELEASE);
     156        /*paranoid*/ verify(n < ready);
     157        /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size));
     158        /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0);
     159        proc->id = n;
     160}
     161
     162void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
     163        unsigned id = proc->id;
     164        /*paranoid*/ verify(id < ready);
     165        /*paranoid*/ verify(proc == __atomic_load_n(&data[id].handle, __ATOMIC_RELAXED));
     166        __atomic_store_n(&data[id].handle, 0p, __ATOMIC_RELEASE);
    162167
    163168        __cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc);
     
    169174uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {
    170175        /* paranoid */ verify( ! __preemption_enabled() );
    171         /* paranoid */ verify( ! kernelTLS().sched_lock );
    172176
    173177        // Step 1 : lock global lock
    174178        // It is needed to avoid processors that register mid Critical-Section
    175179        //   to simply lock their own lock and enter.
    176         __atomic_acquire( &write_lock );
     180        __atomic_acquire( &lock );
    177181
    178182        // Step 2 : lock per-proc lock
     
    182186        uint_fast32_t s = ready;
    183187        for(uint_fast32_t i = 0; i < s; i++) {
    184                 volatile bool * llock = data[i];
    185                 if(llock) __atomic_acquire( llock );
     188                __atomic_acquire( &data[i].lock );
    186189        }
    187190
     
    200203        // Alternative solution : return s in write_lock and pass it to write_unlock
    201204        for(uint_fast32_t i = 0; i < last_s; i++) {
    202                 volatile bool * llock = data[i];
    203                 if(llock) __atomic_store_n(llock, (bool)false, __ATOMIC_RELEASE);
     205                verify(data[i].lock);
     206                __atomic_store_n(&data[i].lock, (bool)false, __ATOMIC_RELEASE);
    204207        }
    205208
    206209        // Step 2 : release global lock
    207         /*paranoid*/ assert(true == write_lock);
    208         __atomic_store_n(&write_lock, (bool)false, __ATOMIC_RELEASE);
     210        /*paranoid*/ assert(true == lock);
     211        __atomic_store_n(&lock, (bool)false, __ATOMIC_RELEASE);
    209212
    210213        /* paranoid */ verify( ! __preemption_enabled() );
Note: See TracChangeset for help on using the changeset viewer.