File: 1 edited

Legend: unchanged lines carry no marker, lines removed in this changeset are prefixed with '-', and added lines with '+'; '…' marks elided unchanged context.
  • libcfa/src/concurrency/kernel_private.hfa

--- rbfcf6b9
+++ r09f357ec
 // Created On       : Mon Feb 13 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Aug 12 08:21:33 2020
-// Update Count     : 9
+// Last Modified On : Sat Nov 30 19:25:02 2019
+// Update Count     : 8
 //

…

 #include "alarm.hfa"
-#include "stats.hfa"
+

 //-----------------------------------------------------------------------------
 // Scheduler
-
-struct __attribute__((aligned(128))) __scheduler_lock_id_t;

 extern "C" {
…
 }

-void __schedule_thread( $thread * )
-#if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
-        __attribute__((nonnull (1)))
-#endif
-;
+void ScheduleThread( thread_desc * );
+static inline void WakeThread( thread_desc * thrd ) {
+        if( !thrd ) return;

-extern bool __preemption_enabled();
+        verify(thrd->state == Inactive);

-//release/wake-up the following resources
-void __thread_finish( $thread * thrd );
+        disable_interrupts();
+        ScheduleThread( thrd );
+        enable_interrupts( __cfaabi_dbg_ctx );
+}
+thread_desc * nextThread(cluster * this);
+
+//Block current thread and release/wake-up the following resources
+void BlockInternal(void);
+void BlockInternal(__spinlock_t * lock);
+void BlockInternal(thread_desc * thrd);
+void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
+void BlockInternal(__spinlock_t * locks [], unsigned short count);
+void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
+void BlockInternal(__finish_callback_fptr_t callback);
+void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
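Note: the BlockInternal overloads added here are the kernel's parking primitives: they block the current thread and, as part of the same operation, release the listed spinlocks and/or schedule the listed threads, so a waker that later acquires the same lock cannot observe a half-parked thread. A minimal sketch of the intended usage pattern, assuming a hypothetical one_shot type, the usual lock/unlock interface on __spinlock_t, and kernelTLS.this_thread as the running thread (none of which appear in this hunk):

        // Hypothetical one-shot handoff (illustrative only, not part of this changeset).
        struct one_shot {
                __spinlock_t lock;
                thread_desc * waiter;                   // parked thread, or 0p
                bool signalled;
        };

        void wait( one_shot & this ) {
                lock( this.lock __cfaabi_dbg_ctx2 );    // assumed kernel spinlock interface
                if( this.signalled ) { unlock( this.lock ); return; }
                this.waiter = kernelTLS.this_thread;    // assumed TLS accessor for the running thread
                BlockInternal( &this.lock );            // blocks and releases the lock atomically
        }

        void signal( one_shot & this ) {
                lock( this.lock __cfaabi_dbg_ctx2 );
                this.signalled = true;
                thread_desc * t = this.waiter; this.waiter = 0p;
                unlock( this.lock );
                WakeThread( t );                        // no-op when t == 0p, per the check above
        }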

 //-----------------------------------------------------------------------------
…
 void main(processorCtx_t *);

-void * __create_pthread( pthread_t *, void * (*)(void *), void * );
-void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );
+void * create_pthread( pthread_t *, void * (*)(void *), void * );

+static inline void wake_fast(processor * this) {
+        __cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
+        post( this->idleLock );
+}

+static inline void wake(processor * this) {
+        disable_interrupts();
+        wake_fast(this);
+        enable_interrupts( __cfaabi_dbg_ctx );
+}
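Note: wake_fast posts the processor's idle semaphore directly and so is presumably intended for callers that already run with interrupts disabled, while wake brackets the post with disable_interrupts/enable_interrupts. The consumer side is not in this hunk; a hedged sketch of what the matching idle path might look like (the halt name and the wait call are assumptions):

        // Hypothetical idle path on the processor side (not shown in this changeset):
        // an idle processor parks on its idleLock and is resumed by wake()/wake_fast().
        static void halt( processor * this ) {
                __cfaabi_dbg_print_safe("Kernel : Processor %p going to sleep\n", this);
                wait( this->idleLock );                 // assumed counterpart of the post() above
                __cfaabi_dbg_print_safe("Kernel : Processor %p woke up\n", this);
        }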

-extern cluster * mainCluster;
+struct event_kernel_t {
+        alarm_list_t alarms;
+        __spinlock_t lock;
+};
+
+extern event_kernel_t * event_kernel;
+
+struct __cfa_kernel_preemption_state_t {
+        bool enabled;
+        bool in_progress;
+        unsigned short disable_count;
+};
+
+extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
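Note: the thread-local preemption_state pairs a boolean with a counter so that disable_interrupts/enable_interrupts can nest: only the outermost enable actually re-enables preemption. A sketch of the expected counter behaviour (illustrative; the real bookkeeping lives in the preemption code, not in this header):

        // Illustrative nesting discipline for the declarations above (not the actual implementation).
        static inline void nesting_example(void) {
                disable_interrupts();                   // disable_count 0 -> 1, enabled becomes false
                disable_interrupts();                   // disable_count 1 -> 2, still disabled
                enable_interrupts( __cfaabi_dbg_ctx );  // disable_count 2 -> 1, still disabled
                enable_interrupts( __cfaabi_dbg_ctx );  // disable_count 1 -> 0, enabled becomes true
        }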

 //-----------------------------------------------------------------------------
 // Threads
 extern "C" {
-        void __cfactx_invoke_thread(void (*main)(void *), void * this);
+        void CtxInvokeThread(void (*main)(void *), void * this);
 }

+extern void ThreadCtxSwitch(coroutine_desc * src, coroutine_desc * dst);
+
 __cfaabi_dbg_debug_do(
-        extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
-        extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
+        extern void __cfaabi_dbg_thread_register  ( thread_desc * thrd );
+        extern void __cfaabi_dbg_thread_unregister( thread_desc * thrd );
 )
-
-#define TICKET_BLOCKED (-1) // thread is blocked
-#define TICKET_RUNNING ( 0) // thread is running
-#define TICKET_UNBLOCK ( 1) // thread should ignore next block

 //-----------------------------------------------------------------------------
 // Utils
-void doregister( struct cluster * cltr, struct $thread & thrd );
-void unregister( struct cluster * cltr, struct $thread & thrd );
+#define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
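Note: KERNEL_STORAGE reserves raw static storage for a kernel object of type T so it can be constructed in place during boot, before dynamic allocation is available. A sketch of the usage pattern (the exampleCluster name and startup function are illustrative, not part of this changeset):

        // Illustrative use of KERNEL_STORAGE.
        KERNEL_STORAGE(cluster, exampleCluster);        // expands to: static char storage_exampleCluster[sizeof(cluster)];

        cluster * exampleCluster;

        static void startup_example(void) {
                // construct the object in place inside the reserved storage
                exampleCluster = (cluster *)&storage_exampleCluster;
                (*exampleCluster){};
        }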

-//-----------------------------------------------------------------------------
-// I/O
-void ^?{}(io_context & this, bool );
-
-//=======================================================================
-// Cluster lock API
-//=======================================================================
-// Cells use by the reader writer lock
-// while not generic it only relies on a opaque pointer
-struct __attribute__((aligned(128))) __scheduler_lock_id_t {
-        // Spin lock used as the underlying lock
-        volatile bool lock;
-
-        // Handle pointing to the proc owning this cell
-        // Used for allocating cells and debugging
-        __processor_id_t * volatile handle;
-
-        #ifdef __CFA_WITH_VERIFY__
-                // Debug, check if this is owned for reading
-                bool owned;
-        #endif
-};
-
-static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
-
-// Lock-Free registering/unregistering of threads
-// Register a processor to a given cluster and get its unique id in return
-unsigned doregister( struct __processor_id_t * proc );
-
-// Unregister a processor from a given cluster using its id, getting back the original pointer
-void     unregister( struct __processor_id_t * proc );
-
-//-----------------------------------------------------------------------
-// Cluster idle lock/unlock
-static inline void lock(__cluster_idles & this) {
-        for() {
-                uint64_t l = this.lock;
-                if(
-                        (0 == (l % 2))
-                        && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
-                ) return;
-                Pause();
-        }
+static inline uint32_t tls_rand() {
+        kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
+        kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
+        kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
+        return kernelTLS.rand_seed;
 }
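Note: the added tls_rand is an xorshift-style generator over a per-kernel-thread seed, so it needs no atomics or locking; since a zero seed maps to zero forever, rand_seed must be initialised to a non-zero value elsewhere. One way such a generator is typically consumed (illustrative only, not part of this changeset):

        // Illustrative consumer: pick a random index, e.g. a ready-queue lane or a
        // work-stealing victim, without any synchronization.
        static inline unsigned tls_rand_below( unsigned count ) {
                return tls_rand() % count;              // per-processor seed, no locking required
        }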

-static inline void unlock(__cluster_idles & this) {
-        /* paranoid */ verify( 1 == (this.lock % 2) );
-        __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
-}

-//=======================================================================
-// Reader-writer lock implementation
-// Concurrent with doregister/unregister,
-//    i.e., threads can be added at any point during or between the entry/exit
+void doregister( struct cluster & cltr );
+void unregister( struct cluster & cltr );

-//-----------------------------------------------------------------------
-// simple spinlock underlying the RWLock
-// Blocking acquire
-static inline void __atomic_acquire(volatile bool * ll) {
-        while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
-                while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
-                        Pause();
-        }
-        /* paranoid */ verify(*ll);
-}
+void doregister( struct cluster * cltr, struct thread_desc & thrd );
+void unregister( struct cluster * cltr, struct thread_desc & thrd );

-// Non-Blocking acquire
-static inline bool __atomic_try_acquire(volatile bool * ll) {
-        return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
-}
-
-// Release
-static inline void __atomic_unlock(volatile bool * ll) {
-        /* paranoid */ verify(*ll);
-        __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
-}
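Note: the removed __atomic_acquire/__atomic_try_acquire/__atomic_unlock trio is a test-and-test-and-set spinlock: the exchange attempts to take the lock, and on contention the inner relaxed-load loop spins read-only so the cache line is not bounced until the holder releases it. A sketch of how the trio is used (the flag would normally be a field such as the lock inside __scheduler_lock_id_t above; this standalone version is illustrative only):

        // Illustrative critical sections built on the removed helpers.
        static volatile bool example_ll = false;

        static void with_lock(void) {
                __atomic_acquire( &example_ll );        // blocking acquire, spins while held
                // ... access the data protected by example_ll ...
                __atomic_unlock( &example_ll );         // store-release hands the lock back
        }

        static bool try_with_lock(void) {
                if( !__atomic_try_acquire( &example_ll ) ) return false;   // contended, give up
                // ... access the protected data ...
                __atomic_unlock( &example_ll );
                return true;
        }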
-
-//-----------------------------------------------------------------------
-// Reader-Writer lock protecting the ready-queues
-// while this lock is mostly generic some aspects
-// have been hard-coded to for the ready-queue for
-// simplicity and performance
-struct __scheduler_RWLock_t {
-        // total cachelines allocated
-        unsigned int max;
-
-        // cachelines currently in use
-        volatile unsigned int alloc;
-
-        // cachelines ready to itereate over
-        // (!= to alloc when thread is in second half of doregister)
-        volatile unsigned int ready;
-
-        // writer lock
-        volatile bool lock;
-
-        // data pointer
-        __scheduler_lock_id_t * data;
-};
-
-void  ?{}(__scheduler_RWLock_t & this);
-void ^?{}(__scheduler_RWLock_t & this);
-
-extern __scheduler_RWLock_t * __scheduler_lock;
-
-//-----------------------------------------------------------------------
-// Reader side : acquire when using the ready queue to schedule but not
-//  creating/destroying queues
-static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
-        /* paranoid */ verify( ! __preemption_enabled() );
-        /* paranoid */ verify( kernelTLS().this_proc_id );
-
-        unsigned iproc = kernelTLS().this_proc_id->id;
-        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
-        /*paranoid*/ verify(iproc < ready);
-
-        // Step 1 : make sure no writer are in the middle of the critical section
-        while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
-                Pause();
-
-        // Fence needed because we don't want to start trying to acquire the lock
-        // before we read a false.
-        // Not needed on x86
-        // std::atomic_thread_fence(std::memory_order_seq_cst);
-
-        // Step 2 : acquire our local lock
-        __atomic_acquire( &data[iproc].lock );
-        /*paranoid*/ verify(data[iproc].lock);
-
-        #ifdef __CFA_WITH_VERIFY__
-                // Debug, check if this is owned for reading
-                data[iproc].owned = true;
-        #endif
-}
-
-static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
-        /* paranoid */ verify( ! __preemption_enabled() );
-        /* paranoid */ verify( kernelTLS().this_proc_id );
-
-        unsigned iproc = kernelTLS().this_proc_id->id;
-        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
-        /*paranoid*/ verify(iproc < ready);
-        /*paranoid*/ verify(data[iproc].lock);
-        /*paranoid*/ verify(data[iproc].owned);
-        #ifdef __CFA_WITH_VERIFY__
-                // Debug, check if this is owned for reading
-                data[iproc].owned = false;
-        #endif
-        __atomic_unlock(&data[iproc].lock);
-}
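Note: ready_schedule_lock/unlock are the reader side of the removed scheduler lock: a reader first waits for the writer flag to clear, then takes only its own processor's cache-line-sized cell, so concurrent readers never contend with one another. A sketch of the calling convention implied by the paranoid checks, i.e. preemption disabled and a registered processor id (the push call and interrupt bracket are assumptions, not part of this changeset):

        // Illustrative reader-side bracket around a ready-queue operation.
        static void schedule_under_reader_lock( struct cluster * cltr, struct $thread * thrd ) {
                disable_interrupts();                   // the lock verifies !__preemption_enabled()
                ready_schedule_lock();
                push( cltr, thrd );                     // ready-queue access is safe against resizes here
                ready_schedule_unlock();
                enable_interrupts( __cfaabi_dbg_ctx );
        }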
-
-#ifdef __CFA_WITH_VERIFY__
-        static inline bool ready_schedule_islocked(void) {
-                /* paranoid */ verify( ! __preemption_enabled() );
-                /*paranoid*/ verify( kernelTLS().this_proc_id );
-                __processor_id_t * proc = kernelTLS().this_proc_id;
-                return __scheduler_lock->data[proc->id].owned;
-        }
-
-        static inline bool ready_mutate_islocked() {
-                return __scheduler_lock->lock;
-        }
-#endif
-
-//-----------------------------------------------------------------------
-// Writer side : acquire when changing the ready queue, e.g. adding more
-//  queues or removing them.
-uint_fast32_t ready_mutate_lock( void );
-
-void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
-
-//=======================================================================
-// Ready-Queue API
-//-----------------------------------------------------------------------
-// pop thread from the ready queue of a cluster
-// returns 0p if empty
-__attribute__((hot)) bool query(struct cluster * cltr);
-
-//-----------------------------------------------------------------------
-// push thread onto a ready queue for a cluster
-// returns true if the list was previously empty, false otherwise
-__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
-
-//-----------------------------------------------------------------------
-// pop thread from the ready queue of a cluster
-// returns 0p if empty
-// May return 0p spuriously
-__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
-
-//-----------------------------------------------------------------------
-// pop thread from the ready queue of a cluster
-// returns 0p if empty
-// guaranteed to find any threads added before this call
-__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
-
-//-----------------------------------------------------------------------
-// remove thread from the ready queue of a cluster
-// returns bool if it wasn't found
-bool remove_head(struct cluster * cltr, struct $thread * thrd);
-
-//-----------------------------------------------------------------------
-// Increase the width of the ready queue (number of lanes) by 4
-void ready_queue_grow  (struct cluster * cltr, int target);
-
-//-----------------------------------------------------------------------
-// Decrease the width of the ready queue (number of lanes) by 4
-void ready_queue_shrink(struct cluster * cltr, int target);
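Note: taken together, the removed ready-queue declarations form the scheduler fast path: push reports whether the lane was previously empty (a cue to wake an idle processor), pop may fail spuriously, and pop_slow is the reliable but slower fallback. A sketch of how a caller might combine them (illustrative only; it also assumes query() reports whether the cluster currently has ready threads):

        // Illustrative consumer of the removed ready-queue API (not part of this changeset).
        static struct $thread * find_work( struct cluster * cltr ) {
                struct $thread * thrd = pop( cltr );    // fast path, may return 0p spuriously
                if( 0p == thrd && query( cltr ) ) {
                        thrd = pop_slow( cltr );        // guaranteed to see earlier pushes
                }
                return thrd;
        }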
-
+void doregister( struct cluster * cltr, struct processor * proc );
+void unregister( struct cluster * cltr, struct processor * proc );

 // Local Variables: //