Timestamp: Jan 7, 2021, 2:55:57 PM
Author: Thierry Delisle <tdelisle@…>
Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
Children: 58fe85a
Parents: bdfc032 (diff), 44e37ef (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
Message: Merge branch 'master' into dkobets-vector
File: 1 edited

  • libcfa/src/concurrency/kernel_private.hfa

--- libcfa/src/concurrency/kernel_private.hfa (rbdfc032)
+++ libcfa/src/concurrency/kernel_private.hfa (reef8dfb)

 // Created On       : Mon Feb 13 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Nov 30 19:25:02 2019
-// Update Count     : 8
+// Last Modified On : Wed Aug 12 08:21:33 2020
+// Update Count     : 9
 //

…
 
 #include "alarm.hfa"
-
+#include "stats.hfa"
 
 //-----------------------------------------------------------------------------
 // Scheduler
+
+struct __attribute__((aligned(128))) __scheduler_lock_id_t;
 
 extern "C" {
…
 }
 
-void ScheduleThread( thread_desc * );
-static inline void WakeThread( thread_desc * thrd ) {
-	if( !thrd ) return;
-
-	verify(thrd->state == Inactive);
-
-	disable_interrupts();
-	ScheduleThread( thrd );
-	enable_interrupts( __cfaabi_dbg_ctx );
-}
-thread_desc * nextThread(cluster * this);
-
-//Block current thread and release/wake-up the following resources
-void BlockInternal(void);
-void BlockInternal(__spinlock_t * lock);
-void BlockInternal(thread_desc * thrd);
-void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
-void BlockInternal(__spinlock_t * locks [], unsigned short count);
-void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
-void BlockInternal(__finish_callback_fptr_t callback);
-void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
+void __schedule_thread( $thread * )
+#if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
+	__attribute__((nonnull (1)))
+#endif
+;
+
+extern bool __preemption_enabled();
+
+//release/wake-up the following resources
+void __thread_finish( $thread * thrd );
 
 //-----------------------------------------------------------------------------
…
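The removed WakeThread shows the calling protocol that now falls to callers of __schedule_thread: a thread is handed to the scheduler with preemption disabled. A minimal sketch of a wake-up against the new declaration, assuming the disable_interrupts/enable_interrupts pair keeps the shape shown in the removed code (the helper name is hypothetical):

	// sketch only: assumes it lives inside the CFA kernel where this header is visible
	static inline void wake_one_sketch( $thread * thrd ) {
		if( !thrd ) return;                      // nothing to wake
		disable_interrupts();                    // scheduling requires preemption off
		__schedule_thread( thrd );               // hand the thread to the ready queue
		enable_interrupts( __cfaabi_dbg_ctx );   // restore preemption
	}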
 void main(processorCtx_t *);
 
-void * create_pthread( pthread_t *, void * (*)(void *), void * );
-
-static inline void wake_fast(processor * this) {
-	__cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
-	post( this->idleLock );
-}
-
-static inline void wake(processor * this) {
-	disable_interrupts();
-	wake_fast(this);
-	enable_interrupts( __cfaabi_dbg_ctx );
-}
-
-struct event_kernel_t {
-	alarm_list_t alarms;
-	__spinlock_t lock;
-};
-
-extern event_kernel_t * event_kernel;
-
-struct __cfa_kernel_preemption_state_t {
-	bool enabled;
-	bool in_progress;
-	unsigned short disable_count;
-};
-
-extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
+void * __create_pthread( pthread_t *, void * (*)(void *), void * );
+void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );
+
+
+
+extern cluster * mainCluster;
 
 //-----------------------------------------------------------------------------
 // Threads
 extern "C" {
-	void CtxInvokeThread(void (*main)(void *), void * this);
-}
-
-extern void ThreadCtxSwitch(coroutine_desc * src, coroutine_desc * dst);
+	void __cfactx_invoke_thread(void (*main)(void *), void * this);
+}
 
 __cfaabi_dbg_debug_do(
-	extern void __cfaabi_dbg_thread_register  ( thread_desc * thrd );
-	extern void __cfaabi_dbg_thread_unregister( thread_desc * thrd );
+	extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
+	extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
 )
 
+#define TICKET_BLOCKED (-1) // thread is blocked
+#define TICKET_RUNNING ( 0) // thread is running
+#define TICKET_UNBLOCK ( 1) // thread should ignore next block
+
 //-----------------------------------------------------------------------------
 // Utils
-#define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
-
-static inline uint32_t tls_rand() {
-	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
-	kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
-	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
-	return kernelTLS.rand_seed;
-}
-
-
-void doregister( struct cluster & cltr );
-void unregister( struct cluster & cltr );
-
-void doregister( struct cluster * cltr, struct thread_desc & thrd );
-void unregister( struct cluster * cltr, struct thread_desc & thrd );
-
-void doregister( struct cluster * cltr, struct processor * proc );
-void unregister( struct cluster * cltr, struct processor * proc );
+void doregister( struct cluster * cltr, struct $thread & thrd );
+void unregister( struct cluster * cltr, struct $thread & thrd );
+
+//-----------------------------------------------------------------------------
+// I/O
+void ^?{}(io_context & this, bool );
+
+//=======================================================================
+// Cluster lock API
+//=======================================================================
+// Cells used by the reader-writer lock
+// while not generic, it only relies on an opaque pointer
+struct __attribute__((aligned(128))) __scheduler_lock_id_t {
+	// Spin lock used as the underlying lock
+	volatile bool lock;
+
+	// Handle pointing to the proc owning this cell
+	// Used for allocating cells and debugging
+	__processor_id_t * volatile handle;
+
+	#ifdef __CFA_WITH_VERIFY__
+		// Debug, check if this is owned for reading
+		bool owned;
+	#endif
+};
+
+static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
+
+// Lock-Free registering/unregistering of threads
+// Register a processor to a given cluster and get its unique id in return
+unsigned doregister( struct __processor_id_t * proc );
+
+// Unregister a processor from a given cluster using its id, getting back the original pointer
+void     unregister( struct __processor_id_t * proc );
+
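A short sketch of how these two calls pair up over a processor's lifetime; only the doregister/unregister signatures come from the header, while the surrounding lifecycle and where the returned id is kept are assumptions:

	// sketch only: registration brackets the processor's scheduling loop
	static void processor_lifetime_sketch( struct __processor_id_t * proc ) {
		unsigned id = doregister( proc );   // claim a cell in the scheduler lock, get its index
		// ... run the scheduling loop; `id` indexes this processor's cell (see reader side below) ...
		unregister( proc );                 // release the cell on shutdown
	}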
+//-----------------------------------------------------------------------
+// Cluster idle lock/unlock
+static inline void lock(__cluster_idles & this) {
+	for() {
+		uint64_t l = this.lock;
+		if(
+			(0 == (l % 2))
+			&& __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+		) return;
+		Pause();
+	}
+}
+
+static inline void unlock(__cluster_idles & this) {
+	/* paranoid */ verify( 1 == (this.lock % 2) );
+	__atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
+}
+
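The counter doubles as the lock state: an even value means unlocked and an odd value means locked, so lock() only succeeds when it can bump an even value to odd, and unlock() bumps it back to even. A hedged usage sketch, where `idles` stands for a cluster's __cluster_idles instance (an assumption; the header does not show the enclosing field):

	// sketch only: mutual exclusion over the cluster's idle state
	static void idle_update_sketch( __cluster_idles & idles ) {
		lock( idles );     // spins until the counter is even, then makes it odd
		// ... add or remove a processor from the idle set ...
		unlock( idles );   // increment again: odd -> even, lock released
	}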
+//=======================================================================
+// Reader-writer lock implementation
+// Concurrent with doregister/unregister,
+//    i.e., threads can be added at any point during or between the entry/exit
+
+//-----------------------------------------------------------------------
+// simple spinlock underlying the RWLock
+// Blocking acquire
+static inline void __atomic_acquire(volatile bool * ll) {
+	while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
+		while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
+			Pause();
+	}
+	/* paranoid */ verify(*ll);
+}
+
+// Non-Blocking acquire
+static inline bool __atomic_try_acquire(volatile bool * ll) {
+	return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
+}
+
+// Release
+static inline void __atomic_unlock(volatile bool * ll) {
+	/* paranoid */ verify(*ll);
+	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
+}
+
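__atomic_acquire is a test-and-test-and-set spinlock: it exchanges true, and if the lock was already held it spins on cheap relaxed loads until it observes false before retrying the exchange. A minimal sketch of guarding some data with one of these flags; the flag and the helper are assumptions, only the three primitives come from the header:

	// sketch only: a boolean flag used as a spinlock
	static volatile bool sketch_lock = false;

	static void guarded_update_sketch(void) {
		if( !__atomic_try_acquire( &sketch_lock ) )   // opportunistic attempt
			__atomic_acquire( &sketch_lock );         // fall back to the blocking acquire
		// ... touch the protected data ...
		__atomic_unlock( &sketch_lock );              // store-release of false
	}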
+//-----------------------------------------------------------------------
+// Reader-Writer lock protecting the ready-queues
+// While this lock is mostly generic, some aspects have been
+// hard-coded for the ready-queue for simplicity and performance
+struct __scheduler_RWLock_t {
+	// total cachelines allocated
+	unsigned int max;
+
+	// cachelines currently in use
+	volatile unsigned int alloc;
+
+	// cachelines ready to iterate over
+	// (!= to alloc when thread is in second half of doregister)
+	volatile unsigned int ready;
+
+	// writer lock
+	volatile bool lock;
+
+	// data pointer
+	__scheduler_lock_id_t * data;
+};
+
+void  ?{}(__scheduler_RWLock_t & this);
+void ^?{}(__scheduler_RWLock_t & this);
+
+extern __scheduler_RWLock_t * __scheduler_lock;
+
+//-----------------------------------------------------------------------
+// Reader side : acquire when using the ready queue to schedule but not
+//  creating/destroying queues
+static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+
+	unsigned iproc = kernelTLS().this_proc_id->id;
+	/*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
+	/*paranoid*/ verify(iproc < ready);
+
+	// Step 1 : make sure no writer is in the middle of the critical section
+	while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
+		Pause();
+
+	// Fence needed because we don't want to start trying to acquire the lock
+	// before we read a false.
+	// Not needed on x86
+	// std::atomic_thread_fence(std::memory_order_seq_cst);
+
+	// Step 2 : acquire our local lock
+	__atomic_acquire( &data[iproc].lock );
+	/*paranoid*/ verify(data[iproc].lock);
+
+	#ifdef __CFA_WITH_VERIFY__
+		// Debug, check if this is owned for reading
+		data[iproc].owned = true;
+	#endif
+}
+
+static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
+	/* paranoid */ verify( ! __preemption_enabled() );
+	/* paranoid */ verify( kernelTLS().this_proc_id );
+
+	unsigned iproc = kernelTLS().this_proc_id->id;
+	/*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
+	/*paranoid*/ verify(iproc < ready);
+	/*paranoid*/ verify(data[iproc].lock);
+	/*paranoid*/ verify(data[iproc].owned);
+	#ifdef __CFA_WITH_VERIFY__
+		// Debug, check if this is owned for reading
+		data[iproc].owned = false;
+	#endif
+	__atomic_unlock(&data[iproc].lock);
+}
+
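A sketch of the reader-side protocol from a caller's point of view; the push call comes from the Ready-Queue API declared below, while the wrapper itself and the claim that preemption is already disabled at this point are assumptions:

	// sketch only: publish a ready thread while holding the per-processor reader lock
	static void schedule_on_sketch( struct cluster * cltr, struct $thread * thrd ) {
		// caller assumed to have preemption disabled, as the verify() in ready_schedule_lock demands
		ready_schedule_lock();      // waits out any writer, then takes this processor's cell
		push( cltr, thrd );         // ready-queue operation done under the reader lock
		ready_schedule_unlock();    // release the cell
	}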
+#ifdef __CFA_WITH_VERIFY__
+	static inline bool ready_schedule_islocked(void) {
+		/* paranoid */ verify( ! __preemption_enabled() );
+		/*paranoid*/ verify( kernelTLS().this_proc_id );
+		__processor_id_t * proc = kernelTLS().this_proc_id;
+		return __scheduler_lock->data[proc->id].owned;
+	}
+
+	static inline bool ready_mutate_islocked() {
+		return __scheduler_lock->lock;
+	}
+#endif
+
+//-----------------------------------------------------------------------
+// Writer side : acquire when changing the ready queue, e.g. adding more
+//  queues or removing them.
+uint_fast32_t ready_mutate_lock( void );
+
+void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
+
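A sketch of the writer-side pairing; the header only states that ready_mutate_unlock takes the value returned by ready_mutate_lock, and ready_queue_grow (declared below) stands in for the kind of structural change performed under it:

	// sketch only: structural change to the ready queue under the writer lock
	static void grow_ready_queue_sketch( struct cluster * cltr, int target ) {
		uint_fast32_t last = ready_mutate_lock();   // exclude all readers and other writers
		ready_queue_grow( cltr, target );           // e.g. add lanes for new processors
		ready_mutate_unlock( last );                // hand back the value the lock returned
	}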
+//=======================================================================
+// Ready-Queue API
+//-----------------------------------------------------------------------
+// query whether the ready queue of a cluster currently holds threads
+__attribute__((hot)) bool query(struct cluster * cltr);
+
+//-----------------------------------------------------------------------
+// push thread onto a ready queue for a cluster
+// returns true if the list was previously empty, false otherwise
+__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
+
+//-----------------------------------------------------------------------
+// pop thread from the ready queue of a cluster
+// returns 0p if empty
+// May return 0p spuriously
+__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
+
+//-----------------------------------------------------------------------
+// pop thread from the ready queue of a cluster
+// returns 0p if empty
+// guaranteed to find any threads added before this call
+__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
+
+//-----------------------------------------------------------------------
+// remove thread from the ready queue of a cluster
+// returns a bool indicating whether it was found
+bool remove_head(struct cluster * cltr, struct $thread * thrd);
+
+//-----------------------------------------------------------------------
+// Increase the width of the ready queue (number of lanes) by 4
+void ready_queue_grow  (struct cluster * cltr, int target);
+
+//-----------------------------------------------------------------------
+// Decrease the width of the ready queue (number of lanes) by 4
+void ready_queue_shrink(struct cluster * cltr, int target);
+
 
 // Local Variables: //
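Taken together, the comments above suggest a two-level consumer: pop may miss spuriously, while pop_slow is guaranteed to see anything pushed before the call. A hedged sketch of such a loop, presumably run with the reader lock held as in the earlier sketch; the control flow is an assumption, not the kernel's actual scheduler:

	// sketch only: pick the next ready thread for one processor
	static struct $thread * next_ready_sketch( struct cluster * cltr ) {
		struct $thread * thrd = pop( cltr );   // fast path, may return 0p spuriously
		if( 0p == thrd )
			thrd = pop_slow( cltr );           // slower path, sees every earlier push
		return thrd;                           // 0p: nothing was available
	}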