  • libcfa/src/concurrency/kernel_private.hfa

    rdd4e2d7 → r13c5e19
    
     #include "alarm.hfa"
    +#include "stats.hfa"
    +
    +#include "bits/random.hfa"
    
    
     //-----------------------------------------------------------------------------
     // Scheduler
    +
    +struct __attribute__((aligned(128))) __scheduler_lock_id_t;
    
     extern "C" {
    …
     }
    
    -void __schedule_thread( $thread * ) __attribute__((nonnull (1)));
    +void __schedule_thread( struct __processor_id_t *, $thread * ) __attribute__((nonnull (2)));
    
     //Block current thread and release/wake-up the following resources
    …
    
     // KERNEL ONLY unpark with out disabling interrupts
    -void __unpark( $thread * thrd __cfaabi_dbg_ctx_param2 );
    +void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );
    
     //-----------------------------------------------------------------------------
    …
     //-----------------------------------------------------------------------------
     // Utils
    -#define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
    -
    -static inline uint32_t __tls_rand() {
    -        kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
    -        kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
    -        kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
    -        return kernelTLS.rand_seed;
    +#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
    +
    +static inline uint64_t __tls_rand() {
    +        // kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
    +        // kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
    +        // kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
    +        // return kernelTLS.rand_seed;
    +        return __lehmer64( kernelTLS.rand_seed );
     }
    
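    Note: __tls_rand() now returns 64 bits and defers to __lehmer64(), presumably provided by the newly included bits/random.hfa; the old 32-bit xorshift steps are kept only as commented-out code. The definition of __lehmer64 is not part of this changeset, so purely as an illustration, a typical Lehmer-style step over a 128-bit seed (assumed here) looks like:

        // Illustrative sketch only -- assumes kernelTLS.rand_seed is now a 128-bit
        // integer taken by reference; the real __lehmer64 lives in bits/random.hfa.
        static inline uint64_t __lehmer64_sketch( __uint128_t & state ) {
                state *= 0xda942042e4dd58b5;  // advance: multiply the 128-bit state by a fixed odd constant
                return state >> 64;           // output: the high 64 bits of the new state
        }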
     
    …
     void unregister( struct cluster * cltr, struct $thread & thrd );
    
    -void doregister( struct cluster * cltr, struct processor * proc );
    -void unregister( struct cluster * cltr, struct processor * proc );
    +//=======================================================================
    +// Cluster lock API
    +//=======================================================================
    +// Cells use by the reader writer lock
    +// while not generic it only relies on a opaque pointer
    +struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    +        // Spin lock used as the underlying lock
    +        volatile bool lock;
    +
    +        // Handle pointing to the proc owning this cell
    +        // Used for allocating cells and debugging
    +        __processor_id_t * volatile handle;
    +
    +        #ifdef __CFA_WITH_VERIFY__
    +                // Debug, check if this is owned for reading
    +                bool owned;
    +        #endif
    +};
    +
    +static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
    +
    +// Lock-Free registering/unregistering of threads
    +// Register a processor to a given cluster and get its unique id in return
    +unsigned doregister( struct __processor_id_t * proc );
    +
    +// Unregister a processor from a given cluster using its id, getting back the original pointer
    +void     unregister( struct __processor_id_t * proc );
    +
    +//=======================================================================
    +// Reader-writer lock implementation
    +// Concurrent with doregister/unregister,
    +//    i.e., threads can be added at any point during or between the entry/exit
    +
    +//-----------------------------------------------------------------------
    +// simple spinlock underlying the RWLock
    +// Blocking acquire
    +static inline void __atomic_acquire(volatile bool * ll) {
    +        while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
    +                while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
    +                        asm volatile("pause");
    +        }
    +        /* paranoid */ verify(*ll);
    +}
    +
    +// Non-Blocking acquire
    +static inline bool __atomic_try_acquire(volatile bool * ll) {
    +        return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
    +}
    +
    +// Release
    +static inline void __atomic_unlock(volatile bool * ll) {
    +        /* paranoid */ verify(*ll);
    +        __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
    +}
    +
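    Note: the three helpers above form a minimal test-and-test-and-set spinlock over a single bool: the blocking acquire retries the atomic exchange only after a relaxed load sees the flag clear (spinning on "pause"), and the release is a plain store-release. A usage sketch, with a hypothetical flag:

        // Usage sketch only -- `example_flag` and the protected data are hypothetical.
        static volatile bool example_flag = false;

        static void example_critical_section(void) {
                __atomic_acquire( &example_flag );   // spins until the flag is owned
                // ... touch the data protected by example_flag ...
                __atomic_unlock ( &example_flag );   // store-release; the flag must be held here
        }
        // __atomic_try_acquire( &example_flag ) is the non-blocking variant: true on success.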
    +//-----------------------------------------------------------------------
    +// Reader-Writer lock protecting the ready-queues
    +// while this lock is mostly generic some aspects
    +// have been hard-coded to for the ready-queue for
    +// simplicity and performance
    +struct __scheduler_RWLock_t {
    +        // total cachelines allocated
    +        unsigned int max;
    +
    +        // cachelines currently in use
    +        volatile unsigned int alloc;
    +
    +        // cachelines ready to itereate over
    +        // (!= to alloc when thread is in second half of doregister)
    +        volatile unsigned int ready;
    +
    +        // writer lock
    +        volatile bool lock;
    +
    +        // data pointer
    +        __scheduler_lock_id_t * data;
    +};
    +
    +void  ?{}(__scheduler_RWLock_t & this);
    +void ^?{}(__scheduler_RWLock_t & this);
    +
    +extern __scheduler_RWLock_t * __scheduler_lock;
    +
    +//-----------------------------------------------------------------------
    +// Reader side : acquire when using the ready queue to schedule but not
    +//  creating/destroying queues
    +static inline void ready_schedule_lock( struct __processor_id_t * proc) with(*__scheduler_lock) {
    +        unsigned iproc = proc->id;
    +        /*paranoid*/ verify(data[iproc].handle == proc);
    +        /*paranoid*/ verify(iproc < ready);
    +
    +        // Step 1 : make sure no writer are in the middle of the critical section
    +        while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
    +                asm volatile("pause");
    +
    +        // Fence needed because we don't want to start trying to acquire the lock
    +        // before we read a false.
    +        // Not needed on x86
    +        // std::atomic_thread_fence(std::memory_order_seq_cst);
    +
    +        // Step 2 : acquire our local lock
    +        __atomic_acquire( &data[iproc].lock );
    +        /*paranoid*/ verify(data[iproc].lock);
    +
    +        #ifdef __CFA_WITH_VERIFY__
    +                // Debug, check if this is owned for reading
    +                data[iproc].owned = true;
    +        #endif
    +}
    +
    +static inline void ready_schedule_unlock( struct __processor_id_t * proc) with(*__scheduler_lock) {
    +        unsigned iproc = proc->id;
    +        /*paranoid*/ verify(data[iproc].handle == proc);
    +        /*paranoid*/ verify(iproc < ready);
    +        /*paranoid*/ verify(data[iproc].lock);
    +        /*paranoid*/ verify(data[iproc].owned);
    +        #ifdef __CFA_WITH_VERIFY__
    +                // Debug, check if this is owned for reading
    +                data[iproc].owned = false;
    +        #endif
    +        __atomic_unlock(&data[iproc].lock);
    +}
    +
    +#ifdef __CFA_WITH_VERIFY__
    +        static inline bool ready_schedule_islocked( struct __processor_id_t * proc) {
    +                return __scheduler_lock->data[proc->id].owned;
    +        }
    +
    +        static inline bool ready_mutate_islocked() {
    +                return __scheduler_lock->lock;
    +        }
    +#endif
    +
    +//-----------------------------------------------------------------------
    +// Writer side : acquire when changing the ready queue, e.g. adding more
    +//  queues or removing them.
    +uint_fast32_t ready_mutate_lock( void );
    +
    +void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
    +
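    Note: the lock is used asymmetrically. The hot scheduling path only takes the calling processor's own cell through ready_schedule_lock/unlock, while structural changes take the single writer lock through ready_mutate_lock/unlock. A reader-side sketch with hypothetical arguments (push is declared further down in this header):

        // Reader-side sketch only -- `proc`, `cltr` and `thrd` are whatever the caller has in hand.
        static void example_ready_push( struct __processor_id_t * proc, struct cluster * cltr, struct $thread * thrd ) {
                ready_schedule_lock  ( proc );   // wait out any writer, then take this processor's cell
                push( cltr, thrd );              // safe to use the cluster's ready queue
                ready_schedule_unlock( proc );   // release only this processor's cell
        }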
    +//=======================================================================
    +// Ready-Queue API
    +//-----------------------------------------------------------------------
    +// pop thread from the ready queue of a cluster
    +// returns 0p if empty
    +__attribute__((hot)) bool query(struct cluster * cltr);
    +
    +//-----------------------------------------------------------------------
    +// push thread onto a ready queue for a cluster
    +// returns true if the list was previously empty, false otherwise
    +__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
    +
    +//-----------------------------------------------------------------------
    +// pop thread from the ready queue of a cluster
    +// returns 0p if empty
    +__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
    +
    +//-----------------------------------------------------------------------
    +// remove thread from the ready queue of a cluster
    +// returns bool if it wasn't found
    +bool remove_head(struct cluster * cltr, struct $thread * thrd);
    +
    +//-----------------------------------------------------------------------
    +// Increase the width of the ready queue (number of lanes) by 4
    +void ready_queue_grow  (struct cluster * cltr);
    +
    +//-----------------------------------------------------------------------
    +// Decrease the width of the ready queue (number of lanes) by 4
    +void ready_queue_shrink(struct cluster * cltr);
    +
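    Note: growing or shrinking the lanes is a structural change to the ready queue, so it presumably runs under the writer side of the scheduler lock above. A writer-side sketch (names hypothetical):

        // Writer-side sketch only -- excludes every reader cell while the lanes are resized.
        static void example_resize( struct cluster * cltr ) {
                uint_fast32_t last = ready_mutate_lock();   // exclusive access; keep the returned value
                ready_queue_grow( cltr );                   // add 4 lanes while no reader can observe the change
                ready_mutate_unlock( last );                // hand the value back to release
        }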
    +//-----------------------------------------------------------------------
    +// Statics call at the end of each thread to register statistics
    +#if !defined(__CFA_NO_STATISTICS__)
    +static inline struct __stats_t * __tls_stats() {
    +        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    +        /* paranoid */ verify( kernelTLS.this_stats );
    +        return kernelTLS.this_stats;
    +}
    +#endif
    
     // Local Variables: //