Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/concurrency/kernel_private.hfa

    r09f357ec rdca5802  
    8888// Threads
    8989extern "C" {
    90       void CtxInvokeThread(void (*main)(void *), void * this);
     90      forall(dtype T | is_thread(T))
     91      void CtxInvokeThread(T * this);
    9192}
    9293
     
    100101//-----------------------------------------------------------------------------
    101102// Utils
    102 #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
     103#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
    103104
    104105static inline uint32_t tls_rand() {
     
    116117void unregister( struct cluster * cltr, struct thread_desc & thrd );
    117118
    118 void doregister( struct cluster * cltr, struct processor * proc );
    119 void unregister( struct cluster * cltr, struct processor * proc );
     119//=======================================================================
     120// Cluster lock API
     121//=======================================================================
// Per-processor slot used by the cluster's reader-writer lock.
// Aligned to 64 bytes — presumably one cache line per slot, so processors
// spinning on their own `lock` bit do not false-share (TODO confirm
// target cache-line size).
      122struct __attribute__((aligned(64))) __processor_id {
      123        processor * volatile handle;   // owning processor; written during register/unregister
      124        volatile bool lock;            // this processor's reader-side lock bit
      125};
     126
     127// Lock-Free registering/unregistering of threads
     128// Register a processor to a given cluster and get its unique id in return
     129unsigned doregister( struct cluster * cltr, struct processor * proc );
     130
     131// Unregister a processor from a given cluster using its id, getting back the original pointer
     132void     unregister( struct cluster * cltr, struct processor * proc );
     133
     134//=======================================================================
     135// Reader-writer lock implementation
     136// Concurrent with doregister/unregister,
     137//    i.e., threads can be added at any point during or between the entry/exit
     138
     139//-----------------------------------------------------------------------
     140// simple spinlock underlying the RWLock
     141// Blocking acquire
     142static inline void __atomic_acquire(volatile bool * ll) {
     143        while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
     144                while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
     145                        asm volatile("pause");
     146        }
     147        /* paranoid */ verify(*ll);
     148}
     149
     150// Non-Blocking acquire
     151static inline bool __atomic_try_acquire(volatile bool * ll) {
     152        return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
     153}
     154
     155// Release
     156static inline void __atomic_unlock(volatile bool * ll) {
     157        /* paranoid */ verify(*ll);
     158        __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
     159}
     160
     161//-----------------------------------------------------------------------
     162// Reader side : acquire when using the ready queue to schedule but not
     163//  creating/destroying queues
// Reader-side entry of the cluster's ready-queue reader-writer lock:
// take shared access by waiting out any active writer, then acquiring
// only this processor's own lock slot.
// NOTE(review): the Cforall `with(cltr->ready_lock)` clause appears to
// bring `data`, `lock` and `ready` into scope from the lock structure —
// confirm against the ready_lock definition (not visible here).
      164static inline void ready_schedule_lock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
      165        unsigned iproc = proc->id;
      166        /*paranoid*/ verify(data[iproc].handle == proc);
      167        /*paranoid*/ verify(iproc < ready);
      168
      169        // Step 1 : make sure no writer is in the middle of the critical section
      170        while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
      171                asm volatile("pause");
      172
      173        // Fence needed because we don't want to start trying to acquire the
      174        // per-processor lock before we read `lock` as false.
      175        // Not needed on x86 (its memory model already orders these loads).
      176        // std::atomic_thread_fence(std::memory_order_seq_cst);
      177
      178        // Step 2 : acquire our local lock — blocks only if a writer grabbed
      179        // this slot between Step 1 and here.
      179        __atomic_acquire( &data[iproc].lock );
      180        /*paranoid*/ verify(data[iproc].lock);
      181}
     182
// Reader-side exit of the cluster's ready-queue reader-writer lock:
// release this processor's own lock slot, previously taken by
// ready_schedule_lock.
      183static inline void ready_schedule_unlock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
      184        unsigned iproc = proc->id;
      185        /*paranoid*/ verify(data[iproc].handle == proc);
      186        /*paranoid*/ verify(iproc < ready);
      187        /*paranoid*/ verify(data[iproc].lock);
      188        __atomic_unlock(&data[iproc].lock);
      189}
     190
     191//-----------------------------------------------------------------------
     192// Writer side : acquire when changing the ready queue, e.g. adding more
     193//  queues or removing them.
     194uint_fast32_t ready_mutate_lock( struct cluster & cltr );
     195
     196void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t /* value returned by lock */ );
     197
     198//=======================================================================
     199// Ready-Queue API
     200//-----------------------------------------------------------------------
     201// push thread onto a ready queue for a cluster
     202// returns true if the list was previously empty, false otherwise
     203__attribute__((hot)) bool push(struct cluster * cltr, struct thread_desc * thrd);
     204
     205//-----------------------------------------------------------------------
     206// pop thread from the ready queue of a cluster
     207// returns 0p if empty
     208__attribute__((hot)) thread_desc * pop(struct cluster * cltr);
     209
     210//-----------------------------------------------------------------------
     211// Increase the width of the ready queue (number of lanes) by 4
     212void ready_queue_grow  (struct cluster * cltr);
     213
     214//-----------------------------------------------------------------------
     215// Decrease the width of the ready queue (number of lanes) by 4
     216void ready_queue_shrink(struct cluster * cltr);
     217
     218//-----------------------------------------------------------------------
      219// Statistics: called at the end of each thread to register its statistics
// With statistics enabled, the real implementation lives elsewhere;
// otherwise compile to an empty inline so call sites need no #ifdef guards.
      220#if !defined(__CFA_NO_STATISTICS__)
      221void stats_tls_tally(struct cluster * cltr);
      222#else
      223static inline void stats_tls_tally(struct cluster * cltr) {}
      224#endif
    120225
    121226// Local Variables: //
Note: See TracChangeset for help on using the changeset viewer.