Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/concurrency/kernel_private.hfa

    rdca5802 r09f357ec  
    8888// Threads
    8989extern "C" {
    90       forall(dtype T | is_thread(T))
    91       void CtxInvokeThread(T * this);
     90      void CtxInvokeThread(void (*main)(void *), void * this);
    9291}
    9392
     
    101100//-----------------------------------------------------------------------------
    102101// Utils
    103 #define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
// Reserve uninitialized static storage large enough for one object of type T.
// NOTE(review): this revision drops the __attribute((aligned(__alignof__(T))))
// qualifier the previous version had, so storage_##X is only char-aligned —
// confirm that every user constructs T here via placement at a suitably
// aligned address, or the access is technically misaligned.
#define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
    104103
    105104static inline uint32_t tls_rand() {
     
    117116void unregister( struct cluster * cltr, struct thread_desc & thrd );
    118117
    119 //=======================================================================
    120 // Cluster lock API
    121 //=======================================================================
// Per-processor slot in the cluster's reader-writer lock table.
// Aligned to 64 bytes so each slot occupies its own cache line,
// preventing false sharing between processors spinning on their locks.
struct __attribute__((aligned(64))) __processor_id {
	processor * volatile handle;  // processor registered in this slot (read by other threads)
	volatile bool lock;           // this processor's reader-side spinlock
};
    126 
    127 // Lock-Free registering/unregistering of threads
    128 // Register a processor to a given cluster and get its unique id in return
    129 unsigned doregister( struct cluster * cltr, struct processor * proc );
    130 
    131 // Unregister a processor from a given cluster using its id, getting back the original pointer
    132 void     unregister( struct cluster * cltr, struct processor * proc );
    133 
    134 //=======================================================================
    135 // Reader-writer lock implementation
    136 // Concurrent with doregister/unregister,
    137 //    i.e., threads can be added at any point during or between the entry/exit
    138 
    139 //-----------------------------------------------------------------------
    140 // simple spinlock underlying the RWLock
    141 // Blocking acquire
    142 static inline void __atomic_acquire(volatile bool * ll) {
    143         while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
    144                 while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
    145                         asm volatile("pause");
    146         }
    147         /* paranoid */ verify(*ll);
    148 }
    149 
    150 // Non-Blocking acquire
    151 static inline bool __atomic_try_acquire(volatile bool * ll) {
    152         return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
    153 }
    154 
    155 // Release
// Release the spinlock.
// The RELEASE ordering makes every write performed inside the critical
// section visible before the store of false reaches the next acquirer.
static inline void __atomic_unlock(volatile bool * ll) {
	/* paranoid */ verify(*ll);  // caller must currently hold the lock
	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}
    160 
    161 //-----------------------------------------------------------------------
    162 // Reader side : acquire when using the ready queue to schedule but not
    163 //  creating/destroying queues
    164 static inline void ready_schedule_lock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
    165         unsigned iproc = proc->id;
    166         /*paranoid*/ verify(data[iproc].handle == proc);
    167         /*paranoid*/ verify(iproc < ready);
    168 
    169         // Step 1 : make sure no writer are in the middle of the critical section
    170         while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
    171                 asm volatile("pause");
    172 
    173         // Fence needed because we don't want to start trying to acquire the lock
    174         // before we read a false.
    175         // Not needed on x86
    176         // std::atomic_thread_fence(std::memory_order_seq_cst);
    177 
    178         // Step 2 : acquire our local lock
    179         __atomic_acquire( &data[iproc].lock );
    180         /*paranoid*/ verify(data[iproc].lock);
    181 }
    182 
    183 static inline void ready_schedule_unlock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
    184         unsigned iproc = proc->id;
    185         /*paranoid*/ verify(data[iproc].handle == proc);
    186         /*paranoid*/ verify(iproc < ready);
    187         /*paranoid*/ verify(data[iproc].lock);
    188         __atomic_unlock(&data[iproc].lock);
    189 }
    190 
    191 //-----------------------------------------------------------------------
    192 // Writer side : acquire when changing the ready queue, e.g. adding more
    193 //  queues or removing them.
    194 uint_fast32_t ready_mutate_lock( struct cluster & cltr );
    195 
    196 void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t /* value returned by lock */ );
    197 
    198 //=======================================================================
    199 // Ready-Queue API
    200 //-----------------------------------------------------------------------
    201 // push thread onto a ready queue for a cluster
    202 // returns true if the list was previously empty, false otherwise
    203 __attribute__((hot)) bool push(struct cluster * cltr, struct thread_desc * thrd);
    204 
    205 //-----------------------------------------------------------------------
    206 // pop thread from the ready queue of a cluster
    207 // returns 0p if empty
    208 __attribute__((hot)) thread_desc * pop(struct cluster * cltr);
    209 
    210 //-----------------------------------------------------------------------
    211 // Increase the width of the ready queue (number of lanes) by 4
    212 void ready_queue_grow  (struct cluster * cltr);
    213 
    214 //-----------------------------------------------------------------------
    215 // Decrease the width of the ready queue (number of lanes) by 4
    216 void ready_queue_shrink(struct cluster * cltr);
    217 
    218 //-----------------------------------------------------------------------
    219 // Statistics: called at the end of each thread to register its tallies
    220 #if !defined(__CFA_NO_STATISTICS__)
    221 void stats_tls_tally(struct cluster * cltr);
    222 #else
    223 static inline void stats_tls_tally(struct cluster * cltr) {}
    224 #endif
     118void doregister( struct cluster * cltr, struct processor * proc );
     119void unregister( struct cluster * cltr, struct processor * proc );
    225120
    226121// Local Variables: //
Note: See TracChangeset for help on using the changeset viewer.