Files: 1 edited

- libcfa/src/concurrency/kernel_private.hfa (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/concurrency/kernel_private.hfa
rdca5802 r09f357ec 88 88 // Threads 89 89 extern "C" { 90 forall(dtype T | is_thread(T)) 91 void CtxInvokeThread(T * this); 90 void CtxInvokeThread(void (*main)(void *), void * this); 92 91 } 93 92 … … 101 100 //----------------------------------------------------------------------------- 102 101 // Utils 103 #define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T))))static char storage_##X[sizeof(T)]102 #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)] 104 103 105 104 static inline uint32_t tls_rand() { … … 117 116 void unregister( struct cluster * cltr, struct thread_desc & thrd ); 118 117 119 //======================================================================= 120 // Cluster lock API 121 //======================================================================= 122 struct __attribute__((aligned(64))) __processor_id { 123 processor * volatile handle; 124 volatile bool lock; 125 }; 126 127 // Lock-Free registering/unregistering of threads 128 // Register a processor to a given cluster and get its unique id in return 129 unsigned doregister( struct cluster * cltr, struct processor * proc ); 130 131 // Unregister a processor from a given cluster using its id, getting back the original pointer 132 void unregister( struct cluster * cltr, struct processor * proc ); 133 134 //======================================================================= 135 // Reader-writer lock implementation 136 // Concurrent with doregister/unregister, 137 // i.e., threads can be added at any point during or between the entry/exit 138 139 //----------------------------------------------------------------------- 140 // simple spinlock underlying the RWLock 141 // Blocking acquire 142 static inline void __atomic_acquire(volatile bool * ll) { 143 while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) { 144 while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED)) 145 asm volatile("pause"); 146 } 147 /* paranoid */ verify(*ll); 148 } 149 150 // 
Non-Blocking acquire 151 static inline bool __atomic_try_acquire(volatile bool * ll) { 152 return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST); 153 } 154 155 // Release 156 static inline void __atomic_unlock(volatile bool * ll) { 157 /* paranoid */ verify(*ll); 158 __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE); 159 } 160 161 //----------------------------------------------------------------------- 162 // Reader side : acquire when using the ready queue to schedule but not 163 // creating/destroying queues 164 static inline void ready_schedule_lock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) { 165 unsigned iproc = proc->id; 166 /*paranoid*/ verify(data[iproc].handle == proc); 167 /*paranoid*/ verify(iproc < ready); 168 169 // Step 1 : make sure no writer are in the middle of the critical section 170 while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED)) 171 asm volatile("pause"); 172 173 // Fence needed because we don't want to start trying to acquire the lock 174 // before we read a false. 175 // Not needed on x86 176 // std::atomic_thread_fence(std::memory_order_seq_cst); 177 178 // Step 2 : acquire our local lock 179 __atomic_acquire( &data[iproc].lock ); 180 /*paranoid*/ verify(data[iproc].lock); 181 } 182 183 static inline void ready_schedule_unlock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) { 184 unsigned iproc = proc->id; 185 /*paranoid*/ verify(data[iproc].handle == proc); 186 /*paranoid*/ verify(iproc < ready); 187 /*paranoid*/ verify(data[iproc].lock); 188 __atomic_unlock(&data[iproc].lock); 189 } 190 191 //----------------------------------------------------------------------- 192 // Writer side : acquire when changing the ready queue, e.g. adding more 193 // queues or removing them. 
194 uint_fast32_t ready_mutate_lock( struct cluster & cltr ); 195 196 void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t /* value returned by lock */ ); 197 198 //======================================================================= 199 // Ready-Queue API 200 //----------------------------------------------------------------------- 201 // push thread onto a ready queue for a cluster 202 // returns true if the list was previously empty, false otherwise 203 __attribute__((hot)) bool push(struct cluster * cltr, struct thread_desc * thrd); 204 205 //----------------------------------------------------------------------- 206 // pop thread from the ready queue of a cluster 207 // returns 0p if empty 208 __attribute__((hot)) thread_desc * pop(struct cluster * cltr); 209 210 //----------------------------------------------------------------------- 211 // Increase the width of the ready queue (number of lanes) by 4 212 void ready_queue_grow (struct cluster * cltr); 213 214 //----------------------------------------------------------------------- 215 // Decrease the width of the ready queue (number of lanes) by 4 216 void ready_queue_shrink(struct cluster * cltr); 217 218 //----------------------------------------------------------------------- 219 // Statics call at the end of each thread to register statistics 220 #if !defined(__CFA_NO_STATISTICS__) 221 void stats_tls_tally(struct cluster * cltr); 222 #else 223 static inline void stats_tls_tally(struct cluster * cltr) {} 224 #endif 118 void doregister( struct cluster * cltr, struct processor * proc ); 119 void unregister( struct cluster * cltr, struct processor * proc ); 225 120 226 121 // Local Variables: //
Note: See TracChangeset for help on using the changeset viewer.