File (1 edited):
Legend: lines prefixed with '+' were added, lines prefixed with '-' were removed, other lines are unmodified.
libcfa/src/concurrency/kernel_private.hfa
rdd4e2d7 → r13c5e19

 #include "alarm.hfa"
+#include "stats.hfa"
+
+#include "bits/random.hfa"


 //-----------------------------------------------------------------------------
 // Scheduler
+
+struct __attribute__((aligned(128))) __scheduler_lock_id_t;

 extern "C" {
…
 }

-void __schedule_thread( $thread * ) __attribute__((nonnull (1)));
+void __schedule_thread( struct __processor_id_t *, $thread * ) __attribute__((nonnull (2)));

 //Block current thread and release/wake-up the following resources
…

 // KERNEL ONLY unpark with out disabling interrupts
-void __unpark( $thread * thrd __cfaabi_dbg_ctx_param2 );
+void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );

 //-----------------------------------------------------------------------------
…

 // Utils
-#define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
-
-static inline uint32_t __tls_rand() {
-    kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
-    kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
-    kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
-    return kernelTLS.rand_seed;
+#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
+
+static inline uint64_t __tls_rand() {
+    // kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
+    // kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
+    // kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
+    // return kernelTLS.rand_seed;
+    return __lehmer64( kernelTLS.rand_seed );
 }

…
 void unregister( struct cluster * cltr, struct $thread & thrd );

-void doregister( struct cluster * cltr, struct processor * proc );
-void unregister( struct cluster * cltr, struct processor * proc );
+//=======================================================================
+// Cluster lock API
+//=======================================================================
+// Cells use by the reader writer lock
+// while not generic it only relies on a opaque pointer
+struct __attribute__((aligned(128))) __scheduler_lock_id_t {
+    // Spin lock used as the underlying lock
+    volatile bool lock;
+
+    // Handle pointing to the proc owning this cell
+    // Used for allocating cells and debugging
+    __processor_id_t * volatile handle;
+
+    #ifdef __CFA_WITH_VERIFY__
+        // Debug, check if this is owned for reading
+        bool owned;
+    #endif
+};
+
+static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
+
+// Lock-Free registering/unregistering of threads
+// Register a processor to a given cluster and get its unique id in return
+unsigned doregister( struct __processor_id_t * proc );
+
+// Unregister a processor from a given cluster using its id, getting back the original pointer
+void unregister( struct __processor_id_t * proc );
+
+//=======================================================================
+// Reader-writer lock implementation
+// Concurrent with doregister/unregister,
+//    i.e., threads can be added at any point during or between the entry/exit
+
+//-----------------------------------------------------------------------
+// simple spinlock underlying the RWLock
+// Blocking acquire
+static inline void __atomic_acquire(volatile bool * ll) {
+    while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
+        while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
+            asm volatile("pause");
+    }
+    /* paranoid */ verify(*ll);
+}
+
+// Non-Blocking acquire
+static inline bool __atomic_try_acquire(volatile bool * ll) {
+    return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
+}
+
+// Release
+static inline void __atomic_unlock(volatile bool * ll) {
+    /* paranoid */ verify(*ll);
+    __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
+}
+
+//-----------------------------------------------------------------------
+// Reader-Writer lock protecting the ready-queues
+// while this lock is mostly generic some aspects
+// have been hard-coded to for the ready-queue for
+// simplicity and performance
+struct __scheduler_RWLock_t {
+    // total cachelines allocated
+    unsigned int max;
+
+    // cachelines currently in use
+    volatile unsigned int alloc;
+
+    // cachelines ready to itereate over
+    // (!= to alloc when thread is in second half of doregister)
+    volatile unsigned int ready;
+
+    // writer lock
+    volatile bool lock;
+
+    // data pointer
+    __scheduler_lock_id_t * data;
+};
+
+void  ?{}(__scheduler_RWLock_t & this);
+void ^?{}(__scheduler_RWLock_t & this);
+
+extern __scheduler_RWLock_t * __scheduler_lock;
+
+//-----------------------------------------------------------------------
+// Reader side : acquire when using the ready queue to schedule but not
+//  creating/destroying queues
+static inline void ready_schedule_lock( struct __processor_id_t * proc) with(*__scheduler_lock) {
+    unsigned iproc = proc->id;
+    /*paranoid*/ verify(data[iproc].handle == proc);
+    /*paranoid*/ verify(iproc < ready);
+
+    // Step 1 : make sure no writer are in the middle of the critical section
+    while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
+        asm volatile("pause");
+
+    // Fence needed because we don't want to start trying to acquire the lock
+    // before we read a false.
+    // Not needed on x86
+    // std::atomic_thread_fence(std::memory_order_seq_cst);
+
+    // Step 2 : acquire our local lock
+    __atomic_acquire( &data[iproc].lock );
+    /*paranoid*/ verify(data[iproc].lock);
+
+    #ifdef __CFA_WITH_VERIFY__
+        // Debug, check if this is owned for reading
+        data[iproc].owned = true;
+    #endif
+}
+
+static inline void ready_schedule_unlock( struct __processor_id_t * proc) with(*__scheduler_lock) {
+    unsigned iproc = proc->id;
+    /*paranoid*/ verify(data[iproc].handle == proc);
+    /*paranoid*/ verify(iproc < ready);
+    /*paranoid*/ verify(data[iproc].lock);
+    /*paranoid*/ verify(data[iproc].owned);
+    #ifdef __CFA_WITH_VERIFY__
+        // Debug, check if this is owned for reading
+        data[iproc].owned = false;
+    #endif
+    __atomic_unlock(&data[iproc].lock);
+}
+
+#ifdef __CFA_WITH_VERIFY__
+    static inline bool ready_schedule_islocked( struct __processor_id_t * proc) {
+        return __scheduler_lock->data[proc->id].owned;
+    }
+
+    static inline bool ready_mutate_islocked() {
+        return __scheduler_lock->lock;
+    }
+#endif
+
+//-----------------------------------------------------------------------
+// Writer side : acquire when changing the ready queue, e.g. adding more
+//  queues or removing them.
+uint_fast32_t ready_mutate_lock( void );
+
+void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
+
+//=======================================================================
+// Ready-Queue API
+//-----------------------------------------------------------------------
+// pop thread from the ready queue of a cluster
+// returns 0p if empty
+__attribute__((hot)) bool query(struct cluster * cltr);
+
+//-----------------------------------------------------------------------
+// push thread onto a ready queue for a cluster
+// returns true if the list was previously empty, false otherwise
+__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
+
+//-----------------------------------------------------------------------
+// pop thread from the ready queue of a cluster
+// returns 0p if empty
+__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
+
+//-----------------------------------------------------------------------
+// remove thread from the ready queue of a cluster
+// returns bool if it wasn't found
+bool remove_head(struct cluster * cltr, struct $thread * thrd);
+
+//-----------------------------------------------------------------------
+// Increase the width of the ready queue (number of lanes) by 4
+void ready_queue_grow  (struct cluster * cltr);
+
+//-----------------------------------------------------------------------
+// Decrease the width of the ready queue (number of lanes) by 4
+void ready_queue_shrink(struct cluster * cltr);
+
+//-----------------------------------------------------------------------
+// Statics call at the end of each thread to register statistics
+#if !defined(__CFA_NO_STATISTICS__)
+    static inline struct __stats_t * __tls_stats() {
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( kernelTLS.this_stats );
+        return kernelTLS.this_stats;
+    }
+#endif

 // Local Variables: //
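
Reviewer note on the __tls_rand change: the per-thread generator now returns 64 bits and defers to __lehmer64 from the newly included "bits/random.hfa", which is not part of this changeset. For reference only, a Lehmer-style multiplicative generator typically looks like the sketch below; the state width, multiplier, and seeding of the real __lehmer64 may differ, and the helper name is invented for illustration.

    #include <stdint.h>

    // Illustrative sketch only, not the definition from "bits/random.hfa":
    // multiply a 128-bit state by a fixed odd constant and return the high
    // 64 bits, which are the best-mixed part of the product.
    static inline uint64_t lehmer64_sketch( __uint128_t * state ) {
        *state *= 0xda942042e4dd58b5ULL;   // multiplier used by the common lehmer64 variant
        return (uint64_t)(*state >> 64);
    }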
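The new doregister/unregister pair replaces the cluster-based processor registration. A minimal sketch of how a processor might use it is below, assuming (as the verify in ready_schedule_lock suggests) that the returned index ends up in the processor's id field; processor_startup_sketch is a hypothetical name, not a function from this changeset, and the types come from this header.

    // Hypothetical registration sequence, using only declarations from this header.
    static void processor_startup_sketch( struct __processor_id_t * proc ) {
        proc->id = doregister( proc );   // lock-free: reserves a __scheduler_lock_id_t cell
        // ... run the scheduling loop, taking ready_schedule_lock( proc ) as needed ...
        unregister( proc );              // release the cell when the processor shuts down
    }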
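The reader/writer split called out in the comments ("Reader side: acquire when using the ready queue to schedule but not creating/destroying queues"; "Writer side: acquire when changing the ready queue") implies the following usage pattern. This is a hedged sketch against the declared API, not code from the changeset; the wrapper function names are invented.

    // Reader side: wrap ordinary scheduling operations; cheap, per-processor cell.
    static void schedule_sketch( struct __processor_id_t * proc, struct cluster * cltr, struct $thread * thrd ) {
        ready_schedule_lock( proc );     // waits only if a writer is in its critical section
        push( cltr, thrd );              // the ready queue may be used, but not resized
        ready_schedule_unlock( proc );
    }

    // Writer side: wrap structural changes such as resizing the ready queue.
    static void resize_sketch( struct cluster * cltr ) {
        uint_fast32_t s = ready_mutate_lock();   // excludes every reader and other writers
        ready_queue_grow( cltr );                // e.g. add 4 lanes, per the comment above
        ready_mutate_unlock( s );                // pass back the value returned by the lock
    }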