Changeset 58fe85a for libcfa/src/concurrency/kernel_private.hfa
- Timestamp: Jan 7, 2021, 3:27:00 PM
- Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children: 2b4daf2, 64aeca0
- Parents: 3c64c668 (diff), eef8dfb (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
- File: 1 edited (libcfa/src/concurrency/kernel_private.hfa)
Legend:
- Unmodified lines are shown with a leading space
- Added lines are prefixed with +
- Removed lines are prefixed with -
- Elided unchanged regions are marked with …
libcfa/src/concurrency/kernel_private.hfa
r3c64c668 → r58fe85a

  // Created On : Mon Feb 13 12:27:26 2017
  // Last Modified By : Peter A. Buhr
- // Last Modified On : Sat Nov 30 19:25:02 2019
- // Update Count : 8
+ // Last Modified On : Wed Aug 12 08:21:33 2020
+ // Update Count : 9
  //

…

  #include "alarm.hfa"
  #include "stats.hfa"

  //-----------------------------------------------------------------------------
  // Scheduler
+
+ struct __attribute__((aligned(128))) __scheduler_lock_id_t;

  extern "C" {
…
  }

- void __schedule_thread( $thread * ) __attribute__((nonnull (1)));
-
- //Block current thread and release/wake-up the following resources
- void __leave_thread() __attribute__((noreturn));
+ void __schedule_thread( $thread * )
+ #if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
+     __attribute__((nonnull (1)))
+ #endif
+ ;
+
+ extern bool __preemption_enabled();
+
+ //release/wake-up the following resources
+ void __thread_finish( $thread * thrd );

  //-----------------------------------------------------------------------------
…

  void * __create_pthread( pthread_t *, void * (*)(void *), void * );
-
- static inline void wake_fast(processor * this) {
-     __cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
-     post( this->idleLock );
- }
-
- static inline void wake(processor * this) {
-     disable_interrupts();
-     wake_fast(this);
-     enable_interrupts( __cfaabi_dbg_ctx );
- }
-
- struct event_kernel_t {
-     alarm_list_t alarms;
-     __spinlock_t lock;
- };
-
- extern event_kernel_t * event_kernel;
-
- struct __cfa_kernel_preemption_state_t {
-     bool enabled;
-     bool in_progress;
-     unsigned short disable_count;
- };
-
- extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
+ void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );
+
+
+
+ extern cluster * mainCluster;

  //-----------------------------------------------------------------------------
…
  )

+ #define TICKET_BLOCKED (-1) // thread is blocked
+ #define TICKET_RUNNING ( 0) // thread is running
+ #define TICKET_UNBLOCK ( 1) // thread should ignore next block
+
  //-----------------------------------------------------------------------------
  // Utils
- #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
-
- static inline uint32_t __tls_rand() {
-     kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
-     kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
-     kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
-     return kernelTLS.rand_seed;
- }
-
-
- void doregister( struct cluster & cltr );
- void unregister( struct cluster & cltr );
-
  void doregister( struct cluster * cltr, struct $thread & thrd );
  void unregister( struct cluster * cltr, struct $thread & thrd );

- void doregister( struct cluster * cltr, struct processor * proc );
- void unregister( struct cluster * cltr, struct processor * proc );
+ //-----------------------------------------------------------------------------
+ // I/O
+ void ^?{}(io_context & this, bool );
+
+ //=======================================================================
+ // Cluster lock API
+ //=======================================================================
+ // Cells use by the reader writer lock
+ // while not generic it only relies on a opaque pointer
+ struct __attribute__((aligned(128))) __scheduler_lock_id_t {
+     // Spin lock used as the underlying lock
+     volatile bool lock;
+
+     // Handle pointing to the proc owning this cell
+     // Used for allocating cells and debugging
+     __processor_id_t * volatile handle;
+
+     #ifdef __CFA_WITH_VERIFY__
+         // Debug, check if this is owned for reading
+         bool owned;
+     #endif
+ };
+
+ static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
+
+ // Lock-Free registering/unregistering of threads
+ // Register a processor to a given cluster and get its unique id in return
+ unsigned doregister( struct __processor_id_t * proc );
+
+ // Unregister a processor from a given cluster using its id, getting back the original pointer
+ void unregister( struct __processor_id_t * proc );
+
+ //-----------------------------------------------------------------------
+ // Cluster idle lock/unlock
+ static inline void lock(__cluster_idles & this) {
+     for() {
+         uint64_t l = this.lock;
+         if(
+             (0 == (l % 2))
+             && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+         ) return;
+         Pause();
+     }
+ }
+
+ static inline void unlock(__cluster_idles & this) {
+     /* paranoid */ verify( 1 == (this.lock % 2) );
+     __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
+ }
+
+ //=======================================================================
+ // Reader-writer lock implementation
+ // Concurrent with doregister/unregister,
+ //    i.e., threads can be added at any point during or between the entry/exit
+
+ //-----------------------------------------------------------------------
+ // simple spinlock underlying the RWLock
+ // Blocking acquire
+ static inline void __atomic_acquire(volatile bool * ll) {
+     while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
+         while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
+             Pause();
+     }
+     /* paranoid */ verify(*ll);
+ }
+
+ // Non-Blocking acquire
+ static inline bool __atomic_try_acquire(volatile bool * ll) {
+     return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
+ }
+
+ // Release
+ static inline void __atomic_unlock(volatile bool * ll) {
+     /* paranoid */ verify(*ll);
+     __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
+ }
+
+ //-----------------------------------------------------------------------
+ // Reader-Writer lock protecting the ready-queues
+ // while this lock is mostly generic some aspects
+ // have been hard-coded to for the ready-queue for
+ // simplicity and performance
+ struct __scheduler_RWLock_t {
+     // total cachelines allocated
+     unsigned int max;
+
+     // cachelines currently in use
+     volatile unsigned int alloc;
+
+     // cachelines ready to itereate over
+     // (!= to alloc when thread is in second half of doregister)
+     volatile unsigned int ready;
+
+     // writer lock
+     volatile bool lock;
+
+     // data pointer
+     __scheduler_lock_id_t * data;
+ };
+
+ void ?{}(__scheduler_RWLock_t & this);
+ void ^?{}(__scheduler_RWLock_t & this);
+
+ extern __scheduler_RWLock_t * __scheduler_lock;
+
+ //-----------------------------------------------------------------------
+ // Reader side : acquire when using the ready queue to schedule but not
+ //               creating/destroying queues
+ static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
+     /* paranoid */ verify( ! __preemption_enabled() );
+     /* paranoid */ verify( kernelTLS().this_proc_id );
+
+     unsigned iproc = kernelTLS().this_proc_id->id;
+     /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
+     /*paranoid*/ verify(iproc < ready);
+
+     // Step 1 : make sure no writer are in the middle of the critical section
+     while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
+         Pause();
+
+     // Fence needed because we don't want to start trying to acquire the lock
+     // before we read a false.
+     // Not needed on x86
+     // std::atomic_thread_fence(std::memory_order_seq_cst);
+
+     // Step 2 : acquire our local lock
+     __atomic_acquire( &data[iproc].lock );
+     /*paranoid*/ verify(data[iproc].lock);
+
+     #ifdef __CFA_WITH_VERIFY__
+         // Debug, check if this is owned for reading
+         data[iproc].owned = true;
+     #endif
+ }
+
+ static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
+     /* paranoid */ verify( ! __preemption_enabled() );
+     /* paranoid */ verify( kernelTLS().this_proc_id );
+
+     unsigned iproc = kernelTLS().this_proc_id->id;
+     /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
+     /*paranoid*/ verify(iproc < ready);
+     /*paranoid*/ verify(data[iproc].lock);
+     /*paranoid*/ verify(data[iproc].owned);
+     #ifdef __CFA_WITH_VERIFY__
+         // Debug, check if this is owned for reading
+         data[iproc].owned = false;
+     #endif
+     __atomic_unlock(&data[iproc].lock);
+ }
+
+ #ifdef __CFA_WITH_VERIFY__
+     static inline bool ready_schedule_islocked(void) {
+         /* paranoid */ verify( ! __preemption_enabled() );
+         /*paranoid*/ verify( kernelTLS().this_proc_id );
+         __processor_id_t * proc = kernelTLS().this_proc_id;
+         return __scheduler_lock->data[proc->id].owned;
+     }
+
+     static inline bool ready_mutate_islocked() {
+         return __scheduler_lock->lock;
+     }
+ #endif
+
+ //-----------------------------------------------------------------------
+ // Writer side : acquire when changing the ready queue, e.g. adding more
+ //               queues or removing them.
+ uint_fast32_t ready_mutate_lock( void );
+
+ void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
+
+ //=======================================================================
+ // Ready-Queue API
+ //-----------------------------------------------------------------------
+ // pop thread from the ready queue of a cluster
+ // returns 0p if empty
+ __attribute__((hot)) bool query(struct cluster * cltr);
+
+ //-----------------------------------------------------------------------
+ // push thread onto a ready queue for a cluster
+ // returns true if the list was previously empty, false otherwise
+ __attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
+
+ //-----------------------------------------------------------------------
+ // pop thread from the ready queue of a cluster
+ // returns 0p if empty
+ // May return 0p spuriously
+ __attribute__((hot)) struct $thread * pop(struct cluster * cltr);
+
+ //-----------------------------------------------------------------------
+ // pop thread from the ready queue of a cluster
+ // returns 0p if empty
+ // guaranteed to find any threads added before this call
+ __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
+
+ //-----------------------------------------------------------------------
+ // remove thread from the ready queue of a cluster
+ // returns bool if it wasn't found
+ bool remove_head(struct cluster * cltr, struct $thread * thrd);
+
+ //-----------------------------------------------------------------------
+ // Increase the width of the ready queue (number of lanes) by 4
+ void ready_queue_grow (struct cluster * cltr, int target);
+
+ //-----------------------------------------------------------------------
+ // Decrease the width of the ready queue (number of lanes) by 4
+ void ready_queue_shrink(struct cluster * cltr, int target);
+

  // Local Variables: //
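For orientation only (not part of the changeset): the new header splits protection of the ready queues into a reader side, taken by a processor while it schedules, and a writer side, taken only when the set of ready-queue lanes changes. Below is a minimal usage sketch built solely from the declarations added above, assuming the caller is a processor already registered via doregister() and running with preemption disabled (as the paranoid verifies in ready_schedule_lock require); the helper names example_schedule and example_resize are hypothetical.

#include "kernel_private.hfa"

// Hypothetical helper: enqueue a thread under the reader side of the
// scheduler RW-lock, so the push cannot race with lane creation/destruction.
static void example_schedule( struct cluster * cltr, struct $thread * thrd ) {
    ready_schedule_lock();       // reader side: many processors may hold it concurrently
    push( cltr, thrd );          // ready-queue operation protected from resizing
    ready_schedule_unlock();
}

// Hypothetical helper: change the number of lanes under the writer side,
// which excludes all readers for the duration of the change.
static void example_resize( struct cluster * cltr, int target ) {
    uint_fast32_t last_size = ready_mutate_lock();
    ready_queue_grow( cltr, target );
    ready_mutate_unlock( last_size );
}

The split keeps the hot scheduling path down to acquiring the processor's own 128-byte-aligned cell (data[iproc].lock), while the comparatively rare resize path takes the writer lock and therefore excludes every reader.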
Note: See TracChangeset for help on using the changeset viewer.