Changeset c993b15 for libcfa/src/concurrency
- Timestamp: Apr 29, 2021, 4:26:25 PM
- Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children: 3eb55f98
- Parents: b2fc7ad9
- Location: libcfa/src/concurrency
- Files: 7 edited
Legend:
- Each file below is shown as a unified diff from rb2fc7ad9 to rc993b15.
- Hunks are introduced by "@@ line N @@", where N is the line number in the old revision.
- Unmodified context lines are prefixed with a space, removed lines with "-", and added lines with "+".
libcfa/src/concurrency/kernel.cfa
@@ line 163 @@
     #if !defined(__CFA_NO_STATISTICS__)
         if( this->print_halts ) {
-            __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->id, this->name, (void*)this);
+            __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->unique_id, this->name, (void*)this);
         }
     #endif

@@ line 223 @@
     #if !defined(__CFA_NO_STATISTICS__)
         if(this->print_halts) {
-            __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl());
+            __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl());
         }
     #endif

@@ line 236 @@
     #if !defined(__CFA_NO_STATISTICS__)
         if(this->print_halts) {
-            __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl());
+            __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl());
         }
     #endif

@@ line 390 @@

     post( this->terminated );
-

     if(this == mainProcessor) {

@@ line 553 @@
 static void __schedule_thread( $thread * thrd ) {
     /* paranoid */ verify( ! __preemption_enabled() );
-    /* paranoid */ verify( kernelTLS().this_proc_id );
     /* paranoid */ verify( ready_schedule_islocked());
     /* paranoid */ verify( thrd );

@@ line 611 @@
 static inline $thread * __next_thread(cluster * this) with( *this ) {
     /* paranoid */ verify( ! __preemption_enabled() );
-    /* paranoid */ verify( kernelTLS().this_proc_id );

     ready_schedule_lock();

@@ line 617 @@
     ready_schedule_unlock();

-    /* paranoid */ verify( kernelTLS().this_proc_id );
     /* paranoid */ verify( ! __preemption_enabled() );
     return thrd;

@@ line 625 @@
 static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
     /* paranoid */ verify( ! __preemption_enabled() );
-    /* paranoid */ verify( kernelTLS().this_proc_id );

     ready_schedule_lock();

@@ line 638 @@
     ready_schedule_unlock();

-    /* paranoid */ verify( kernelTLS().this_proc_id );
     /* paranoid */ verify( ! __preemption_enabled() );
     return thrd;
libcfa/src/concurrency/kernel.hfa
@@ line 49 @@

 // Processor id, required for scheduling threads
-struct __processor_id_t {
-    unsigned id:24;
-
-    #if !defined(__CFA_NO_STATISTICS__)
-        struct __stats_t * stats;
-    #endif
-};
+

 coroutine processorCtx_t {

@@ line 63 @@
 // Wrapper around kernel threads
 struct __attribute__((aligned(128))) processor {
-    // Main state
-    inline __processor_id_t;
-
     // Cluster from which to get threads
     struct cluster * cltr;

@@ line 89 @@
     // Handle to pthreads
     pthread_t kernel_thread;
+
+    // Unique id for the processor (not per cluster)
+    unsigned unique_id;

     struct {
libcfa/src/concurrency/kernel/fwd.hfa
@@ line 38 @@
     struct $thread * volatile this_thread;
     struct processor * volatile this_processor;
-    struct __processor_id_t * volatile this_proc_id;
-    struct __stats_t * volatile this_stats;
+    volatile bool sched_lock;

     struct {

@@ line 56 @@
         uint64_t bck_seed;
     } ready_rng;
+
+    struct __stats_t * volatile this_stats;
+
+
+    #ifdef __CFA_WITH_VERIFY__
+        // Debug, check if the rwlock is owned for reading
+        bool in_sched_lock;
+        unsigned sched_id;
+    #endif
 } __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" )));
libcfa/src/concurrency/kernel/startup.cfa
@@ line 77 @@
 static void doregister( struct cluster & cltr );
 static void unregister( struct cluster & cltr );
+static void register_tls( processor * this );
+static void unregister_tls( processor * this );
 static void ?{}( $coroutine & this, current_stack_info_t * info);
 static void ?{}( $thread & this, current_stack_info_t * info);

@@ line 123 @@
     NULL, // cannot use 0p
     NULL,
+    false,
+    { 1, false, false },
+    0,
+    { 0, 0 },
     NULL,
-    NULL,
-    { 1, false, false },
+    #ifdef __CFA_WITH_VERIFY__
+        false,
+        0,
+    #endif
 };

@@ line 210 @@
     (*mainProcessor){};

+    register_tls( mainProcessor );
+
     //initialize the global state variables
     __cfaabi_tls.this_processor = mainProcessor;
-    __cfaabi_tls.this_proc_id = (__processor_id_t*)mainProcessor;
     __cfaabi_tls.this_thread = mainThread;

@@ line 273 @@
     #endif

+    unregister_tls( mainProcessor );
+
     // Destroy the main processor and its context in reverse order of construction
     // These were manually constructed so we need manually destroy them

@@ line 316 @@
     processor * proc = (processor *) arg;
     __cfaabi_tls.this_processor = proc;
-    __cfaabi_tls.this_proc_id = (__processor_id_t*)proc;
     __cfaabi_tls.this_thread = 0p;
     __cfaabi_tls.preemption_state.[enabled, disable_count] = [false, 1];
+
+    register_tls( proc );
+
     // SKULLDUGGERY: We want to create a context for the processor coroutine
     // which is needed for the 2-step context switch. However, there is no reason

@@ line 355 @@
     #endif
     #endif
+
+    unregister_tls( proc );

     return 0p;

@@ line 496 @@
     #endif

-    // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
-    uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);
-    this.cltr->procs.total += 1u;
-    insert_last(this.cltr->procs.actives, this);
-
-    // Adjust the ready queue size
-    ready_queue_grow( cltr );
-
-    // Unlock the RWlock
-    ready_mutate_unlock( last_size );
-
     __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
 }

@@ line 512 @@
 // Not a ctor, it just preps the destruction but should not destroy members
 static void deinit(processor & this) {
-    // Lock the RWlock so no-one pushes/pops while we are changing the queue
-    uint_fast32_t last_size = ready_mutate_lock();
-    this.cltr->procs.total -= 1u;
-    remove(this);
-
-    // Adjust the ready queue size
-    ready_queue_shrink( this.cltr );
-
-    // Unlock the RWlock and unregister: we don't need the read_lock any more
-    ready_mutate_unregister((__processor_id_t*)&this, last_size );
-
     close(this.idle);

@@ line 656 @@
     cltr->nthreads -= 1;
     unlock(cltr->thread_list_lock);
+}
+
+static void register_tls( processor * this ) {
+    // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
+    uint_fast32_t last_size;
+    [this->unique_id, last_size] = ready_mutate_register();
+
+    this->cltr->procs.total += 1u;
+    insert_last(this->cltr->procs.actives, *this);
+
+    // Adjust the ready queue size
+    ready_queue_grow( this->cltr );
+
+    // Unlock the RWlock
+    ready_mutate_unlock( last_size );
+}
+
+
+static void unregister_tls( processor * this ) {
+    // Lock the RWlock so no-one pushes/pops while we are changing the queue
+    uint_fast32_t last_size = ready_mutate_lock();
+    this->cltr->procs.total -= 1u;
+    remove(*this);
+
+    // clear the cluster so nothing gets pushed to local queues
+    cluster * cltr = this->cltr;
+    this->cltr = 0p;
+
+    // Adjust the ready queue size
+    ready_queue_shrink( cltr );
+
+    // Unlock the RWlock and unregister: we don't need the read_lock any more
+    ready_mutate_unregister( this->unique_id, last_size );
 }
libcfa/src/concurrency/kernel_private.hfa
@@ line 25 @@
 // Scheduler

-struct __attribute__((aligned(128))) __scheduler_lock_id_t;

 extern "C" {

@@ line 80 @@
 // Lock-Free registering/unregistering of threads
 // Register a processor to a given cluster and get its unique id in return
-void register_proc_id( struct __processor_id_t * );
+unsigned register_proc_id( void );

 // Unregister a processor from a given cluster using its id, getting back the original pointer
-void unregister_proc_id( struct __processor_id_t * proc );
+void unregister_proc_id( unsigned );

 //=======================================================================

@@ line 112 @@
 }

-// Cells use by the reader writer lock
-// while not generic it only relies on a opaque pointer
-struct __attribute__((aligned(128))) __scheduler_lock_id_t {
-    // Spin lock used as the underlying lock
-    volatile bool lock;
-
-    // Handle pointing to the proc owning this cell
-    // Used for allocating cells and debugging
-    __processor_id_t * volatile handle;
-
-    #ifdef __CFA_WITH_VERIFY__
-        // Debug, check if this is owned for reading
-        bool owned;
-    #endif
-};
-
-static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
+
+
+

 //-----------------------------------------------------------------------

@@ line 147 @@

     // writer lock
-    volatile bool lock;
+    volatile bool write_lock;

     // data pointer
-    __scheduler_lock_id_t * data;
+    volatile bool * volatile * data;
 };

@@ line 163 @@
 static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
     /* paranoid */ verify( ! __preemption_enabled() );
-    /* paranoid */ verify( kernelTLS().this_proc_id );
-
-    unsigned iproc = kernelTLS().this_proc_id->id;
-    /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
-    /*paranoid*/ verify(iproc < ready);
+    /* paranoid */ verify( ! kernelTLS().in_sched_lock );
+    /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
+    /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );

     // Step 1 : make sure no writer are in the middle of the critical section
-    while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
+    while(__atomic_load_n(&write_lock, (int)__ATOMIC_RELAXED))
         Pause();

@@ line 179 @@

     // Step 2 : acquire our local lock
-    __atomic_acquire( &data[iproc].lock );
-    /*paranoid*/ verify(data[iproc].lock);
+    __atomic_acquire( &kernelTLS().sched_lock );
+    /*paranoid*/ verify(kernelTLS().sched_lock);

     #ifdef __CFA_WITH_VERIFY__
         // Debug, check if this is owned for reading
-        data[iproc].owned = true;
+        kernelTLS().in_sched_lock = true;
     #endif
 }

@@ line 190 @@
 static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
     /* paranoid */ verify( ! __preemption_enabled() );
-    /* paranoid */ verify( kernelTLS().this_proc_id );
-
-    unsigned iproc = kernelTLS().this_proc_id->id;
-    /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
-    /*paranoid*/ verify(iproc < ready);
-    /*paranoid*/ verify(data[iproc].lock);
-    /*paranoid*/ verify(data[iproc].owned);
+    /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
+    /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );
+    /* paranoid */ verify( kernelTLS().sched_lock );
+    /* paranoid */ verify( kernelTLS().in_sched_lock );
     #ifdef __CFA_WITH_VERIFY__
         // Debug, check if this is owned for reading
-        data[iproc].owned = false;
+        kernelTLS().in_sched_lock = false;
     #endif
-    __atomic_unlock(&data[iproc].lock);
+    __atomic_unlock(&kernelTLS().sched_lock);
 }

@@ line 207 @@
 static inline bool ready_schedule_islocked(void) {
     /* paranoid */ verify( ! __preemption_enabled() );
-    /*paranoid*/ verify( kernelTLS().this_proc_id );
-    __processor_id_t * proc = kernelTLS().this_proc_id;
-    return __scheduler_lock->data[proc->id].owned;
+    /* paranoid */ verify( (!kernelTLS().in_sched_lock) || kernelTLS().sched_lock );
+    return kernelTLS().sched_lock;
 }

 static inline bool ready_mutate_islocked() {
-    return __scheduler_lock->lock;
+    return __scheduler_lock->write_lock;
 }
 #endif

@@ line 228 @@
 // Register a processor to a given cluster and get its unique id in return
 // For convenience, also acquires the lock
-static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
-    register_proc_id( proc );
-    return ready_mutate_lock();
+static inline [unsigned, uint_fast32_t] ready_mutate_register() {
+    unsigned id = register_proc_id();
+    uint_fast32_t last = ready_mutate_lock();
+    return [id, last];
 }

 // Unregister a processor from a given cluster using its id, getting back the original pointer
 // assumes the lock is acquired
-static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
+static inline void ready_mutate_unregister( unsigned id, uint_fast32_t last_s ) {
     ready_mutate_unlock( last_s );
-    unregister_proc_id( proc );
+    unregister_proc_id( id );
 }
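The net effect of the kernel_private.hfa changes above is that a reader of the scheduler reader-writer lock now spins only on a flag kept in its own TLS block (kernelTLS().sched_lock) rather than on a cell of the shared __scheduler_lock_id_t array indexed through this_proc_id. For readers unfamiliar with this distributed-flag pattern, the following is a minimal, stand-alone C11 sketch of the read path only; it is not the CFA code, it assumes a single global write_lock flag like the one above, and it substitutes sched_yield() for the runtime's Pause(). The matching writer and registration paths are sketched after the ready_queue.cfa diff below.

#include <stdatomic.h>
#include <stdbool.h>
#include <sched.h>

static atomic_bool write_lock;                 // global writer flag (analogue of the new write_lock field)
static _Thread_local atomic_bool sched_lock;   // this thread's reader flag (analogue of kernelTLS().sched_lock)

// Reader entry: wait out any writer, then take only the thread-local flag.
static void ready_schedule_lock_sketch(void) {
    // Step 1 : make sure no writer is in the middle of the critical section
    while (atomic_load_explicit(&write_lock, memory_order_relaxed))
        sched_yield();

    // Step 2 : spin-acquire our local flag; a writer that already claimed it
    // (writers acquire every registered flag) makes us wait here.
    bool expected = false;
    while (!atomic_compare_exchange_weak_explicit(&sched_lock, &expected, true,
                                                  memory_order_acquire, memory_order_relaxed)) {
        expected = false;
        sched_yield();
    }
}

// Reader exit: just drop the thread-local flag.
static void ready_schedule_unlock_sketch(void) {
    atomic_store_explicit(&sched_lock, false, memory_order_release);
}

Because the flag lives in TLS, the uncontended read path only reads the shared write_lock word and writes a flag that no other reader touches, which appears to be the motivation for moving it out of the shared cell array.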
libcfa/src/concurrency/preemption.cfa
@@ line 687 @@
 // Waits on SIGALRM and send SIGUSR1 to whom ever needs it
 static void * alarm_loop( __attribute__((unused)) void * args ) {
-    __processor_id_t id;
-    register_proc_id(&id);
-    __cfaabi_tls.this_proc_id = &id;
-
+    unsigned id = register_proc_id();

     // Block sigalrms to control when they arrive

@@ line 749 @@
 EXIT:
     __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
-    register_proc_id(&id);
+    unregister_proc_id(id);

     return 0p;
libcfa/src/concurrency/ready_queue.cfa
@@ line 93 @@
     this.alloc = 0;
     this.ready = 0;
-    this.lock = false;
     this.data = alloc(this.max);
-
-    /*paranoid*/ verify( 0 == (((uintptr_t)(this.data )) % 64) );
-    /*paranoid*/ verify( 0 == (((uintptr_t)(this.data + 1)) % 64) );
+    this.write_lock = false;
+
     /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.alloc), &this.alloc));
     /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.ready), &this.ready));

@@ line 106 @@
 }

-void ?{}( __scheduler_lock_id_t & this, __processor_id_t * proc ) {
-    this.handle = proc;
-    this.lock = false;
-    #ifdef __CFA_WITH_VERIFY__
-        this.owned = false;
-    #endif
-}

 //=======================================================================
 // Lock-Free registering/unregistering of threads
-void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
+unsigned register_proc_id( void ) with(*__scheduler_lock) {
     __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);
+    bool * handle = (bool *)&kernelTLS().sched_lock;

     // Step - 1 : check if there is already space in the data

@@ line 124 @@
     // Check among all the ready
     for(uint_fast32_t i = 0; i < s; i++) {
-        __processor_id_t * null = 0p; // Re-write every loop since compare thrashes it
-        if( __atomic_load_n(&data[i].handle, (int)__ATOMIC_RELAXED) == null
-            && __atomic_compare_exchange_n( &data[i].handle, &null, proc, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-            /*paranoid*/ verify(i < ready);
-            /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
-            /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
-            proc->id = i;
-            return;
+        bool * volatile * cell = (bool * volatile *)&data[i]; // Cforall is bugged and the double volatiles causes problems
+        /* paranoid */ verify( handle != *cell );
+
+        bool * null = 0p; // Re-write every loop since compare thrashes it
+        if( __atomic_load_n(cell, (int)__ATOMIC_RELAXED) == null
+            && __atomic_compare_exchange_n( cell, &null, handle, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
+            /* paranoid */ verify(i < ready);
+            /* paranoid */ verify( (kernelTLS().sched_id = i, true) );
+            return i;
         }
     }

@@ line 142 @@


     // Step - 3 : Mark space as used and then publish it.
-    __scheduler_lock_id_t * storage = (__scheduler_lock_id_t *)&data[n];
-    (*storage){ proc };
+    data[n] = handle;
     while() {
         unsigned copy = n;

@@ line 155 @@

     // Return new spot.
-    /*paranoid*/ verify(n < ready);
-    /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size));
-    /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0);
-    proc->id = n;
-}
-
-void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
-    unsigned id = proc->id;
-    /*paranoid*/ verify(id < ready);
-    /*paranoid*/ verify(proc == __atomic_load_n(&data[id].handle, __ATOMIC_RELAXED));
-    __atomic_store_n(&data[id].handle, 0p, __ATOMIC_RELEASE);
+    /* paranoid */ verify(n < ready);
+    /* paranoid */ verify( (kernelTLS().sched_id = n, true) );
+    return n;
+}
+
+void unregister_proc_id( unsigned id ) with(*__scheduler_lock) {
+    /* paranoid */ verify(id < ready);
+    /* paranoid */ verify(id == kernelTLS().sched_id);
+    /* paranoid */ verify(data[id] == &kernelTLS().sched_lock);
+
+    bool * volatile * cell = (bool * volatile *)&data[id]; // Cforall is bugged and the double volatiles causes problems
+
+    __atomic_store_n(cell, 0p, __ATOMIC_RELEASE);

     __cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc);

@@ line 175 @@
 uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {
     /* paranoid */ verify( ! __preemption_enabled() );
+    /* paranoid */ verify( ! kernelTLS().sched_lock );

     // Step 1 : lock global lock
     // It is needed to avoid processors that register mid Critical-Section
     // to simply lock their own lock and enter.
-    __atomic_acquire( &lock );
+    __atomic_acquire( &write_lock );

     // Step 2 : lock per-proc lock

@@ line 187 @@
     uint_fast32_t s = ready;
     for(uint_fast32_t i = 0; i < s; i++) {
-        __atomic_acquire( &data[i].lock );
+        volatile bool * llock = data[i];
+        if(llock) __atomic_acquire( llock );
     }

@@ line 204 @@
     // Alternative solution : return s in write_lock and pass it to write_unlock
     for(uint_fast32_t i = 0; i < last_s; i++) {
-        verify(data[i].lock);
-        __atomic_store_n(&data[i].lock, (bool)false, __ATOMIC_RELEASE);
+        volatile bool * llock = data[i];
+        if(llock) __atomic_store_n(llock, (bool)false, __ATOMIC_RELEASE);
     }

     // Step 2 : release global lock
-    /*paranoid*/ assert(true == lock);
-    __atomic_store_n(&lock, (bool)false, __ATOMIC_RELEASE);
+    /*paranoid*/ assert(true == write_lock);
+    __atomic_store_n(&write_lock, (bool)false, __ATOMIC_RELEASE);

     /* paranoid */ verify( ! __preemption_enabled() );
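To complement the read path sketched after the kernel_private.hfa diff, the ready_queue.cfa changes above contain the slow paths: lock-free registration of a processor's TLS flag into the data array, and the writer's two-step acquisition (take write_lock, then every registered reader flag). The following is a compact, stand-alone C11 sketch of those paths under the same caveats: it is illustrative only, not the CFA implementation or its API, it uses a fixed-size table, and it substitutes sched_yield() for Pause().

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <sched.h>

#define MAX_PROCS 128                          // illustrative fixed bound for the sketch

static atomic_bool      write_lock;            // global writer flag
static atomic_uintptr_t data[MAX_PROCS];       // pointers to registered reader flags (0 = free slot)
static atomic_uint      alloc;                 // next fresh index to hand out
static atomic_uint      ready;                 // number of slots published so far

static _Thread_local atomic_bool sched_lock;   // this thread's reader flag, lives in TLS

// Registration: reuse a freed cell if one exists, otherwise publish a new slot in index order.
static unsigned register_proc_id_sketch(void) {
    uintptr_t handle = (uintptr_t)&sched_lock;
    unsigned n = atomic_load_explicit(&ready, memory_order_acquire);
    for (unsigned i = 0; i < n; i++) {
        uintptr_t free_cell = 0;               // re-written every loop since the CAS thrashes it
        if (atomic_compare_exchange_strong(&data[i], &free_cell, handle))
            return i;                          // claimed an existing empty cell
    }
    unsigned idx = atomic_fetch_add(&alloc, 1);                    // reserve a fresh index
    atomic_store_explicit(&data[idx], handle, memory_order_relaxed);
    unsigned expected = idx;                                       // publish slots strictly in order
    while (!atomic_compare_exchange_weak_explicit(&ready, &expected, idx + 1,
                                                  memory_order_release, memory_order_relaxed)) {
        expected = idx;
        sched_yield();                         // an earlier slot has not been published yet
    }
    return idx;
}

static void unregister_proc_id_sketch(unsigned id) {
    atomic_store_explicit(&data[id], 0, memory_order_release);     // mark the cell free for reuse
}

// Writer: exclude other writers and new readers, then acquire every registered reader flag.
// Returns how many slots were locked so the unlock releases exactly those.
static unsigned ready_mutate_lock_sketch(void) {
    bool f = false;
    while (!atomic_compare_exchange_weak(&write_lock, &f, true)) { f = false; sched_yield(); }

    unsigned n = atomic_load_explicit(&ready, memory_order_acquire);
    for (unsigned i = 0; i < n; i++) {
        atomic_bool * r = (atomic_bool *)atomic_load_explicit(&data[i], memory_order_acquire);
        if (!r) continue;                      // empty cell, nothing to wait for
        bool e = false;
        while (!atomic_compare_exchange_weak(r, &e, true)) { e = false; sched_yield(); }
    }
    return n;
}

static void ready_mutate_unlock_sketch(unsigned last_s) {
    for (unsigned i = 0; i < last_s; i++) {
        atomic_bool * r = (atomic_bool *)atomic_load_explicit(&data[i], memory_order_relaxed);
        if (r) atomic_store_explicit(r, false, memory_order_release);
    }
    atomic_store_explicit(&write_lock, false, memory_order_release);
}

As in the real code, the writer returns the number of slots it locked and the unlock takes that count back (last_s above), presumably so that flags registered after the write lock was taken are not spuriously released.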