Changes in / [3eb55f98:2d8a770]
- Location:
- libcfa/src/concurrency
- Files:
-
- 7 edited
-
kernel.cfa (modified) (9 diffs)
-
kernel.hfa (modified) (3 diffs)
-
kernel/fwd.hfa (modified) (2 diffs)
-
kernel/startup.cfa (modified) (9 diffs)
-
kernel_private.hfa (modified) (9 diffs)
-
preemption.cfa (modified) (2 diffs)
-
ready_queue.cfa (modified) (8 diffs)
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/concurrency/kernel.cfa
r3eb55f98 r2d8a770 163 163 #if !defined(__CFA_NO_STATISTICS__) 164 164 if( this->print_halts ) { 165 __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this-> unique_id, this->name, (void*)this);165 __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->id, this->name, (void*)this); 166 166 } 167 167 #endif … … 223 223 #if !defined(__CFA_NO_STATISTICS__) 224 224 if(this->print_halts) { 225 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this-> unique_id, rdtscl());225 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl()); 226 226 } 227 227 #endif … … 236 236 #if !defined(__CFA_NO_STATISTICS__) 237 237 if(this->print_halts) { 238 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this-> unique_id, rdtscl());238 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl()); 239 239 } 240 240 #endif … … 390 390 391 391 post( this->terminated ); 392 392 393 393 394 if(this == mainProcessor) { … … 552 553 static void __schedule_thread( $thread * thrd ) { 553 554 /* paranoid */ verify( ! __preemption_enabled() ); 555 /* paranoid */ verify( kernelTLS().this_proc_id ); 554 556 /* paranoid */ verify( ready_schedule_islocked()); 555 557 /* paranoid */ verify( thrd ); … … 609 611 static inline $thread * __next_thread(cluster * this) with( *this ) { 610 612 /* paranoid */ verify( ! __preemption_enabled() ); 613 /* paranoid */ verify( kernelTLS().this_proc_id ); 611 614 612 615 ready_schedule_lock(); … … 614 617 ready_schedule_unlock(); 615 618 619 /* paranoid */ verify( kernelTLS().this_proc_id ); 616 620 /* paranoid */ verify( ! __preemption_enabled() ); 617 621 return thrd; … … 621 625 static inline $thread * __next_thread_slow(cluster * this) with( *this ) { 622 626 /* paranoid */ verify( ! 
__preemption_enabled() ); 627 /* paranoid */ verify( kernelTLS().this_proc_id ); 623 628 624 629 ready_schedule_lock(); … … 633 638 ready_schedule_unlock(); 634 639 640 /* paranoid */ verify( kernelTLS().this_proc_id ); 635 641 /* paranoid */ verify( ! __preemption_enabled() ); 636 642 return thrd; -
libcfa/src/concurrency/kernel.hfa
r3eb55f98 r2d8a770 49 49 50 50 // Processor id, required for scheduling threads 51 51 struct __processor_id_t { 52 unsigned id:24; 53 54 #if !defined(__CFA_NO_STATISTICS__) 55 struct __stats_t * stats; 56 #endif 57 }; 52 58 53 59 coroutine processorCtx_t { … … 57 63 // Wrapper around kernel threads 58 64 struct __attribute__((aligned(128))) processor { 65 // Main state 66 inline __processor_id_t; 67 59 68 // Cluster from which to get threads 60 69 struct cluster * cltr; … … 80 89 // Handle to pthreads 81 90 pthread_t kernel_thread; 82 83 // Unique id for the processor (not per cluster)84 unsigned unique_id;85 91 86 92 struct { -
libcfa/src/concurrency/kernel/fwd.hfa
r3eb55f98 r2d8a770 38 38 struct $thread * volatile this_thread; 39 39 struct processor * volatile this_processor; 40 volatile bool sched_lock; 40 struct __processor_id_t * volatile this_proc_id; 41 struct __stats_t * volatile this_stats; 41 42 42 43 struct { … … 55 56 uint64_t bck_seed; 56 57 } ready_rng; 57 58 struct __stats_t * volatile this_stats;59 60 61 #ifdef __CFA_WITH_VERIFY__62 // Debug, check if the rwlock is owned for reading63 bool in_sched_lock;64 unsigned sched_id;65 #endif66 58 } __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" ))); 67 59 -
libcfa/src/concurrency/kernel/startup.cfa
r3eb55f98 r2d8a770 77 77 static void doregister( struct cluster & cltr ); 78 78 static void unregister( struct cluster & cltr ); 79 static void register_tls( processor * this );80 static void unregister_tls( processor * this );81 79 static void ?{}( $coroutine & this, current_stack_info_t * info); 82 80 static void ?{}( $thread & this, current_stack_info_t * info); … … 125 123 NULL, // cannot use 0p 126 124 NULL, 127 false, 125 NULL, 126 NULL, 128 127 { 1, false, false }, 129 0,130 { 0, 0 },131 NULL,132 #ifdef __CFA_WITH_VERIFY__133 false,134 0,135 #endif136 128 }; 137 129 … … 218 210 (*mainProcessor){}; 219 211 220 register_tls( mainProcessor );221 222 212 //initialize the global state variables 223 213 __cfaabi_tls.this_processor = mainProcessor; 214 __cfaabi_tls.this_proc_id = (__processor_id_t*)mainProcessor; 224 215 __cfaabi_tls.this_thread = mainThread; 225 216 … … 282 273 #endif 283 274 284 unregister_tls( mainProcessor );285 286 275 // Destroy the main processor and its context in reverse order of construction 287 276 // These were manually constructed so we need manually destroy them … … 327 316 processor * proc = (processor *) arg; 328 317 __cfaabi_tls.this_processor = proc; 318 __cfaabi_tls.this_proc_id = (__processor_id_t*)proc; 329 319 __cfaabi_tls.this_thread = 0p; 330 320 __cfaabi_tls.preemption_state.[enabled, disable_count] = [false, 1]; 331 332 register_tls( proc );333 334 321 // SKULLDUGGERY: We want to create a context for the processor coroutine 335 322 // which is needed for the 2-step context switch. 
However, there is no reason … … 368 355 #endif 369 356 #endif 370 371 unregister_tls( proc );372 357 373 358 return 0p; … … 511 496 #endif 512 497 498 // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue 499 uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this); 500 this.cltr->procs.total += 1u; 501 insert_last(this.cltr->procs.actives, this); 502 503 // Adjust the ready queue size 504 ready_queue_grow( cltr ); 505 506 // Unlock the RWlock 507 ready_mutate_unlock( last_size ); 508 513 509 __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this); 514 510 } … … 516 512 // Not a ctor, it just preps the destruction but should not destroy members 517 513 static void deinit(processor & this) { 514 // Lock the RWlock so no-one pushes/pops while we are changing the queue 515 uint_fast32_t last_size = ready_mutate_lock(); 516 this.cltr->procs.total -= 1u; 517 remove(this); 518 519 // Adjust the ready queue size 520 ready_queue_shrink( this.cltr ); 521 522 // Unlock the RWlock and unregister: we don't need the read_lock any more 523 ready_mutate_unregister((__processor_id_t*)&this, last_size ); 524 518 525 close(this.idle); 519 526 } … … 649 656 cltr->nthreads -= 1; 650 657 unlock(cltr->thread_list_lock); 651 }652 653 static void register_tls( processor * this ) {654 // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue655 uint_fast32_t last_size;656 [this->unique_id, last_size] = ready_mutate_register();657 658 this->cltr->procs.total += 1u;659 insert_last(this->cltr->procs.actives, *this);660 661 // Adjust the ready queue size662 ready_queue_grow( this->cltr );663 664 // Unlock the RWlock665 ready_mutate_unlock( last_size );666 }667 668 669 static void unregister_tls( processor * this ) {670 // Lock the RWlock so no-one pushes/pops while we are changing the queue671 uint_fast32_t last_size = ready_mutate_lock();672 this->cltr->procs.total -= 1u;673 remove(*this);674 675 // 
clear the cluster so nothing gets pushed to local queues676 cluster * cltr = this->cltr;677 this->cltr = 0p;678 679 // Adjust the ready queue size680 ready_queue_shrink( cltr );681 682 // Unlock the RWlock and unregister: we don't need the read_lock any more683 ready_mutate_unregister( this->unique_id, last_size );684 658 } 685 659 -
libcfa/src/concurrency/kernel_private.hfa
r3eb55f98 r2d8a770 25 25 // Scheduler 26 26 27 struct __attribute__((aligned(128))) __scheduler_lock_id_t; 27 28 28 29 extern "C" { … … 79 80 // Lock-Free registering/unregistering of threads 80 81 // Register a processor to a given cluster and get its unique id in return 81 unsigned register_proc_id( void);82 void register_proc_id( struct __processor_id_t * ); 82 83 83 84 // Unregister a processor from a given cluster using its id, getting back the original pointer 84 void unregister_proc_id( unsigned);85 void unregister_proc_id( struct __processor_id_t * proc ); 85 86 86 87 //======================================================================= … … 111 112 } 112 113 113 114 115 114 // Cells use by the reader writer lock 115 // while not generic it only relies on a opaque pointer 116 struct __attribute__((aligned(128))) __scheduler_lock_id_t { 117 // Spin lock used as the underlying lock 118 volatile bool lock; 119 120 // Handle pointing to the proc owning this cell 121 // Used for allocating cells and debugging 122 __processor_id_t * volatile handle; 123 124 #ifdef __CFA_WITH_VERIFY__ 125 // Debug, check if this is owned for reading 126 bool owned; 127 #endif 128 }; 129 130 static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t)); 116 131 117 132 //----------------------------------------------------------------------- … … 132 147 133 148 // writer lock 134 volatile bool write_lock;149 volatile bool lock; 135 150 136 151 // data pointer 137 volatile bool * volatile* data;152 __scheduler_lock_id_t * data; 138 153 }; 139 154 … … 148 163 static inline void ready_schedule_lock(void) with(*__scheduler_lock) { 149 164 /* paranoid */ verify( ! __preemption_enabled() ); 150 /* paranoid */ verify( ! 
kernelTLS().in_sched_lock ); 151 /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock ); 152 /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id ); 165 /* paranoid */ verify( kernelTLS().this_proc_id ); 166 167 unsigned iproc = kernelTLS().this_proc_id->id; 168 /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id); 169 /*paranoid*/ verify(iproc < ready); 153 170 154 171 // Step 1 : make sure no writer are in the middle of the critical section 155 while(__atomic_load_n(& write_lock, (int)__ATOMIC_RELAXED))172 while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED)) 156 173 Pause(); 157 174 … … 162 179 163 180 // Step 2 : acquire our local lock 164 __atomic_acquire( & kernelTLS().sched_lock );165 /*paranoid*/ verify( kernelTLS().sched_lock);181 __atomic_acquire( &data[iproc].lock ); 182 /*paranoid*/ verify(data[iproc].lock); 166 183 167 184 #ifdef __CFA_WITH_VERIFY__ 168 185 // Debug, check if this is owned for reading 169 kernelTLS().in_sched_lock= true;186 data[iproc].owned = true; 170 187 #endif 171 188 } … … 173 190 static inline void ready_schedule_unlock(void) with(*__scheduler_lock) { 174 191 /* paranoid */ verify( ! 
__preemption_enabled() ); 175 /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock ); 176 /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id ); 177 /* paranoid */ verify( kernelTLS().sched_lock ); 178 /* paranoid */ verify( kernelTLS().in_sched_lock ); 192 /* paranoid */ verify( kernelTLS().this_proc_id ); 193 194 unsigned iproc = kernelTLS().this_proc_id->id; 195 /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id); 196 /*paranoid*/ verify(iproc < ready); 197 /*paranoid*/ verify(data[iproc].lock); 198 /*paranoid*/ verify(data[iproc].owned); 179 199 #ifdef __CFA_WITH_VERIFY__ 180 200 // Debug, check if this is owned for reading 181 kernelTLS().in_sched_lock= false;201 data[iproc].owned = false; 182 202 #endif 183 __atomic_unlock(& kernelTLS().sched_lock);203 __atomic_unlock(&data[iproc].lock); 184 204 } 185 205 … … 187 207 static inline bool ready_schedule_islocked(void) { 188 208 /* paranoid */ verify( ! 
__preemption_enabled() ); 189 /* paranoid */ verify( (!kernelTLS().in_sched_lock) || kernelTLS().sched_lock ); 190 return kernelTLS().sched_lock; 209 /*paranoid*/ verify( kernelTLS().this_proc_id ); 210 __processor_id_t * proc = kernelTLS().this_proc_id; 211 return __scheduler_lock->data[proc->id].owned; 191 212 } 192 213 193 214 static inline bool ready_mutate_islocked() { 194 return __scheduler_lock-> write_lock;215 return __scheduler_lock->lock; 195 216 } 196 217 #endif … … 207 228 // Register a processor to a given cluster and get its unique id in return 208 229 // For convenience, also acquires the lock 209 static inline [unsigned, uint_fast32_t] ready_mutate_register() { 210 unsigned id = register_proc_id(); 211 uint_fast32_t last = ready_mutate_lock(); 212 return [id, last]; 230 static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) { 231 register_proc_id( proc ); 232 return ready_mutate_lock(); 213 233 } 214 234 215 235 // Unregister a processor from a given cluster using its id, getting back the original pointer 216 236 // assumes the lock is acquired 217 static inline void ready_mutate_unregister( unsigned id, uint_fast32_t last_s ) {237 static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) { 218 238 ready_mutate_unlock( last_s ); 219 unregister_proc_id( id);239 unregister_proc_id( proc ); 220 240 } 221 241 -
libcfa/src/concurrency/preemption.cfa
r3eb55f98 r2d8a770 687 687 // Waits on SIGALRM and send SIGUSR1 to whom ever needs it 688 688 static void * alarm_loop( __attribute__((unused)) void * args ) { 689 unsigned id = register_proc_id(); 689 __processor_id_t id; 690 register_proc_id(&id); 691 __cfaabi_tls.this_proc_id = &id; 692 690 693 691 694 // Block sigalrms to control when they arrive … … 746 749 EXIT: 747 750 __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" ); 748 unregister_proc_id(id);751 register_proc_id(&id); 749 752 750 753 return 0p; -
libcfa/src/concurrency/ready_queue.cfa
r3eb55f98 r2d8a770 93 93 this.alloc = 0; 94 94 this.ready = 0; 95 this.lock = false; 95 96 this.data = alloc(this.max); 96 this.write_lock = false; 97 97 98 /*paranoid*/ verify( 0 == (((uintptr_t)(this.data )) % 64) ); 99 /*paranoid*/ verify( 0 == (((uintptr_t)(this.data + 1)) % 64) ); 98 100 /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.alloc), &this.alloc)); 99 101 /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.ready), &this.ready)); … … 104 106 } 105 107 108 void ?{}( __scheduler_lock_id_t & this, __processor_id_t * proc ) { 109 this.handle = proc; 110 this.lock = false; 111 #ifdef __CFA_WITH_VERIFY__ 112 this.owned = false; 113 #endif 114 } 106 115 107 116 //======================================================================= 108 117 // Lock-Free registering/unregistering of threads 109 unsigned register_proc_id( void) with(*__scheduler_lock) {118 void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) { 110 119 __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc); 111 bool * handle = (bool *)&kernelTLS().sched_lock;112 120 113 121 // Step - 1 : check if there is already space in the data … … 116 124 // Check among all the ready 117 125 for(uint_fast32_t i = 0; i < s; i++) { 118 bool * volatile * cell = (bool * volatile *)&data[i]; // Cforall is bugged and the double volatiles causes problems 119 /* paranoid */ verify( handle != *cell ); 120 121 bool * null = 0p; // Re-write every loop since compare thrashes it 122 if( __atomic_load_n(cell, (int)__ATOMIC_RELAXED) == null 123 && __atomic_compare_exchange_n( cell, &null, handle, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 124 /* paranoid */ verify(i < ready); 125 /* paranoid */ verify( (kernelTLS().sched_id = i, true) ); 126 return i; 126 __processor_id_t * null = 0p; // Re-write every loop since compare thrashes it 127 if( __atomic_load_n(&data[i].handle, (int)__ATOMIC_RELAXED) == null 128 && __atomic_compare_exchange_n( &data[i].handle, 
&null, proc, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 129 /*paranoid*/ verify(i < ready); 130 /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size)); 131 /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0); 132 proc->id = i; 127 133 } 128 134 } … … 135 141 136 142 // Step - 3 : Mark space as used and then publish it. 137 data[n] = handle; 143 __scheduler_lock_id_t * storage = (__scheduler_lock_id_t *)&data[n]; 144 (*storage){ proc }; 138 145 while() { 139 146 unsigned copy = n; … … 147 154 148 155 // Return new spot. 149 /* paranoid */ verify(n < ready); 150 /* paranoid */ verify( (kernelTLS().sched_id = n, true) ); 151 return n; 152 } 153 154 void unregister_proc_id( unsigned id ) with(*__scheduler_lock) { 155 /* paranoid */ verify(id < ready); 156 /* paranoid */ verify(id == kernelTLS().sched_id); 157 /* paranoid */ verify(data[id] == &kernelTLS().sched_lock); 158 159 bool * volatile * cell = (bool * volatile *)&data[id]; // Cforall is bugged and the double volatiles causes problems 160 161 __atomic_store_n(cell, 0p, __ATOMIC_RELEASE); 156 /*paranoid*/ verify(n < ready); 157 /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size)); 158 /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0); 159 proc->id = n; 160 } 161 162 void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) { 163 unsigned id = proc->id; 164 /*paranoid*/ verify(id < ready); 165 /*paranoid*/ verify(proc == __atomic_load_n(&data[id].handle, __ATOMIC_RELAXED)); 166 __atomic_store_n(&data[id].handle, 0p, __ATOMIC_RELEASE); 162 167 163 168 __cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc); … … 169 174 uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) { 170 175 /* paranoid */ verify( ! __preemption_enabled() ); 171 /* paranoid */ verify( ! 
kernelTLS().sched_lock );172 176 173 177 // Step 1 : lock global lock 174 178 // It is needed to avoid processors that register mid Critical-Section 175 179 // to simply lock their own lock and enter. 176 __atomic_acquire( & write_lock );180 __atomic_acquire( &lock ); 177 181 178 182 // Step 2 : lock per-proc lock … … 182 186 uint_fast32_t s = ready; 183 187 for(uint_fast32_t i = 0; i < s; i++) { 184 volatile bool * llock = data[i]; 185 if(llock) __atomic_acquire( llock ); 188 __atomic_acquire( &data[i].lock ); 186 189 } 187 190 … … 200 203 // Alternative solution : return s in write_lock and pass it to write_unlock 201 204 for(uint_fast32_t i = 0; i < last_s; i++) { 202 v olatile bool * llock = data[i];203 if(llock) __atomic_store_n(llock, (bool)false, __ATOMIC_RELEASE);205 verify(data[i].lock); 206 __atomic_store_n(&data[i].lock, (bool)false, __ATOMIC_RELEASE); 204 207 } 205 208 206 209 // Step 2 : release global lock 207 /*paranoid*/ assert(true == write_lock);208 __atomic_store_n(& write_lock, (bool)false, __ATOMIC_RELEASE);210 /*paranoid*/ assert(true == lock); 211 __atomic_store_n(&lock, (bool)false, __ATOMIC_RELEASE); 209 212 210 213 /* paranoid */ verify( ! __preemption_enabled() );
Note: See TracChangeset for help on using the changeset viewer.