Changeset 8cfa4ef
- Timestamp: Apr 15, 2021, 12:05:16 PM (4 years ago)
- Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children: 8590328
- Parents: 2f5ea69, a4b0aa4
- Location: libcfa/src/concurrency
- Files: 6 edited

Note: this is a merge changeset; the changes displayed below correspond to the merge itself. See each parent's diff for the full set of changes relative to that parent.
libcfa/src/concurrency/kernel.cfa
r2f5ea69 → r8cfa4ef

  static void __wake_one(cluster * cltr);

- static void push  (__cluster_idles & idles, processor & proc);
- static void remove(__cluster_idles & idles, processor & proc);
- static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );
+ static void mark_idle (__cluster_proc_list & idles, processor & proc);
+ static void mark_awake(__cluster_proc_list & idles, processor & proc);
+ static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list idles );

  extern void __cfa_io_start( processor * );
…
  // Push self to idle stack
- push(this->cltr->idles, * this);
+ mark_idle(this->cltr->procs, * this);

  // Confirm the ready-queue is empty
…
  if( readyThread ) {
      // A thread was found, cancel the halt
-     remove(this->cltr->idles, * this);
+     mark_awake(this->cltr->procs, * this);

      #if !defined(__CFA_NO_STATISTICS__)
…
  // We were woken up, remove self from idle
- remove(this->cltr->idles, * this);
+ mark_awake(this->cltr->procs, * this);

  // DON'T just proceed, start looking again
…
  unsigned idle;
  unsigned total;
- [idle, total, p] = query(this->idles);
+ [idle, total, p] = query_idles(this->procs);

  // If no one is sleeping, we are done
…
  }

- static void push(__cluster_idles & this, processor & proc) {
+ static void mark_idle(__cluster_proc_list & this, processor & proc) {
      /* paranoid */ verify( ! __preemption_enabled() );
      lock( this );
          this.idle++;
          /* paranoid */ verify( this.idle <= this.total );
-
-         insert_first(this.list, proc);
+         remove(proc);
+         insert_first(this.idles, proc);
      unlock( this );
      /* paranoid */ verify( ! __preemption_enabled() );
  }

- static void remove(__cluster_idles & this, processor & proc) {
+ static void mark_awake(__cluster_proc_list & this, processor & proc) {
      /* paranoid */ verify( ! __preemption_enabled() );
      lock( this );
          this.idle--;
          /* paranoid */ verify( this.idle >= 0 );
-
          remove(proc);
+         insert_last(this.actives, proc);
      unlock( this );
      /* paranoid */ verify( ! __preemption_enabled() );
  }

- static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) {
+ static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list this ) {
+     /* paranoid */ verify( ! __preemption_enabled() );
+     /* paranoid */ verify( ready_schedule_islocked() );
+
      for() {
          uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);
…
          unsigned idle = this.idle;
          unsigned total = this.total;
-         processor * proc = &this.list`first;
+         processor * proc = &this.idles`first;
          // Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it
          asm volatile("": : :"memory");
…
          return [idle, total, proc];
      }
+
+     /* paranoid */ verify( ready_schedule_islocked() );
+     /* paranoid */ verify( ! __preemption_enabled() );
  }
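The new mark_idle/mark_awake pair keeps the idle counter from the old push/remove, but now also moves the processor between two intrusive lists, so the cluster always knows which processors are running and which are sleeping. Below is a minimal plain-C sketch of that bookkeeping, not the CFA code itself: proc_node, proc_list and the list_* helpers are hypothetical stand-ins for processor and dlist, and a pthread mutex replaces the runtime's counting lock.

```c
#include <assert.h>
#include <pthread.h>

/* Hypothetical stand-ins for the runtime's processor and dlist types. */
struct proc_node {
    struct proc_node * prev, * next;   /* intrusive links */
};

struct proc_list {
    pthread_mutex_t lock;              /* stands in for the 64-bit counting lock */
    unsigned idle, total;
    struct proc_node actives;          /* sentinel: processors currently running */
    struct proc_node idles;            /* sentinel: processors currently sleeping */
};

static void list_init(struct proc_node * head) { head->prev = head->next = head; }

static void list_remove(struct proc_node * n) {
    n->prev->next = n->next;
    n->next->prev = n->prev;
    n->prev = n->next = n;
}

static void list_insert_first(struct proc_node * head, struct proc_node * n) {
    n->next = head->next; n->prev = head;
    head->next->prev = n; head->next = n;
}

static void list_insert_last(struct proc_node * head, struct proc_node * n) {
    n->prev = head->prev; n->next = head;
    head->prev->next = n; head->prev = n;
}

/* Count the processor as idle and move it to the front of the idle list. */
static void mark_idle(struct proc_list * this, struct proc_node * proc) {
    pthread_mutex_lock(&this->lock);
    this->idle++;
    assert(this->idle <= this->total);
    list_remove(proc);
    list_insert_first(&this->idles, proc);
    pthread_mutex_unlock(&this->lock);
}

/* Count the processor as awake and move it to the back of the active list. */
static void mark_awake(struct proc_list * this, struct proc_node * proc) {
    pthread_mutex_lock(&this->lock);
    assert(this->idle > 0);
    this->idle--;
    list_remove(proc);
    list_insert_last(&this->actives, proc);
    pthread_mutex_unlock(&this->lock);
}

int main(void) {
    struct proc_list cl = { .lock = PTHREAD_MUTEX_INITIALIZER, .idle = 0, .total = 1 };
    struct proc_node p;
    list_init(&cl.actives); list_init(&cl.idles); list_init(&p);
    list_insert_last(&cl.actives, &p);   /* a processor starts out active */
    mark_idle(&cl, &p);                  /* it goes to sleep */
    mark_awake(&cl, &p);                 /* and wakes up again */
    assert(cl.idle == 0);
    return 0;
}
```

Keeping every processor on exactly one of the two lists is what later lets reassign_cltr_id() in ready_queue.cfa walk them to hand out ready-queue lane preferences.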
libcfa/src/concurrency/kernel.hfa
r2f5ea69 → r8cfa4ef

  // Idle Sleep
- struct __cluster_idles {
+ struct __cluster_proc_list {
      // Spin lock protecting the queue
      volatile uint64_t lock;
…
      // List of idle processors
-     dlist(processor, processor) list;
+     dlist(processor, processor) idles;
+
+     // List of active processors
+     dlist(processor, processor) actives;
  };
…
  // List of idle processors
- __cluster_idles idles;
+ __cluster_proc_list procs;

  // List of threads
libcfa/src/concurrency/kernel/startup.cfa
r2f5ea69 → r8cfa4ef

  this.name = name;
  this.cltr = &_cltr;
+ this.cltr_id = -1u;
  do_terminate = false;
  preemption_alarm = 0p;
…
  #endif

- lock( this.cltr->idles );
- int target = this.cltr->idles.total += 1u;
- unlock( this.cltr->idles );
-
- id = doregister((__processor_id_t*)&this);
-
+ // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
+ uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);
+ this.cltr->procs.total += 1u;
+ insert_last(this.cltr->procs.actives, this);
+
+ // Adjust the ready queue size
+ ready_queue_grow( cltr );
+
+ // Unlock the RWlock
+ ready_mutate_unlock( last_size );
+
+ __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
+ }
+
+ // Not a ctor, it just preps the destruction but should not destroy members
+ static void deinit(processor & this) {
  // Lock the RWlock so no-one pushes/pops while we are changing the queue
  uint_fast32_t last_size = ready_mutate_lock();
+ this.cltr->procs.total -= 1u;
+ remove(this);

  // Adjust the ready queue size
- this.cltr_id = ready_queue_grow( cltr, target );
-
- // Unlock the RWlock
- ready_mutate_unlock( last_size );
-
- __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
- }
-
- // Not a ctor, it just preps the destruction but should not destroy members
- static void deinit(processor & this) {
-     lock( this.cltr->idles );
-     int target = this.cltr->idles.total -= 1u;
-     unlock( this.cltr->idles );
-
-     // Lock the RWlock so no-one pushes/pops while we are changing the queue
-     uint_fast32_t last_size = ready_mutate_lock();
-
-     // Adjust the ready queue size
-     ready_queue_shrink( this.cltr, target );
-
-     // Unlock the RWlock
-     ready_mutate_unlock( last_size );
-
-     // Finally we don't need the read_lock any more
-     unregister((__processor_id_t*)&this);
+ ready_queue_shrink( this.cltr );
+
+ // Unlock the RWlock and unregister: we don't need the read_lock any more
+ ready_mutate_unregister((__processor_id_t*)&this, last_size );

  close(this.idle);
…
  //-----------------------------------------------------------------------------
  // Cluster
- static void ?{}(__cluster_idles & this) {
+ static void ?{}(__cluster_proc_list & this) {
      this.lock = 0;
      this.idle = 0;
      this.total = 0;
-     (this.list){};
  }
…
  // Adjust the ready queue size
- ready_queue_grow( &this, 0 );
+ ready_queue_grow( &this );

  // Unlock the RWlock
…
  // Adjust the ready queue size
- ready_queue_shrink( &this, 0 );
+ ready_queue_shrink( &this );

  // Unlock the RWlock
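init() and deinit() now fold processor registration into the scheduler write-lock critical section: register first, then take the lock; on the way out, release the lock, then unregister. A rough C sketch of that pairing, assuming a pthread_rwlock_t in place of the runtime's custom reader-writer lock and a trivial counter standing in for the real id registry; the names mirror the helpers added in kernel_private.hfa, but the bodies here are illustrative only.

```c
#include <pthread.h>
#include <stdio.h>

/* Hypothetical stand-ins: a pthread rwlock for the scheduler's reader-writer
   lock, and a trivial counter for the processor-id registry. */
static pthread_rwlock_t scheduler_lock = PTHREAD_RWLOCK_INITIALIZER;
static unsigned next_id = 0;

struct proc_id { unsigned id; };

static void register_proc_id(struct proc_id * proc)   { proc->id = next_id++; }
static void unregister_proc_id(struct proc_id * proc) { (void)proc; /* slot returned */ }

/* Mirrors the ordering of ready_mutate_register(): obtain an id first, then
   take the writer lock (the real helper also returns the lock's value). */
static void ready_mutate_register(struct proc_id * proc) {
    register_proc_id(proc);
    pthread_rwlock_wrlock(&scheduler_lock);
}

/* Mirrors ready_mutate_unregister(): release the writer lock first,
   then give the id back. */
static void ready_mutate_unregister(struct proc_id * proc) {
    pthread_rwlock_unlock(&scheduler_lock);
    unregister_proc_id(proc);
}

int main(void) {
    struct proc_id me;
    ready_mutate_register(&me);     /* init(): safe to grow the ready queue here */
    printf("registered as %u while holding the writer lock\n", me.id);
    ready_mutate_unregister(&me);   /* deinit() */
    return 0;
}
```

The ordering follows the diff: a processor owns an id before the queue is resized around it, and the queue changes are published (lock released) before the id is given back.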
libcfa/src/concurrency/kernel_private.hfa
r2f5ea69 → r8cfa4ef

  // Cluster lock API
  //=======================================================================
- // Cells use by the reader writer lock
- // while not generic it only relies on a opaque pointer
- struct __attribute__((aligned(128))) __scheduler_lock_id_t {
-     // Spin lock used as the underlying lock
-     volatile bool lock;
-
-     // Handle pointing to the proc owning this cell
-     // Used for allocating cells and debugging
-     __processor_id_t * volatile handle;
-
-     #ifdef __CFA_WITH_VERIFY__
-         // Debug, check if this is owned for reading
-         bool owned;
-     #endif
- };
-
- static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
-
  // Lock-Free registering/unregistering of threads
  // Register a processor to a given cluster and get its unique id in return
- unsigned doregister( struct __processor_id_t * proc );
+ void register_proc_id( struct __processor_id_t * );

  // Unregister a processor from a given cluster using its id, getting back the original pointer
- void unregister( struct __processor_id_t * proc );
-
- //-----------------------------------------------------------------------
- // Cluster idle lock/unlock
- static inline void lock(__cluster_idles & this) {
-     for() {
-         uint64_t l = this.lock;
-         if(
-             (0 == (l % 2))
-             && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
-         ) return;
-         Pause();
-     }
- }
-
- static inline void unlock(__cluster_idles & this) {
-     /* paranoid */ verify( 1 == (this.lock % 2) );
-     __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
- }
+ void unregister_proc_id( struct __processor_id_t * proc );

  //=======================================================================
…
      __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
  }
+
+ // Cells use by the reader writer lock
+ // while not generic it only relies on a opaque pointer
+ struct __attribute__((aligned(128))) __scheduler_lock_id_t {
+     // Spin lock used as the underlying lock
+     volatile bool lock;
+
+     // Handle pointing to the proc owning this cell
+     // Used for allocating cells and debugging
+     __processor_id_t * volatile handle;
+
+     #ifdef __CFA_WITH_VERIFY__
+         // Debug, check if this is owned for reading
+         bool owned;
+     #endif
+ };
+
+ static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));

  //-----------------------------------------------------------------------
…
  void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );

+ //-----------------------------------------------------------------------
+ // Lock-Free registering/unregistering of threads
+ // Register a processor to a given cluster and get its unique id in return
+ // For convenience, also acquires the lock
+ static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
+     register_proc_id( proc );
+     return ready_mutate_lock();
+ }
+
+ // Unregister a processor from a given cluster using its id, getting back the original pointer
+ // assumes the lock is acquired
+ static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
+     ready_mutate_unlock( last_s );
+     unregister_proc_id( proc );
+ }
+
+ //-----------------------------------------------------------------------
+ // Cluster idle lock/unlock
+ static inline void lock(__cluster_proc_list & this) {
+     /* paranoid */ verify( ! __preemption_enabled() );
+
+     // Start by locking the global RWlock so that we know no-one is
+     // adding/removing processors while we mess with the idle lock
+     ready_schedule_lock();
+
+     // Simple counting lock, acquired, acquired by incrementing the counter
+     // to an odd number
+     for() {
+         uint64_t l = this.lock;
+         if(
+             (0 == (l % 2))
+             && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+         ) return;
+         Pause();
+     }
+
+     /* paranoid */ verify( ! __preemption_enabled() );
+ }
+
+ static inline void unlock(__cluster_proc_list & this) {
+     /* paranoid */ verify( ! __preemption_enabled() );
+
+     /* paranoid */ verify( 1 == (this.lock % 2) );
+     // Simple couting lock, release by incrementing to an even number
+     __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
+
+     // Release the global lock, which we acquired when locking
+     ready_schedule_unlock();
+
+     /* paranoid */ verify( ! __preemption_enabled() );
+ }
+
  //=======================================================================
  // Ready-Queue API
…
  //-----------------------------------------------------------------------
  // Increase the width of the ready queue (number of lanes) by 4
- unsigned ready_queue_grow  (struct cluster * cltr, int target);
+ void ready_queue_grow  (struct cluster * cltr);

  //-----------------------------------------------------------------------
  // Decrease the width of the ready queue (number of lanes) by 4
- void ready_queue_shrink(struct cluster * cltr, int target);
+ void ready_queue_shrink(struct cluster * cltr);
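The lock()/unlock() pair for __cluster_proc_list is the same even/odd counting lock as before, just relocated and now wrapped in ready_schedule_lock()/ready_schedule_unlock(). A standalone C11 sketch of the counter-based lock on its own (no scheduler lock, and a plain busy loop where the runtime issues Pause()):

```c
#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

/* Even counter value => unlocked, odd => locked. */
struct counting_lock { _Atomic uint64_t lock; };

static void counting_lock_acquire(struct counting_lock * this) {
    for (;;) {
        uint64_t l = atomic_load_explicit(&this->lock, memory_order_seq_cst);
        /* Only attempt the acquire when the counter is even (unlocked). */
        if ((l % 2) == 0 &&
            atomic_compare_exchange_strong_explicit(&this->lock, &l, l + 1,
                                                    memory_order_seq_cst,
                                                    memory_order_seq_cst))
            return;
        /* The runtime issues a Pause() spin hint here. */
    }
}

static void counting_lock_release(struct counting_lock * this) {
    /* Caller must hold the lock, i.e. the counter is odd. */
    assert(atomic_load(&this->lock) % 2 == 1);
    /* Incrementing back to an even number releases the lock. */
    atomic_fetch_add_explicit(&this->lock, 1, memory_order_seq_cst);
}

int main(void) {
    static struct counting_lock cl;       /* zero-initialised => unlocked */
    counting_lock_acquire(&cl);
    counting_lock_release(&cl);
    assert(atomic_load(&cl.lock) == 2);   /* each acquire/release pair adds 2 */
    return 0;
}
```

Because the counter only ever grows, an even value also works as a version number, which is presumably what the for() loop in query_idles() in kernel.cfa relies on when it samples the idle fields without taking the lock.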
libcfa/src/concurrency/preemption.cfa
r2f5ea69 → r8cfa4ef

  static void * alarm_loop( __attribute__((unused)) void * args ) {
      __processor_id_t id;
-     id.id = doregister(&id);
+     register_proc_id(&id);
      __cfaabi_tls.this_proc_id = &id;
…
  EXIT:
      __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
-     unregister(&id);
+     register_proc_id(&id);

      return 0p;
libcfa/src/concurrency/ready_queue.cfa
r2f5ea69 → r8cfa4ef

  //=======================================================================
  // Lock-Free registering/unregistering of threads
- unsigned doregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
+ void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
      __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);
…
      /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
      /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
-     return i;
+     proc->id = i;
  }
…
      /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size));
      /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0);
-     return n;
- }
-
- void unregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
+     proc->id = n;
+ }
+
+ void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
      unsigned id = proc->id;
      /*paranoid*/ verify(id < ready);
…
  __attribute__((unused)) int preferred;
  #if defined(BIAS)
+     /* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );
      preferred =
      //*
…
  int preferred;
  #if defined(BIAS)
-     // Don't bother trying locally too much
+     /* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
      preferred = kernelTLS().this_processor->cltr_id;
  #endif
…
  }

+ static void assign_list(unsigned & value, const int inc, dlist(processor, processor) & list, unsigned count) {
+     processor * it = &list`first;
+     for(unsigned i = 0; i < count; i++) {
+         /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
+         it->cltr_id = value;
+         value += inc;
+         it = &(*it)`next;
+     }
+ }
+
+ static void reassign_cltr_id(struct cluster * cltr, const int inc) {
+     unsigned preferred = 0;
+     assign_list(preferred, inc, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
+     assign_list(preferred, inc, cltr->procs.idles  , cltr->procs.idle );
+ }
+
  // Grow the ready queue
- unsigned ready_queue_grow(struct cluster * cltr, int target) {
-     unsigned preferred;
+ void ready_queue_grow(struct cluster * cltr) {
      size_t ncount;
+     int target = cltr->procs.total;

      /* paranoid */ verify( ready_mutate_islocked() );
…
      if(target >= 2) {
          ncount = target * 4;
-         preferred = ncount - 4;
      } else {
          ncount = 1;
-         preferred = 0;
      }
…
      }

+     reassign_cltr_id(cltr, 4);
+
      // Make sure that everything is consistent
      /* paranoid */ check( cltr->ready_queue );
…
      /* paranoid */ verify( ready_mutate_islocked() );
-     return preferred;
  }

  // Shrink the ready queue
- void ready_queue_shrink(struct cluster * cltr, int target) {
+ void ready_queue_shrink(struct cluster * cltr) {
      /* paranoid */ verify( ready_mutate_islocked() );
      __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
…
      // Make sure that everything is consistent
      /* paranoid */ check( cltr->ready_queue );
+
+     int target = cltr->procs.total;

      with( cltr->ready_queue ) {
…
      }

+     reassign_cltr_id(cltr, 4);
+
      // Make sure that everything is consistent
      /* paranoid */ check( cltr->ready_queue );
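reassign_cltr_id() gives every processor a preferred starting lane: active processors are numbered first, idle ones continue after them, each id spaced inc lanes apart (4 here, matching the four lanes created per processor). A small C sketch of the same numbering scheme over plain arrays rather than the runtime's intrusive dlist:

```c
#include <stdio.h>

struct processor { unsigned cltr_id; };

/* Assign ids value, value+inc, value+2*inc, ... to each processor in the list. */
static void assign_list(unsigned * value, int inc, struct processor ** list, unsigned count) {
    for (unsigned i = 0; i < count; i++) {
        list[i]->cltr_id = *value;
        *value += inc;
    }
}

/* Actives get the low ids; idles continue where the actives stopped. */
static void reassign_cltr_id(struct processor ** actives, unsigned n_active,
                             struct processor ** idles,   unsigned n_idle, int inc) {
    unsigned preferred = 0;
    assign_list(&preferred, inc, actives, n_active);
    assign_list(&preferred, inc, idles,   n_idle);
}

int main(void) {
    struct processor a, b, c;
    struct processor * actives[] = { &a, &b };
    struct processor * idles[]   = { &c };
    reassign_cltr_id(actives, 2, idles, 1, 4);
    printf("%u %u %u\n", a.cltr_id, b.cltr_id, c.cltr_id);   /* prints: 0 4 8 */
    return 0;
}
```

Re-running this assignment after every grow or shrink keeps each cltr_id consistent with the new lane count, which is exactly what the new verify(... cltr_id < lanes.count) checks in push and pop assert.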