Changes in / [8cfa4ef:2f5ea69]

Location: libcfa/src/concurrency
Files: 6 edited
- kernel.cfa (modified) (8 diffs)
- kernel.hfa (modified) (3 diffs)
- kernel/startup.cfa (modified) (6 diffs)
- kernel_private.hfa (modified) (4 diffs)
- preemption.cfa (modified) (2 diffs)
- ready_queue.cfa (modified) (11 diffs)
Legend (unified diff markers):
  ' ' Unmodified
  '+' Added
  '-' Removed
libcfa/src/concurrency/kernel.cfa
--- kernel.cfa (r8cfa4ef)
+++ kernel.cfa (r2f5ea69)
@@ -113,7 +113,7 @@
 static void __wake_one(cluster * cltr);
 
-static void mark_idle (__cluster_proc_list & idles, processor & proc);
-static void mark_awake(__cluster_proc_list & idles, processor & proc);
-static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list idles );
+static void push  (__cluster_idles & idles, processor & proc);
+static void remove(__cluster_idles & idles, processor & proc);
+static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );
 
 extern void __cfa_io_start( processor * );
@@ -189,5 +189,5 @@
 
 	// Push self to idle stack
-	mark_idle(this->cltr->procs, * this);
+	push(this->cltr->idles, * this);
 
 	// Confirm the ready-queue is empty
@@ -195,5 +195,5 @@
 	if( readyThread ) {
 		// A thread was found, cancel the halt
-		mark_awake(this->cltr->procs, * this);
+		remove(this->cltr->idles, * this);
 
 		#if !defined(__CFA_NO_STATISTICS__)
@@ -225,5 +225,5 @@
 
 	// We were woken up, remove self from idle
-	mark_awake(this->cltr->procs, * this);
+	remove(this->cltr->idles, * this);
 
 	// DON'T just proceed, start looking again
@@ -617,5 +617,5 @@
 	unsigned idle;
 	unsigned total;
-	[idle, total, p] = query_idles(this->procs);
+	[idle, total, p] = query(this->idles);
 
 	// If no one is sleeping, we are done
@@ -654,29 +654,26 @@
 }
 
-static void mark_idle(__cluster_proc_list & this, processor & proc) {
+static void push  (__cluster_idles & this, processor & proc) {
 	/* paranoid */ verify( ! __preemption_enabled() );
 	lock( this );
 	this.idle++;
 	/* paranoid */ verify( this.idle <= this.total );
-	remove(proc);
-	insert_first(this.idles, proc);
+
+	insert_first(this.list, proc);
 	unlock( this );
 	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
-static void mark_awake(__cluster_proc_list & this, processor & proc) {
+static void remove(__cluster_idles & this, processor & proc) {
 	/* paranoid */ verify( ! __preemption_enabled() );
 	lock( this );
 	this.idle--;
 	/* paranoid */ verify( this.idle >= 0 );
+
 	remove(proc);
-	insert_last(this.actives, proc);
 	unlock( this );
 	/* paranoid */ verify( ! __preemption_enabled() );
 }
 
-static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list this ) {
-	/* paranoid */ verify( ! __preemption_enabled() );
-	/* paranoid */ verify( ready_schedule_islocked() );
-
+static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) {
 	for() {
@@ -685,5 +682,5 @@
 		unsigned idle = this.idle;
 		unsigned total = this.total;
-		processor * proc = &this.idles`first;
+		processor * proc = &this.list`first;
 		// Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it
 		asm volatile("": : :"memory");
@@ -691,7 +688,4 @@
 		return [idle, total, proc];
 	}
-
-	/* paranoid */ verify( ready_schedule_islocked() );
-	/* paranoid */ verify( ! __preemption_enabled() );
 
 }
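The renamed query() keeps the lock-free read pattern of query_idles(): the even/odd counter protecting the idle data doubles as a sequence lock, so a reader never takes the lock; it retries whenever the counter is odd (a writer is inside) or changed across its reads. A minimal sketch of that read side in plain C, with hypothetical names (the CFA original additionally returns the list head and uses a tuple return):

    #include <stdatomic.h>
    #include <stdint.h>

    struct idles { _Atomic uint64_t lock; unsigned idle, total; };

    // Writers only modify idle/total while the counter is odd, so a reader that
    // sees the same even value before and after its reads has a consistent snapshot.
    static void snapshot(struct idles *s, unsigned *idle, unsigned *total) {
        for (;;) {
            uint64_t l = atomic_load(&s->lock);
            if (l % 2 != 0) continue;                   // writer inside, retry
            unsigned i = s->idle;
            unsigned t = s->total;
            atomic_signal_fence(memory_order_seq_cst);  // compiler fence, as in the CFA code
            if (atomic_load(&s->lock) == l) {           // counter unchanged: reads are consistent
                *idle = i; *total = t;
                return;
            }
        }
    }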
libcfa/src/concurrency/kernel.hfa
--- kernel.hfa (r8cfa4ef)
+++ kernel.hfa (r2f5ea69)
@@ -180,5 +180,5 @@
 
 // Idle Sleep
-struct __cluster_proc_list {
+struct __cluster_idles {
 	// Spin lock protecting the queue
 	volatile uint64_t lock;
@@ -191,8 +191,5 @@
 
 	// List of idle processors
-	dlist(processor, processor) idles;
-
-	// List of active processors
-	dlist(processor, processor) actives;
+	dlist(processor, processor) list;
 };
 
@@ -210,5 +207,5 @@
 
 	// List of idle processors
-	__cluster_proc_list procs;
+	__cluster_idles idles;
 
 	// List of threads
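With the actives list dropped, the structure only tracks parked processors; running processors exist solely through the counters, which is why kernel.cfa above no longer does insert_last(this.actives, proc) on wake-up. A rough C rendering of the new layout (hypothetical; the real type is a CFA intrusive dlist, simplified here to a head pointer):

    #include <stdint.h>

    struct processor;               // forward declaration, fields omitted

    struct cluster_idles_sketch {
        volatile uint64_t lock;     // even/odd counting lock guarding the fields below
        unsigned idle;              // processors currently parked
        unsigned total;             // processors registered on this cluster
        struct processor *list;     // idle processors only; no separate actives list
    };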
libcfa/src/concurrency/kernel/startup.cfa
--- kernel/startup.cfa (r8cfa4ef)
+++ kernel/startup.cfa (r2f5ea69)
@@ -469,5 +469,4 @@
 	this.name = name;
 	this.cltr = &_cltr;
-	this.cltr_id = -1u;
 	do_terminate = false;
 	preemption_alarm = 0p;
@@ -490,11 +489,15 @@
 	#endif
 
-	// Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
-	uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);
-	this.cltr->procs.total += 1u;
-	insert_last(this.cltr->procs.actives, this);
+	lock( this.cltr->idles );
+	int target = this.cltr->idles.total += 1u;
+	unlock( this.cltr->idles );
+
+	id = doregister((__processor_id_t*)&this);
+
+	// Lock the RWlock so no-one pushes/pops while we are changing the queue
+	uint_fast32_t last_size = ready_mutate_lock();
 
 	// Adjust the ready queue size
-	ready_queue_grow( cltr );
+	this.cltr_id = ready_queue_grow( cltr, target );
 
 	// Unlock the RWlock
@@ -506,14 +509,19 @@
 // Not a ctor, it just preps the destruction but should not destroy members
 static void deinit(processor & this) {
+	lock( this.cltr->idles );
+	int target = this.cltr->idles.total -= 1u;
+	unlock( this.cltr->idles );
+
 	// Lock the RWlock so no-one pushes/pops while we are changing the queue
 	uint_fast32_t last_size = ready_mutate_lock();
-	this.cltr->procs.total -= 1u;
-	remove(this);
 
 	// Adjust the ready queue size
-	ready_queue_shrink( this.cltr );
-
-	// Unlock the RWlock and unregister: we don't need the read_lock any more
-	ready_mutate_unregister((__processor_id_t*)&this, last_size );
+	ready_queue_shrink( this.cltr, target );
+
+	// Unlock the RWlock
+	ready_mutate_unlock( last_size );
+
+	// Finally we don't need the read_lock any more
+	unregister((__processor_id_t*)&this);
 
 	close(this.idle);
@@ -558,8 +566,9 @@
 //-----------------------------------------------------------------------------
 // Cluster
-static void ?{}(__cluster_proc_list & this) {
+static void ?{}(__cluster_idles & this) {
 	this.lock = 0;
 	this.idle = 0;
 	this.total = 0;
+	(this.list){};
 }
 
@@ -587,5 +596,5 @@
 
 	// Adjust the ready queue size
-	ready_queue_grow( &this );
+	ready_queue_grow( &this, 0 );
 
 	// Unlock the RWlock
@@ -602,5 +611,5 @@
 
 	// Adjust the ready queue size
-	ready_queue_shrink( &this );
+	ready_queue_shrink( &this, 0 );
 
 	// Unlock the RWlock
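The convenience wrapper ready_mutate_register() is gone, so init() now performs three explicit, ordered steps: bump the processor count under the idle-list lock (capturing the new count as target), register for a unique id, and only then take the RW write lock to resize the queue with that count; deinit() mirrors the sequence. A minimal sketch of that ordering, assuming hypothetical C stand-ins for the libcfa calls:

    #include <stdint.h>

    static unsigned total;                                       // stands in for cluster->idles.total
    static void     idles_lock(void)                 { /* counting lock, see kernel_private.hfa */ }
    static void     idles_unlock(void)               { }
    static unsigned doregister_stub(void)            { return 0; }
    static uint32_t ready_mutate_lock_stub(void)     { return 0; }
    static void     ready_mutate_unlock_stub(uint32_t v) { (void)v; }
    static unsigned ready_queue_grow_stub(int target)    { return target >= 2 ? (unsigned)target * 4 - 4 : 0; }

    static void processor_init_sketch(void) {
        idles_lock();
        int target = (int)(total += 1u);          // capture the new processor count
        idles_unlock();

        unsigned id = doregister_stub();          // unique slot in the scheduler RW lock
        (void)id;

        uint32_t last = ready_mutate_lock_stub(); // exclusive access while lanes change
        unsigned cltr_id = ready_queue_grow_stub(target); // grow returns the preferred lane
        (void)cltr_id;
        ready_mutate_unlock_stub(last);
    }

Capturing target under the idle lock means concurrent processor startups each size the queue against a distinct count, so no two growers compute the same lane layout.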
libcfa/src/concurrency/kernel_private.hfa
--- kernel_private.hfa (r8cfa4ef)
+++ kernel_private.hfa (r2f5ea69)
@@ -83,10 +83,46 @@
 // Cluster lock API
 //=======================================================================
+// Cells use by the reader writer lock
+// while not generic it only relies on a opaque pointer
+struct __attribute__((aligned(128))) __scheduler_lock_id_t {
+	// Spin lock used as the underlying lock
+	volatile bool lock;
+
+	// Handle pointing to the proc owning this cell
+	// Used for allocating cells and debugging
+	__processor_id_t * volatile handle;
+
+	#ifdef __CFA_WITH_VERIFY__
+		// Debug, check if this is owned for reading
+		bool owned;
+	#endif
+};
+
+static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
+
 // Lock-Free registering/unregistering of threads
 // Register a processor to a given cluster and get its unique id in return
-void register_proc_id( struct __processor_id_t * );
+unsigned doregister( struct __processor_id_t * proc );
 
 // Unregister a processor from a given cluster using its id, getting back the original pointer
-void unregister_proc_id( struct __processor_id_t * proc );
+void unregister( struct __processor_id_t * proc );
+
+//-----------------------------------------------------------------------
+// Cluster idle lock/unlock
+static inline void lock(__cluster_idles & this) {
+	for() {
+		uint64_t l = this.lock;
+		if(
+			(0 == (l % 2))
+			&& __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+		) return;
+		Pause();
+	}
+}
+
+static inline void unlock(__cluster_idles & this) {
+	/* paranoid */ verify( 1 == (this.lock % 2) );
+	__atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
+}
 
 //=======================================================================
@@ -116,21 +152,3 @@
 	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
 }
-
-// Cells use by the reader writer lock
-// while not generic it only relies on a opaque pointer
-struct __attribute__((aligned(128))) __scheduler_lock_id_t {
-	// Spin lock used as the underlying lock
-	volatile bool lock;
-
-	// Handle pointing to the proc owning this cell
-	// Used for allocating cells and debugging
-	__processor_id_t * volatile handle;
-
-	#ifdef __CFA_WITH_VERIFY__
-		// Debug, check if this is owned for reading
-		bool owned;
-	#endif
-};
-
-static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
 
@@ -229,56 +247,4 @@
 void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
 
-//-----------------------------------------------------------------------
-// Lock-Free registering/unregistering of threads
-// Register a processor to a given cluster and get its unique id in return
-// For convenience, also acquires the lock
-static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
-	register_proc_id( proc );
-	return ready_mutate_lock();
-}
-
-// Unregister a processor from a given cluster using its id, getting back the original pointer
-// assumes the lock is acquired
-static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
-	ready_mutate_unlock( last_s );
-	unregister_proc_id( proc );
-}
-
-//-----------------------------------------------------------------------
-// Cluster idle lock/unlock
-static inline void lock(__cluster_proc_list & this) {
-	/* paranoid */ verify( ! __preemption_enabled() );
-
-	// Start by locking the global RWlock so that we know no-one is
-	// adding/removing processors while we mess with the idle lock
-	ready_schedule_lock();
-
-	// Simple counting lock, acquired, acquired by incrementing the counter
-	// to an odd number
-	for() {
-		uint64_t l = this.lock;
-		if(
-			(0 == (l % 2))
-			&& __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
-		) return;
-		Pause();
-	}
-
-	/* paranoid */ verify( ! __preemption_enabled() );
-}
-
-static inline void unlock(__cluster_proc_list & this) {
-	/* paranoid */ verify( ! __preemption_enabled() );
-
-	/* paranoid */ verify( 1 == (this.lock % 2) );
-	// Simple couting lock, release by incrementing to an even number
-	__atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
-
-	// Release the global lock, which we acquired when locking
-	ready_schedule_unlock();
-
-	/* paranoid */ verify( ! __preemption_enabled() );
-}
-
 //=======================================================================
 // Ready-Queue API
@@ -312,9 +278,9 @@
 //-----------------------------------------------------------------------
 // Increase the width of the ready queue (number of lanes) by 4
-void ready_queue_grow   (struct cluster * cltr);
+unsigned ready_queue_grow   (struct cluster * cltr, int target);
 
 //-----------------------------------------------------------------------
 // Decrease the width of the ready queue (number of lanes) by 4
-void ready_queue_shrink(struct cluster * cltr);
+void ready_queue_shrink(struct cluster * cltr, int target);
 
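The new lock()/unlock() keep only the counting-lock core of the old pair, dropping the surrounding ready_schedule_lock()/unlock() and the preemption asserts. The technique itself is simple: an even counter value means unlocked, and a writer acquires by CAS-ing the counter from even to odd; releasing increments it back to even, which also bumps the sequence number that lock-free readers (query() in kernel.cfa) validate against. The same lock as a stand-alone C sketch, with illustrative names:

    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct { _Atomic uint64_t lock; } counting_lock;  // even = free, odd = held

    static void counting_lock_acquire(counting_lock *l) {
        for (;;) {
            uint64_t v = atomic_load_explicit(&l->lock, memory_order_seq_cst);
            if ((v % 2) == 0 &&                                    // only attempt when free
                atomic_compare_exchange_weak_explicit(&l->lock, &v, v + 1,
                    memory_order_seq_cst, memory_order_seq_cst))   // even -> odd acquires
                return;
            // spin; the CFA code calls Pause() here
        }
    }

    static void counting_lock_release(counting_lock *l) {
        // held implies odd; incrementing back to even both releases the lock
        // and advances the sequence number seen by optimistic readers
        atomic_fetch_add_explicit(&l->lock, 1, memory_order_seq_cst);
    }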
libcfa/src/concurrency/preemption.cfa
--- preemption.cfa (r8cfa4ef)
+++ preemption.cfa (r2f5ea69)
@@ -712,5 +712,5 @@
 static void * alarm_loop( __attribute__((unused)) void * args ) {
 	__processor_id_t id;
-	register_proc_id(&id);
+	id.id = doregister(&id);
 	__cfaabi_tls.this_proc_id = &id;
 
@@ -773,5 +773,5 @@
 EXIT:
 	__cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
-	register_proc_id(&id);
+	unregister(&id);
 
 	return 0p;
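Under the new API the id comes back as doregister()'s return value instead of being written into the argument by the callee, so the alarm thread stores it itself; the exit path now calls unregister() to give the slot back. A sketch of the bracket the alarm thread follows, in hypothetical C with stub functions:

    struct processor_id { unsigned id; };

    static unsigned doregister_stub(struct processor_id *p) { (void)p; return 0; }
    static void     unregister_stub(struct processor_id *p) { (void)p; }

    static void *alarm_loop_sketch(void *args) {
        (void)args;
        struct processor_id id;
        id.id = doregister_stub(&id);  // register once, before touching the scheduler lock
        // ... wait on timers and forward preemption signals to processors ...
        unregister_stub(&id);          // release the reader-lock slot on the way out
        return 0;
    }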
libcfa/src/concurrency/ready_queue.cfa
--- ready_queue.cfa (r8cfa4ef)
+++ ready_queue.cfa (r2f5ea69)
@@ -94,5 +94,5 @@
 //=======================================================================
 // Lock-Free registering/unregistering of threads
-void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
+unsigned doregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
 	__cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);
 
@@ -108,5 +108,5 @@
 			/*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
 			/*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
-			proc->id = i;
+			return i;
 		}
 	}
@@ -135,8 +135,8 @@
 	/*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size));
 	/*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0);
-	proc->id = n;
-}
-
-void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
+	return n;
+}
+
+void unregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
 	unsigned id = proc->id;
 	/*paranoid*/ verify(id < ready);
@@ -254,5 +254,4 @@
 	__attribute__((unused)) int preferred;
 	#if defined(BIAS)
-		/* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );
 		preferred =
 			//*
@@ -345,5 +344,5 @@
 	int preferred;
 	#if defined(BIAS)
-		/* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
+		// Don't bother trying locally too much
 		preferred = kernelTLS().this_processor->cltr_id;
 	#endif
@@ -542,24 +541,8 @@
 }
 
-static void assign_list(unsigned & value, const int inc, dlist(processor, processor) & list, unsigned count) {
-	processor * it = &list`first;
-	for(unsigned i = 0; i < count; i++) {
-		/* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
-		it->cltr_id = value;
-		value += inc;
-		it = &(*it)`next;
-	}
-}
-
-static void reassign_cltr_id(struct cluster * cltr, const int inc) {
-	unsigned preferred = 0;
-	assign_list(preferred, inc, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
-	assign_list(preferred, inc, cltr->procs.idles  , cltr->procs.idle );
-}
-
 // Grow the ready queue
-void ready_queue_grow(struct cluster * cltr) {
+unsigned ready_queue_grow(struct cluster * cltr, int target) {
+	unsigned preferred;
 	size_t ncount;
-	int target = cltr->procs.total;
 
 	/* paranoid */ verify( ready_mutate_islocked() );
@@ -579,6 +562,8 @@
 	if(target >= 2) {
 		ncount = target * 4;
+		preferred = ncount - 4;
 	} else {
 		ncount = 1;
+		preferred = 0;
 	}
 
@@ -610,6 +595,4 @@
 	}
 
-	reassign_cltr_id(cltr, 4);
-
 	// Make sure that everything is consistent
 	/* paranoid */ check( cltr->ready_queue );
@@ -618,8 +601,9 @@
 
 	/* paranoid */ verify( ready_mutate_islocked() );
+	return preferred;
 }
 
 // Shrink the ready queue
-void ready_queue_shrink(struct cluster * cltr) {
+void ready_queue_shrink(struct cluster * cltr, int target) {
 	/* paranoid */ verify( ready_mutate_islocked() );
 	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
@@ -627,6 +611,4 @@
 	// Make sure that everything is consistent
 	/* paranoid */ check( cltr->ready_queue );
 
-	int target = cltr->procs.total;
-
 	with( cltr->ready_queue ) {
@@ -697,6 +679,4 @@
 	}
 
-	reassign_cltr_id(cltr, 4);
-
 	// Make sure that everything is consistent
 	/* paranoid */ check( cltr->ready_queue );
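ready_queue_grow() now derives the caller's preferred lane directly from the lane arithmetic instead of re-walking the processor lists with reassign_cltr_id(): with target processors there are target * 4 lanes (or a single shared lane below two processors), and the newest processor's block of four starts at ncount - 4. The arithmetic, checked in plain C (grow_lanes is a hypothetical helper mirroring the math in the diff):

    #include <stdio.h>

    static unsigned grow_lanes(int target, unsigned *preferred) {
        unsigned ncount;
        if (target >= 2) {
            ncount = (unsigned)target * 4;  // four lanes per processor
            *preferred = ncount - 4;        // first lane of the newest block
        } else {
            ncount = 1;                     // single processor: one shared lane
            *preferred = 0;
        }
        return ncount;
    }

    int main(void) {
        unsigned pref;
        for (int t = 1; t <= 4; t++) {
            unsigned n = grow_lanes(t, &pref);
            printf("target=%d -> lanes=%u preferred=%u\n", t, n, pref);
        }
        return 0;
    }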