Changeset 13c5e19
- Timestamp:
- Jun 23, 2020, 4:42:58 PM (5 years ago)
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children:
- de917da3
- Parents:
- b232745
- Location:
- libcfa/src
- Files:
- 3 added
- 8 edited
libcfa/src/concurrency/invoke.h
- The per-kernel-thread random seed in the kernelTLS block (declared __attribute__ ((tls_model ( "initial-exec" ))), just after preemption_state) is widened from uint32_t rand_seed to __uint128_t rand_seed, making room for the state of the 128-bit generator introduced in kernel_private.hfa below.
libcfa/src/concurrency/io.cfa
- The fast-poller bookkeeping struct gains a __processor_id_t id field alongside its stack and pthread_t kthrd.
- In the user-thread poller shutdown path, the paranoid checks on the processor lists (procs.head / idles.head) are replaced by verify( this.nprocessors == 0 || &this == mainCluster ) and verify( !ready_mutate_islocked() ).
- Cleaning up a thread that was preempted onto the ready queue is now done under ready_schedule_lock / ready_schedule_unlock( (struct __processor_id_t *)active_processor() ): instead of manually resetting this.ready_queue.head, the code verifies thrd.link.next != 0p, calls the new remove_head( &this, &thrd ), verifies the removal, clears thrd.link.next and thrd.link.prev, and resets thrd.state, thrd.ticket and thrd.preempted before pretending the thread was blocked all along.
- After io_uring_enter, the check on the submit-queue head becomes a verifyf with an "Expected %u got %u" message instead of a bare verify.
- Statistic counters drop the intermediate .stats level (for example io.submit_q.stats.submit_avg.rdy becomes io.submit_q.submit_avg.rdy), matching the reorganised stats structures.
- __unpark now takes the unparking processor id: the slow poller passes &ring.poller.slow.id both when waking the thread of a completed request from kernel context and when waking the fast poller.
- The slow poller thread sets up a local __stats_t (when statistics are enabled) as its kernelTLS.this_stats, and registers/unregisters its __processor_id_t with doregister / unregister around its main loop.
- The fast poller's call to __drain_io and its statistics update, the search loop in the submission-slot allocator (whose unused LOOKING: label is dropped), and the submission path around io_uring_enter are all bracketed with disable_interrupts() / enable_interrupts( __cfaabi_dbg_ctx ).
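The recurring pattern in io.cfa is that every path updating the per-processor counters reached through __tls_stats() is now wrapped in disable_interrupts() / enable_interrupts(), presumably so the thread cannot be preempted (and migrated to another processor) mid-update. A minimal C sketch of that bracketing; the stand-in functions and the thread-local statistics block below are hypothetical, the real primitives live in the CFA kernel and take a debug-context argument:

#include <stdint.h>

// Hypothetical stand-ins: in the CFA runtime these are kernel primitives and
// the per-processor statistics block reached through __tls_stats().
static _Thread_local struct { uint64_t rdy, csm, avl, cnt; } tls_submit_avg;

static void disable_interrupts( void ) { /* mask the preemption signal */ }
static void enable_interrupts( void )  { /* unmask it, deliver pending preemptions */ }

// Sketch of the bracketing added around the io_uring submission bookkeeping:
// while interrupts are off the counters being bumped stay those of the
// processor the thread is currently running on.
static void record_submit( uint32_t to_submit, int consumed, uint32_t avail ) {
	disable_interrupts();
	tls_submit_avg.rdy += to_submit;
	tls_submit_avg.csm += consumed;
	tls_submit_avg.avl += avail;
	tls_submit_avg.cnt += 1;
	enable_interrupts();
}

int main( void ) {
	record_submit( 4, 4, 28 );
	return 0;
}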
libcfa/src/concurrency/kernel.cfa
- The processor constructor ?{}(processor &, const char [], cluster &) renames its cluster parameter to _cltr (avoiding the clash with the cltr member opened by with(this)) and, after creating the kernel thread, does __atomic_fetch_add( &cltr->nprocessors, 1u, __ATOMIC_SEQ_CST ); the matching destructor does __atomic_fetch_sub after freeing the stack. The main processor's constructor and destructor get the same increment and decrement.
- The cluster constructor initialises this.nprocessors = 0.
- The processor's kernel routine no longer calls doregister(this->cltr, this) / unregister(this->cltr, this) around its run loop, and the already commented-out doregister / unregister( cluster *, processor * ) definitions are deleted: the atomic counter replaces that registration list.
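The locked list of registered processors is thus replaced by a single atomically maintained count. A small self-contained C sketch of the same idea using the identical GCC builtins; the cluster struct here is a hypothetical stand-in containing only the field that matters:

#include <pthread.h>
#include <stdio.h>

// Hypothetical stand-in for the cluster: only the processor count matters here.
struct cluster { volatile unsigned int nprocessors; };

static struct cluster the_cluster = { 0 };

// Each "processor" bumps the count when it comes up and drops it when it goes
// away, mirroring the __atomic_fetch_add / __atomic_fetch_sub in the changeset.
static void * processor_main( void * arg ) {
	(void)arg;
	__atomic_fetch_add( &the_cluster.nprocessors, 1u, __ATOMIC_SEQ_CST );
	/* ... run the scheduler loop ... */
	__atomic_fetch_sub( &the_cluster.nprocessors, 1u, __ATOMIC_SEQ_CST );
	return 0;
}

int main( void ) {
	pthread_t t[4];
	for ( int i = 0; i < 4; i += 1 ) pthread_create( &t[i], 0, processor_main, 0 );
	for ( int i = 0; i < 4; i += 1 ) pthread_join( t[i], 0 );
	// Once every processor has unregistered, checks such as the one added in
	// io.cfa ( this.nprocessors == 0 ) hold again.
	printf( "processors left: %u\n",
	        __atomic_load_n( &the_cluster.nprocessors, __ATOMIC_SEQ_CST ) );
	return 0;
}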
libcfa/src/concurrency/kernel.hfa
- In the cluster declaration, next to the list of idle processors (StackLF(processor) idles), the processor count becomes volatile: unsigned int nprocessors is now volatile unsigned int nprocessors.
libcfa/src/concurrency/kernel_private.hfa
- Adds #include "bits/random.hfa".
- __tls_rand() now returns uint64_t and is implemented as return __lehmer64( kernelTLS.rand_seed ); the old 32-bit xorshift body (shifts by 6, 21 and 7) is left commented out above it.
- The declarations of doregister / unregister( struct cluster *, struct processor * ) are removed.
- A new declaration is added: bool remove_head(struct cluster * cltr, struct $thread * thrd), which removes a thread from a cluster's ready queue and reports whether it was found.
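The body of __lehmer64 is not shown in this changeset (it presumably comes with the new bits/random.hfa), but the __uint128_t seed added in invoke.h matches the usual 128-bit Lehmer/MCG generator popularised by Lemire. A minimal C sketch of that common formulation, for reference only; the constant and structure are that generator's, not necessarily the exact code in bits/random.hfa:

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

// Common 128-bit Lehmer generator (multiplicative congruential, modulus 2^128).
// One multiply per draw; the high 64 bits of the state are returned because
// they have much better statistical quality than the low bits.
static inline uint64_t lehmer64( unsigned __int128 * state ) {
	*state *= 0xda942042e4dd58b5ULL;      // the multiplier used in Lemire's lehmer64
	return (uint64_t)( *state >> 64 );
}

int main( void ) {
	// Seed must be non-zero; odd seeds give the maximum period.
	unsigned __int128 seed = 0x853c49e6748fea9bULL;
	for ( int i = 0; i < 4; i += 1 ) {
		printf( "%016" PRIx64 "\n", lehmer64( &seed ) );
	}
	return 0;
}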
libcfa/src/concurrency/ready_queue.cfa
- Adds #include "snzi.hfa" and #include "ready_subqueue.hfa"; the in-file definitions of the intrusive lane (__intrusive_lane_t with its head/tail anchors, push, pop, is_empty and ts helpers and their paranoid assertions) and of the scalable non-zero indicator (__snzi_val_t, __snzi_node_t, arrive/depart/query and the __snzi_t constructor/destructor) are deleted from this file, presumably now living in two of the three newly added headers.
- BIAS is lowered from 64 to 8.
- In push, the biased branch triggers on 0 != rlow (instead of 0 != (rlow % BIAS)), computes the preferred lane as pid + (rhigh % 4) with pid = kernelTLS.this_processor->id * 4, and counts the event in __tls_stats()->ready.pick.push.local.
- pop is moved ahead of the try_pop helpers and reworked: it draws one 64-bit random value, caps the use of the preferred lanes with local_tries = 8, derives the two candidate lanes from the low and high halves of rhigh (pid + (rhigh % 4) and pid + ((rhigh >> 32ull) % 4)), counts preferred picks in ready.pick.pop.local, and otherwise falls back to two fully random lanes.
- try_pop(cltr, i, j) becomes static inline, keeps only the choice of the older of the two lanes, and delegates the actual dequeue to a new try_pop(cltr, w) overload for the chosen lane.
- A new remove_head(cluster *, $thread *) scans the lanes; under each lane's lock it checks whether the given thread is at the head, pops it if so (verifying the popped thread and calling depart(snzi, i) when the lane empties), and returns whether anything was removed.
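A compact sketch of the biased lane choice that push and pop now share, with BIAS = 8: with probability (BIAS-1)/BIAS the random draw is mapped onto the four lanes owned by the current processor, otherwise onto any lane. The processor id, lane count and toy PRNG below are stand-ins for kernelTLS.this_processor->id, lanes.count and __tls_rand():

#include <stdint.h>
#include <stdio.h>

#define BIAS 8   // the changeset lowers this from 64 to 8

// Map one 64-bit random draw to a lane index: (BIAS-1)/BIAS of the time use one
// of the 4 lanes owned by this processor, 1/BIAS of the time any lane at all.
static unsigned pick_lane( uint64_t r, unsigned proc_id, unsigned lane_count ) {
	unsigned rlow  = (unsigned)( r % BIAS );
	uint64_t rhigh = r / BIAS;
	if ( rlow != 0 ) {
		unsigned pid = proc_id * 4;               // each processor owns 4 consecutive lanes
		return ( pid + (unsigned)( rhigh % 4 ) ) % lane_count;
	}
	return (unsigned)( rhigh % lane_count );      // unbiased fallback
}

int main( void ) {
	uint64_t r = 0x9e3779b97f4a7c15ULL;           // toy LCG state, not __tls_rand()
	unsigned local = 0, total = 100000;
	for ( unsigned n = 0; n < total; n += 1 ) {
		r = r * 6364136223846793005ULL + 1442695040888963407ULL;
		unsigned lane = pick_lane( r, /* proc_id */ 2, /* lane_count */ 32 );
		if ( lane >= 8 && lane < 12 ) local += 1; // processor 2 owns lanes 8..11
	}
	printf( "fraction on local lanes: %.3f (expected about %.3f)\n",
	        (double)local / total, ( BIAS - 1.0 ) / BIAS );
	return 0;
}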
libcfa/src/concurrency/stats.cfa
- __init_stats zeroes the new ready.pick.pop.local counter, and __tally_stats additionally accumulates ready.pick.push.local and ready.pick.pop.local into the cluster totals.
- __print_stats now opens with with( *stats ), reaches the I/O counters through their full io.submit_q.… / io.complete_q.… paths (submit_avg, look_avg, alloc_avg, completed_avg), adds the local-queue counts to the push/pop probe lines ("%'15lu attempts, %'15lu local"), and switches the I/O totals from %'15llu to %'15lu to match the counters' new uint64_t type.
libcfa/src/concurrency/stats.hfa
- Both the push and pop statistic blocks gain a volatile uint64_t local field counting attempts that used the preferred queues.
- The complete_q.completed_avg fields val, slow_cnt and fast_cnt change from unsigned long long int to volatile uint64_t.