- Timestamp:
- Jun 11, 2020, 3:15:13 PM
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children:
- b388ee81
- Parents:
- ab8a023
- Location:
- libcfa/src/concurrency
- Files:
-
- 2 edited
Legend:
- Unmodified: lines with no prefix
- Added: lines prefixed with +
- Removed: lines prefixed with -
libcfa/src/concurrency/kernel.hfa
rab8a023 r61d7bec

 // Intrusives lanes which are used by the relaxed ready queue
-struct __attribute__((aligned(128))) __intrusive_lane_t {
-    // spin lock protecting the queue
-    volatile bool lock;
-
-    // anchor for the head and the tail of the queue
-    struct __sentinel_t {
-        // Link lists fields
-        // instrusive link field for threads
-        // must be exactly as in $thread
-        __thread_desc_link link;
-    } before, after;
-
-#if defined(__CFA_WITH_VERIFY__)
-    // id of last processor to acquire the lock
-    // needed only to check for mutual exclusion violations
-    unsigned int last_id;
-
-    // number of items on this list
-    // needed only to check for deadlocks
-    unsigned int count;
-#endif
-
-    // Optional statistic counters
-#if !defined(__CFA_NO_SCHED_STATS__)
-    struct __attribute__((aligned(64))) {
-        // difference between number of push and pops
-        ssize_t diff;
-
-        // total number of pushes and pops
-        size_t push;
-        size_t pop ;
-    } stat;
-#endif
-};
-
+struct __attribute__((aligned(128))) __intrusive_lane_t;
 void ?{}(__intrusive_lane_t & this);
 void ^?{}(__intrusive_lane_t & this);

-typedef unsigned long long __cfa_readyQ_mask_t;
-
-// enum {
-//     __cfa_ready_queue_mask_size = (64 - sizeof(size_t)) / sizeof(size_t),
-//     __cfa_max_ready_queues = __cfa_ready_queue_mask_size * 8 * sizeof(size_t)
-// };
-
-#define __cfa_lane_mask_size ((64 - sizeof(size_t)) / sizeof(__cfa_readyQ_mask_t))
-#define __cfa_max_lanes (__cfa_lane_mask_size * 8 * sizeof(__cfa_readyQ_mask_t))
+// Counter used for wether or not the lanes are all empty
+struct __attribute__((aligned(128))) __snzi_node_t;
+struct __snzi_t {
+    unsigned mask;
+    int root;
+    __snzi_node_t * nodes;
+};
+
+void  ?{}( __snzi_t & this, unsigned depth );
+void ^?{}( __snzi_t & this );

 //TODO adjust cache size to ARCHITECTURE
…
     // Data tracking how many/which lanes are used
     // Aligned to 128 for cache locality
-    struct {
-        // number of non-empty lanes
-        volatile size_t count;
-
-        // bit mask, set bits indentify which lanes are non-empty
-        volatile __cfa_readyQ_mask_t mask[ __cfa_lane_mask_size ];
-    } used;
+    __snzi_t snzi;

     // Data tracking the actual lanes
…
     // Statistics
     #if !defined(__CFA_NO_STATISTICS__)
-        __attribute__((aligned(64))) struct {
+        struct __attribute__((aligned(64))) {
             struct {
                 // Push statistic
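The new __snzi_t handle above carries only a leaf mask, a root index, and a node array; how the three relate is defined by the constructor added in ready_queue.cfa below. The following is a minimal sketch in plain C (the names and the main harness are mine, not part of the changeset) of that sizing arithmetic for a complete binary tree of a given depth:

    /* Illustrative sketch only: node-count arithmetic implied by the
     * __snzi_t constructor in ready_queue.cfa below.  For a tree of depth d
     * there are 2^d leaves; an arriving lane index is reduced with `mask`,
     * and `root` is the index of the last (root) node in the array. */
    #include <assert.h>

    int main(void) {
        for (unsigned depth = 1; depth <= 4; depth++) {
            unsigned leaves = 1u << depth;             // lanes hash onto these
            unsigned mask   = leaves - 1;              // `idx & mask` selects a leaf
            unsigned root   = (1u << (depth + 1)) - 2; // index of the root node
            unsigned nodes  = root + 1;                // size of the node array
            assert(nodes == 2 * leaves - 1);           // complete binary tree
            assert((mask & leaves) == 0);              // mask covers only the leaf range
        }
        return 0;
    }

In the changeset the depth handed to the constructor is log2( lanes.count / 8 ), so the tree is rebuilt to match the lane count and several lanes hash onto each leaf via idx & mask.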
libcfa/src/concurrency/ready_queue.cfa
rab8a023 r61d7bec
 #define _GNU_SOURCE
 #include "stdlib.hfa"
+#include "math.hfa"

 static const size_t cache_line_size = 64;
…

     return max_cores_l;
-}
-
-// Picks a random 1 bit in 'mask' according to random number 'rnum'.
-static inline unsigned rand_bit(unsigned rnum, __cfa_readyQ_mask_t mask) {
-#if defined( __i386 )
-    static_assert(sizeof(mask) == 4);
-    unsigned bit = mask ? rnum % __builtin_popcount(mask) : 0;
-    #if !defined(__BMI2__)
-        #error rand_bit not implemented for non __BMI2__ i386
-    #else
-        uint32_t picked = _pdep_u32(1ul << bit, mask);
-        return picked ? __builtin_ctz(picked) : 0;
-    #endif
-#elif defined( __x86_64 )
-    static_assert(sizeof(mask) == 8);
-    unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0;
-    #if !defined(__BMI2__)
-        uint64_t v = mask;       // Input value to find position with rank r.
-        unsigned int r = bit + 1;// Input: bit's desired rank [1-64].
-        unsigned int s;          // Output: Resulting position of bit with rank r [1-64]
-        uint64_t a, b, c, d;     // Intermediate temporaries for bit count.
-        unsigned int t;          // Bit count temporary.
-
-        // Do a normal parallel bit count for a 64-bit integer,
-        // but store all intermediate steps.
-        a = v - ((v >> 1) & ~0UL/3);
-        b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
-        c = (b + (b >> 4)) & ~0UL/0x11;
-        d = (c + (c >> 8)) & ~0UL/0x101;
-
-
-        t = (d >> 32) + (d >> 48);
-        // Now do branchless select!
-        s = 64;
-        s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
-        t = (d >> (s - 16)) & 0xff;
-        s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
-        t = (c >> (s - 8)) & 0xf;
-        s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
-        t = (b >> (s - 4)) & 0x7;
-        s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
-        t = (a >> (s - 2)) & 0x3;
-        s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
-        t = (v >> (s - 1)) & 0x1;
-        s -= ((t - r) & 256) >> 8;
-        return s - 1;
-    #else
-        uint64_t picked = _pdep_u64(1ul << bit, mask);
-        return picked ? __builtin_ctzl(picked) : 0;
-    #endif
-#elif defined( __ARM_ARCH )
-    #error rand_bit not implemented for arm
-#else
-    #error uknown hardware architecture
-#endif
-}
-
-
-//-----------------------------------------------------------------------------
-// Helpers used by extract
-// (_mask_bitsidx() & X) returns a bit index valid for a __cfa_readyQ_mask_t, where X is any integer
-static inline __cfa_readyQ_mask_t _mask_bitsidx () __attribute__ ((const)) { return (8 * sizeof(__cfa_readyQ_mask_t)) - 1; }
-
-// (X >> _mask_shiftidx()) retuns an index into an array of __cfa_readyQ_mask_t
-static inline __cfa_readyQ_mask_t _mask_shiftidx() __attribute__ ((const)) { return (8 * sizeof(__cfa_readyQ_mask_t)) - __builtin_clzl(_mask_bitsidx()); }
-
-
-// Assuming a large bit mask represented as an array of __cfa_readyQ_mask_t
-// Given an index into the large mask, returns the bit index and which __cfa_readyQ_mask_t index in the array
-static inline [__cfa_readyQ_mask_t, __cfa_readyQ_mask_t] extract(__cfa_readyQ_mask_t idx) {
-    __cfa_readyQ_mask_t word = idx >> _mask_shiftidx();
-    __cfa_readyQ_mask_t bit  = idx &  _mask_bitsidx();
-    return [bit, word];
 }

…
 // Intrusive Queue used by ready queue
 //=======================================================================
+// Intrusives lanes which are used by the relaxed ready queue
+struct __attribute__((aligned(128))) __intrusive_lane_t {
+    // spin lock protecting the queue
+    volatile bool lock;
+
+    // anchor for the head and the tail of the queue
+    struct __sentinel_t {
+        // Link lists fields
+        // instrusive link field for threads
+        // must be exactly as in $thread
+        __thread_desc_link link;
+    } before, after;
+
+#if defined(__CFA_WITH_VERIFY__)
+    // id of last processor to acquire the lock
+    // needed only to check for mutual exclusion violations
+    unsigned int last_id;
+
+    // number of items on this list
+    // needed only to check for deadlocks
+    unsigned int count;
+#endif
+
+    // Optional statistic counters
+#if !defined(__CFA_NO_SCHED_STATS__)
+    struct __attribute__((aligned(64))) {
+        // difference between number of push and pops
+        ssize_t diff;
+
+        // total number of pushes and pops
+        size_t push;
+        size_t pop ;
+    } stat;
+#endif
+};
+
+void ?{}(__intrusive_lane_t & this);
+void ^?{}(__intrusive_lane_t & this);
+
 // Get the head pointer (one before the first element) from the anchor
 static inline $thread * head(const __intrusive_lane_t & this) {
…
     /* paranoid */ verify(__alignof__(this) == 128);
     /* paranoid */ verifyf(((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128));
-
-    /* paranoid */ verifyf(_mask_shiftidx() == 6 , "%llu", _mask_shiftidx());
-    /* paranoid */ verifyf(_mask_bitsidx () == 63, "%llu", _mask_bitsidx());
 }

…
     // Cannot verify here since it may not be locked
     return this.before.link.ts;
+}
+
+//=======================================================================
+// Scalable Non-Zero counter
+//=======================================================================
+
+union __snzi_val_t {
+    uint64_t _all;
+    struct __attribute__((packed)) {
+        char cnt;
+        uint64_t ver:56;
+    };
+};
+
+bool cas(volatile __snzi_val_t & self, __snzi_val_t & exp, char _cnt, uint64_t _ver) {
+    __snzi_val_t t;
+    t.ver = _ver;
+    t.cnt = _cnt;
+    /* paranoid */ verify(t._all == ((_ver << 8) | ((unsigned char)_cnt)));
+    return __atomic_compare_exchange_n(&self._all, &exp._all, t._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+}
+
+bool cas(volatile __snzi_val_t & self, __snzi_val_t & exp, const __snzi_val_t & tar) {
+    return __atomic_compare_exchange_n(&self._all, &exp._all, tar._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+}
+
+void ?{}( __snzi_val_t & this ) { this._all = 0; }
+void ?{}( __snzi_val_t & this, const volatile __snzi_val_t & o) { this._all = o._all; }
+
+struct __attribute__((aligned(128))) __snzi_node_t {
+    volatile __snzi_val_t value;
+    struct __snzi_node_t * parent;
+    bool is_root;
+};
+
+static inline void arrive( __snzi_node_t & );
+static inline void depart( __snzi_node_t & );
+
+#define __snzi_half -1
+
+//--------------------------------------------------
+// Root node
+static void arrive_r( __snzi_node_t & this ) {
+    /* paranoid */ verify( this.is_root );
+    __atomic_fetch_add(&this.value._all, 1, __ATOMIC_SEQ_CST);
+}
+
+static void depart_r( __snzi_node_t & this ) {
+    /* paranoid */ verify( this.is_root );
+    __atomic_fetch_sub(&this.value._all, 1, __ATOMIC_SEQ_CST);
+}
+
+//--------------------------------------------------
+// Hierarchical node
+static void arrive_h( __snzi_node_t & this ) {
+    int undoArr = 0;
+    bool success = false;
+    while(!success) {
+        __snzi_val_t x = { this.value };
+        /* paranoid */ verify(x.cnt <= 120);
+        if( x.cnt >= 1 ) {
+            if( cas( this.value, x, x.cnt + 1, x.ver ) ) {
+                success = true;
+            }
+        }
+        /* paranoid */ verify(x.cnt <= 120);
+        if( x.cnt == 0 ) {
+            if( cas( this.value, x, __snzi_half, x.ver + 1) ) {
+                success = true;
+                x.cnt = __snzi_half;
+                x.ver = x.ver + 1;
+            }
+        }
+        /* paranoid */ verify(x.cnt <= 120);
+        if( x.cnt == __snzi_half ) {
+            /* paranoid */ verify( this.parent);
+            arrive( *this.parent );
+            if( !cas( this.value, x, 1, x.ver) ) {
+                undoArr = undoArr + 1;
+            }
+        }
+    }
+
+    for(int i = 0; i < undoArr; i++) {
+        /* paranoid */ verify( this.parent );
+        depart( *this.parent );
+    }
+}
+
+static void depart_h( __snzi_node_t & this ) {
+    while(true) {
+        const __snzi_val_t x = { this.value };
+        /* paranoid */ verifyf(x.cnt >= 1, "%d", x.cnt);
+        if( cas( this.value, x, x.cnt - 1, x.ver ) ) {
+            if( x.cnt == 1 ) {
+                /* paranoid */ verify( this.parent );
+                depart( *this.parent );
+            }
+            return;
+        }
+    }
+}
+
+//--------------------------------------------------
+// All nodes
+static inline void arrive( __snzi_node_t & this ) {
+    if(this.is_root) arrive_r( this );
+    else arrive_h( this );
+}
+
+static inline void depart( __snzi_node_t & this ) {
+    if(this.is_root) depart_r( this );
+    else depart_h( this );
+}
+
+static inline bool query( __snzi_node_t & this ) {
+    /* paranoid */ verify( this.is_root );
+    return this.value._all > 0;
+}
+
+//--------------------------------------------------
+// SNZI object
+void ?{}( __snzi_t & this, unsigned depth ) with( this ) {
+    mask = (1 << depth) - 1;
+    root = (1 << (depth + 1)) - 2;
+    nodes = alloc( root + 1 );
+
+    int width = 1 << depth;
+    for(int i = 0; i < root; i++) {
+        nodes[i].value._all = 0;
+        nodes[i].parent = &nodes[(i / 2) + width ];
+        nodes[i].is_root = false;
+    }
+
+    nodes[ root ].value._all = 0;
+    nodes[ root ].parent = 0p;
+    nodes[ root ].is_root = true;
+}
+
+void ^?{}( __snzi_t & this ) {
+    free( this.nodes );
+}
+
+static inline void arrive( __snzi_t & this, int idx) {
+    idx &= this.mask;
+    arrive( this.nodes[idx] );
+}
+
+static inline void depart( __snzi_t & this, int idx) {
+    idx &= this.mask;
+    depart( this.nodes[idx] );
+}
+
+static inline bool query( const __snzi_t & this ) {
+    return query( this.nodes[ this.root ] );
 }

…

 void ?{}(__ready_queue_t & this) with (this) {
-    used.count = 0;
-    for( i ; __cfa_lane_mask_size ) {
-        used.mask[i] = 0;
-    }
-
     lanes.data = alloc(4);
…
     }
     lanes.count = 4;
+    snzi{ log2( lanes.count / 8 ) };

     #if !defined(__CFA_NO_STATISTICS__)
…
 void ^?{}(__ready_queue_t & this) with (this) {
     verify( 4 == lanes.count );
-    verify( 0 == used.count );
+    verify( !query( snzi ) );
+
+    ^(snzi){};

     for( i; 4 ) {
…
     }
     free(lanes.data);
-
-
-    #if defined(__CFA_WITH_VERIFY__)
-        for( i ; __cfa_lane_mask_size ) {
-            assert( 0 == used.mask[i] );
-        }
-    #endif
-}
-
-//-----------------------------------------------------------------------
-enum mask_strictness {
-    STRICT,
-    NOCHECK
-};
-
-// Set a given bit in the bit mask array
-// strictness determines of the bit had to be cleared before
-static inline void mask_set(__cfa_readyQ_mask_t * mask, unsigned index, mask_strictness strict) {
-    // Extract the array and bit indexes
-    __cfa_readyQ_mask_t word;
-    __cfa_readyQ_mask_t bit;
-    [bit, word] = extract(index);
-
-    __cfadbg_print_safe(ready_queue, "Kernel : Ready queue extracted index %u as [bit %llu, word %llu]\n", index, bit, word);
-
-    // Conditional check
-    verifyf(
-        strict != STRICT || // Conditional check if it was expected to be cleared
-        ((mask[word] & (1ull << bit)) == 0),
-        "Before set %llu:%llu (%u), %llx & %llx", word, bit, index, mask[word], (1ull << bit)
-    );
-
-    // Atomically set the bit
-    __attribute__((unused)) bool ret = __atomic_bts(&mask[word], bit);
-
-    // Conditional check
-    verifyf(
-        strict != STRICT || // Conditional check if it was expected to be cleared
-        !ret,
-        "Bit was not set but bts returned true"
-    );
-
-    // Unconditional check
-    verifyf(
-        (mask[word] & (1ull << bit)) != 0,
-        "After set %llu:%llu (%u), %llx & %llx", word, bit, index, mask[word], (1ull << bit)
-    );
-}
-
-static inline void mask_clear(__cfa_readyQ_mask_t * mask, unsigned index, mask_strictness strict) {
-    // Extract the array and bit indexes
-    __cfa_readyQ_mask_t word;
-    __cfa_readyQ_mask_t bit;
-    [bit, word] = extract(index);
-
-    // Conditional check
-    verifyf(
-        strict != STRICT || // Conditional check if it was expected to be set
-        ((mask[word] & (1ull << bit)) != 0),
-        "Before clear %llu:%llu (%u), %llx & %llx", word, bit, index, mask[word], (1ull << bit)
-    );
-
-    // Atomically clear the bit
-    __attribute__((unused)) bool ret = __atomic_btr(&mask[word], bit);
-
-    // Conditional check
-    verifyf(
-        strict != STRICT || // Conditional check if it was expected to be cleared
-        ret,
-        "Bit was set but btr returned false"
-    );
-
-    // Unconditional check
-    verifyf(
-        (mask[word] & (1ull << bit)) == 0,
-        "After clear %llu:%llu (%u), %llx & %llx", word, bit, index, mask[word], (1ull << bit)
-    );
 }

 //-----------------------------------------------------------------------
 __attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
-    __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p (mask %llu)\n", thrd, cltr, used.mask[0]);
+    __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);

     // write timestamp
…
     #endif

-    __attribute__((unused)) size_t num = __atomic_load_n( &used.count, __ATOMIC_RELAXED );
     bool first = false;

…
     // If this lane used to be empty we need to do more
     if(lane_first) {
-        // Update the bit mask
-        mask_set((__cfa_readyQ_mask_t *)used.mask, i, STRICT);
-
-        // Update the global count
-        size_t ret = __atomic_fetch_add( &used.count, 1z, __ATOMIC_SEQ_CST);
-
-
         // Check if the entire queue used to be empty
-        first = (ret == 0);
+        first = !query(snzi);
+
+        // Update the snzi
+        arrive( snzi, i );
     }

     #if defined(__CFA_WITH_VERIFY__)
-        /* paranoid */ verifyf( used.count <= lanes.count, "Non-empty count (%zu) exceeds actual count (%zu)\n", used.count, lanes.count );
         /* paranoid */ verifyf( lanes.data[i].last_id == kernelTLS.this_processor->id, "Expected last processor to lock queue %u to be %u, was %u\n", i, lanes.data[i].last_id, kernelTLS.this_processor->id );
         /* paranoid */ verifyf( lanes.data[i].lock, "List %u is not locked\n", i );
…
     #if !defined(__CFA_NO_STATISTICS__)
         tls.pick.push.success++;
-        tls.used.value += num;
-        tls.used.count += 1;
     #endif

…
     // If this was the last element in the lane
     if(emptied) {
-        // Update the global count
-        __atomic_fetch_sub( &used.count, 1z, __ATOMIC_SEQ_CST);
-
-        // Update the bit mask
-        mask_clear((__cfa_readyQ_mask_t *)used.mask, w, STRICT);
+        depart( snzi, w );
     }

…
     #endif

-    // For statistics, check the count before we release the lock
-    #if !defined(__CFA_NO_STATISTICS__)
-        int num = __atomic_load_n( &used.count, __ATOMIC_RELAXED );
-    #endif
-
     // Unlock and return
     __atomic_unlock(&lane.lock);
…
     #if !defined(__CFA_NO_STATISTICS__)
         tls.pick.pop.success++;
-        tls.used.value += num;
-        tls.used.count += 1;
     #endif

…

     // As long as the list is not empty, try finding a lane that isn't empty and pop from it
-    while( __atomic_load_n( &used.count, __ATOMIC_RELAXED ) != 0) {
-        #if !defined(__CFA_READQ_NO_BITMASK__)
-            // If using bit masks
-            #if !defined(__CFA_NO_SCHED_STATS__)
-                tls.pick.pop.maskrds++;
-            #endif
-
-            // Pick two lists at random
-            unsigned ri = __tls_rand();
-            unsigned rj = __tls_rand();
-
-            // Find which __cfa_readyQ_mask_t the two lists belong
-            unsigned num = ((__atomic_load_n( &lanes.count, __ATOMIC_RELAXED ) - 1) >> 6) + 1;
-            unsigned wdxi = (ri >> 6u) % num;
-            unsigned wdxj = (rj >> 6u) % num;
-
-            // Get the actual __cfa_readyQ_mask_t
-            size_t maski = __atomic_load_n( &used.mask[wdxi], __ATOMIC_RELAXED );
-            size_t maskj = __atomic_load_n( &used.mask[wdxj], __ATOMIC_RELAXED );
-
-            // If both of these masks are empty, retry
-            if(maski == 0 && maskj == 0) continue;
-
-            // Pick one of the non-zero bits in the masks and get the bit indexes
-            unsigned bi = rand_bit(ri, maski);
-            unsigned bj = rand_bit(rj, maskj);
-
-            // some checks
-            /* paranoid */ verifyf(bi < 64, "%zu %u", maski, bi);
-            /* paranoid */ verifyf(bj < 64, "%zu %u", maskj, bj);
-
-            // get the general list index
-            unsigned i = bi | (wdxi << 6);
-            unsigned j = bj | (wdxj << 6);
-
-            // some more checks
-            /* paranoid */ verifyf(i < lanes.count, "%u", wdxi << 6);
-            /* paranoid */ verifyf(j < lanes.count, "%u", wdxj << 6);
-
-            // try popping from the 2 picked lists
-            struct $thread * thrd = try_pop(cltr, i, j);
-            if(thrd) return thrd;
-        #else
-            // Pick two lists at random
-            int i = __tls_rand() % __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
-            int j = __tls_rand() % __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
-
-            // try popping from the 2 picked lists
-            struct $thread * thrd = try_pop(cltr, i, j);
-            if(thrd) return thrd;
-        #endif
+    while( query(snzi) ) {
+        // Pick two lists at random
+        int i = __tls_rand() % __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
+        int j = __tls_rand() % __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
+
+        // try popping from the 2 picked lists
+        struct $thread * thrd = try_pop(cltr, i, j);
+        if(thrd) return thrd;
     }

…
 static void check( __ready_queue_t & q ) with (q) {
     #if defined(__CFA_WITH_VERIFY__)
-        {
-            int idx = 0;
-            for( w ; __cfa_lane_mask_size ) {
-                for( b ; 8 * sizeof(__cfa_readyQ_mask_t) ) {
-                    bool is_empty = idx < lanes.count ? (ts(lanes.data[idx]) == 0) : true;
-                    bool should_be_empty = 0 == (used.mask[w] & (1z << b));
-                    assertf(should_be_empty == is_empty, "Inconsistent list %d, mask expect : %d, actual is got %d", idx, should_be_empty, (bool)is_empty);
-                    assert(__cfa_max_lanes > idx);
-                    idx++;
-                }
-            }
-        }
-
         {
             for( idx ; lanes.count ) {
…
     // grow the ready queue
     with( cltr->ready_queue ) {
+        ^(snzi){};
+
         size_t ncount = lanes.count;
-
-        // Check that we have some space left
-        if(ncount + 4 >= __cfa_max_lanes) abort("Program attempted to create more than maximum number of Ready Queues (%zu)", __cfa_max_lanes);

         // increase count
…
         lanes.count = ncount;

-        // fields in 'used' don't need to change when growing
+        // Re-create the snzi
+        snzi{ log2( lanes.count / 8 ) };
+        for( idx; (size_t)lanes.count ) {
+            if( !is_empty(lanes.data[idx]) ) {
+                arrive(snzi, idx);
+            }
+        }
     }

…

     with( cltr->ready_queue ) {
+        ^(snzi){};
+
         // Make sure that the total thread count stays the same
         #if defined(__CFA_WITH_VERIFY__)
…
         }

-        mask_clear((__cfa_readyQ_mask_t *)used.mask, idx, NOCHECK);
-
         // Unlock the lane
         __atomic_unlock(&lanes.data[idx].lock);
…

         __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced);
-
-        // recompute the used.count instead of maintaining it
-        used.count = 0;
-        for( i ; __cfa_lane_mask_size ) {
-            used.count += __builtin_popcountl(used.mask[i]);
-        }

         // Allocate new array (uses realloc and memcpies the data)
…
         for( idx; (size_t)lanes.count ) {
             fix(lanes.data[idx]);
+        }
+
+        // Re-create the snzi
+        snzi{ log2( lanes.count / 8 ) };
+        for( idx; (size_t)lanes.count ) {
+            if( !is_empty(lanes.data[idx]) ) {
+                arrive(snzi, idx);
+            }
         }

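The hierarchical SNZI nodes above pack a signed 8-bit count and a 56-bit version into a single 64-bit word so both can be updated by one compare-and-swap; the verify inside cas() checks exactly that layout. Below is a standalone C11 sketch of the same packing, using hypothetical names (snzi_word, make_word, try_update) rather than the CFA ones:

    /* Illustrative sketch only (plain C11, not the CFA source).  Layout
     * mirrors the __snzi_val_t union above: signed 8-bit count in the low
     * byte, 56-bit version above it, updated as one atomic 64-bit CAS. */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef uint64_t snzi_word;

    static inline int8_t   word_cnt(snzi_word w) { return (int8_t)(w & 0xffu); }
    static inline uint64_t word_ver(snzi_word w) { return w >> 8; }

    /* Pack a (count, version) pair; matches the check in cas():
     * _all == (ver << 8) | (unsigned char)cnt. */
    static inline snzi_word make_word(int8_t cnt, uint64_t ver) {
        return (ver << 8) | (uint8_t)cnt;
    }

    /* CAS the node word from the expected (count, version) pair to a new one. */
    static inline bool try_update(_Atomic snzi_word * value,
                                  int8_t exp_cnt, uint64_t exp_ver,
                                  int8_t new_cnt, uint64_t new_ver) {
        snzi_word expected = make_word(exp_cnt, exp_ver);
        return atomic_compare_exchange_strong(value, &expected,
                                              make_word(new_cnt, new_ver));
    }

Packing the version alongside the count lets the retry loops in arrive_h and depart_h detect that the node changed state between the read and the update, instead of acting on a stale count.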