Changeset 13c5e19 for libcfa/src/concurrency/ready_queue.cfa
- Timestamp:
- Jun 23, 2020, 4:42:58 PM (4 years ago)
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children:
- de917da3
- Parents:
- b232745
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/concurrency/ready_queue.cfa
rb232745 r13c5e19 26 26 #include <unistd.h> 27 27 28 #include "snzi.hfa" 29 #include "ready_subqueue.hfa" 30 28 31 static const size_t cache_line_size = 64; 29 32 … … 34 37 #endif 35 38 36 #define BIAS 6439 #define BIAS 8 37 40 38 41 // returns the maximum number of processors the RWLock support … … 180 183 181 184 //======================================================================= 182 // Intrusive Queue used by ready queue185 // Cforall Reqdy Queue used for scheduling 183 186 //======================================================================= 184 // Intrusives lanes which are used by the relaxed ready queue185 struct __attribute__((aligned(128))) __intrusive_lane_t {186 // spin lock protecting the queue187 volatile bool lock;188 189 // anchor for the head and the tail of the queue190 struct __sentinel_t {191 // Link lists fields192 // instrusive link field for threads193 // must be exactly as in $thread194 __thread_desc_link link;195 } before, after;196 197 // Optional statistic counters198 #if !defined(__CFA_NO_SCHED_STATS__)199 struct __attribute__((aligned(64))) {200 // difference between number of push and pops201 ssize_t diff;202 203 // total number of pushes and pops204 size_t push;205 size_t pop ;206 } stat;207 #endif208 };209 210 void ?{}(__intrusive_lane_t & this);211 void ^?{}(__intrusive_lane_t & this);212 213 // Get the head pointer (one before the first element) from the anchor214 static inline $thread * head(const __intrusive_lane_t & this) {215 $thread * rhead = ($thread *)(216 (uintptr_t)( &this.before ) - offsetof( $thread, link )217 );218 /* paranoid */ verify(rhead);219 return rhead;220 }221 222 // Get the tail pointer (one after the last element) from the anchor223 static inline $thread * tail(const __intrusive_lane_t & this) {224 $thread * rtail = ($thread *)(225 (uintptr_t)( &this.after ) - offsetof( $thread, link )226 );227 /* paranoid */ verify(rtail);228 return rtail;229 }230 231 // Ctor232 void ?{}( __intrusive_lane_t & this ) {233 this.lock = false;234 235 this.before.link.prev = 0p;236 this.before.link.next = tail(this);237 this.before.link.ts = 0;238 239 this.after .link.prev = head(this);240 this.after .link.next = 0p;241 this.after .link.ts = 0;242 243 #if !defined(__CFA_NO_SCHED_STATS__)244 this.stat.diff = 0;245 this.stat.push = 0;246 this.stat.pop = 0;247 #endif248 249 // We add a boat-load of assertions here because the anchor code is very fragile250 /* paranoid */ verify(((uintptr_t)( head(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.before));251 /* paranoid */ verify(((uintptr_t)( tail(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.after ));252 /* paranoid */ verify(head(this)->link.prev == 0p );253 /* paranoid */ verify(head(this)->link.next == tail(this) );254 /* paranoid */ verify(tail(this)->link.next == 0p );255 /* paranoid */ verify(tail(this)->link.prev == head(this) );256 /* paranoid */ verify(&head(this)->link.prev == &this.before.link.prev );257 /* paranoid */ verify(&head(this)->link.next == &this.before.link.next );258 /* paranoid */ verify(&tail(this)->link.prev == &this.after .link.prev );259 /* paranoid */ verify(&tail(this)->link.next == &this.after .link.next );260 /* paranoid */ verify(sizeof(__intrusive_lane_t) == 128);261 /* paranoid */ verify(sizeof(this) == 128);262 /* paranoid */ verify(__alignof__(__intrusive_lane_t) == 128);263 /* paranoid */ verify(__alignof__(this) == 128);264 /* paranoid */ verifyf(((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128));265 }266 267 // Dtor is trivial268 void ^?{}( __intrusive_lane_t & this ) {269 // Make sure the list is empty270 /* paranoid */ verify(head(this)->link.prev == 0p );271 /* paranoid */ verify(head(this)->link.next == tail(this) );272 /* paranoid */ verify(tail(this)->link.next == 0p );273 /* paranoid */ verify(tail(this)->link.prev == head(this) );274 }275 276 // Push a thread onto this lane277 // returns true of lane was empty before push, false otherwise278 bool push(__intrusive_lane_t & this, $thread * node) {279 #if defined(__CFA_WITH_VERIFY__)280 /* paranoid */ verify(this.lock);281 /* paranoid */ verify(node->link.ts != 0);282 /* paranoid */ verify(node->link.next == 0p);283 /* paranoid */ verify(node->link.prev == 0p);284 /* paranoid */ verify(tail(this)->link.next == 0p);285 /* paranoid */ verify(head(this)->link.prev == 0p);286 287 if(this.before.link.ts == 0l) {288 /* paranoid */ verify(tail(this)->link.prev == head(this));289 /* paranoid */ verify(head(this)->link.next == tail(this));290 } else {291 /* paranoid */ verify(tail(this)->link.prev != head(this));292 /* paranoid */ verify(head(this)->link.next != tail(this));293 }294 #endif295 296 // Get the relevant nodes locally297 $thread * tail = tail(this);298 $thread * prev = tail->link.prev;299 300 // Do the push301 node->link.next = tail;302 node->link.prev = prev;303 prev->link.next = node;304 tail->link.prev = node;305 306 // Update stats307 #if !defined(__CFA_NO_SCHED_STATS__)308 this.stat.diff++;309 this.stat.push++;310 #endif311 312 verify(node->link.next == tail(this));313 314 // Check if the queue used to be empty315 if(this.before.link.ts == 0l) {316 this.before.link.ts = node->link.ts;317 /* paranoid */ verify(node->link.prev == head(this));318 return true;319 }320 return false;321 }322 323 // Pop a thread from this lane (must be non-empty)324 // returns popped325 // returns true of lane was empty before push, false otherwise326 [$thread *, bool] pop(__intrusive_lane_t & this) {327 /* paranoid */ verify(this.lock);328 /* paranoid */ verify(this.before.link.ts != 0ul);329 330 // Get anchors locally331 $thread * head = head(this);332 $thread * tail = tail(this);333 334 // Get the relevant nodes locally335 $thread * node = head->link.next;336 $thread * next = node->link.next;337 338 /* paranoid */ verify(node != tail);339 /* paranoid */ verify(node);340 341 // Do the pop342 head->link.next = next;343 next->link.prev = head;344 node->link.[next, prev] = 0p;345 346 // Update head time stamp347 this.before.link.ts = next->link.ts;348 349 // Update stats350 #ifndef __CFA_NO_SCHED_STATS__351 this.stat.diff--;352 this.stat.pop ++;353 #endif354 355 // Check if we emptied list and return accordingly356 /* paranoid */ verify(tail(this)->link.next == 0p);357 /* paranoid */ verify(head(this)->link.prev == 0p);358 if(next == tail) {359 /* paranoid */ verify(this.before.link.ts == 0);360 /* paranoid */ verify(tail(this)->link.prev == head(this));361 /* paranoid */ verify(head(this)->link.next == tail(this));362 return [node, true];363 }364 else {365 /* paranoid */ verify(next->link.ts != 0);366 /* paranoid */ verify(tail(this)->link.prev != head(this));367 /* paranoid */ verify(head(this)->link.next != tail(this));368 /* paranoid */ verify(this.before.link.ts != 0);369 return [node, false];370 }371 }372 373 // Check whether or not list is empty374 static inline bool is_empty(__intrusive_lane_t & this) {375 // Cannot verify here since it may not be locked376 return this.before.link.ts == 0;377 }378 379 // Return the timestamp380 static inline unsigned long long ts(__intrusive_lane_t & this) {381 // Cannot verify here since it may not be locked382 return this.before.link.ts;383 }384 385 //=======================================================================386 // Scalable Non-Zero counter387 //=======================================================================388 389 union __snzi_val_t {390 uint64_t _all;391 struct __attribute__((packed)) {392 char cnt;393 uint64_t ver:56;394 };395 };396 397 bool cas(volatile __snzi_val_t & self, __snzi_val_t & exp, char _cnt, uint64_t _ver) {398 __snzi_val_t t;399 t.ver = _ver;400 t.cnt = _cnt;401 /* paranoid */ verify(t._all == ((_ver << 8) | ((unsigned char)_cnt)));402 return __atomic_compare_exchange_n(&self._all, &exp._all, t._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);403 }404 405 bool cas(volatile __snzi_val_t & self, __snzi_val_t & exp, const __snzi_val_t & tar) {406 return __atomic_compare_exchange_n(&self._all, &exp._all, tar._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);407 }408 409 void ?{}( __snzi_val_t & this ) { this._all = 0; }410 void ?{}( __snzi_val_t & this, const volatile __snzi_val_t & o) { this._all = o._all; }411 412 struct __attribute__((aligned(128))) __snzi_node_t {413 volatile __snzi_val_t value;414 struct __snzi_node_t * parent;415 bool is_root;416 };417 418 static inline void arrive( __snzi_node_t & );419 static inline void depart( __snzi_node_t & );420 421 #define __snzi_half -1422 423 //--------------------------------------------------424 // Root node425 static void arrive_r( __snzi_node_t & this ) {426 /* paranoid */ verify( this.is_root );427 __atomic_fetch_add(&this.value._all, 1, __ATOMIC_SEQ_CST);428 }429 430 static void depart_r( __snzi_node_t & this ) {431 /* paranoid */ verify( this.is_root );432 __atomic_fetch_sub(&this.value._all, 1, __ATOMIC_SEQ_CST);433 }434 435 //--------------------------------------------------436 // Hierarchical node437 static void arrive_h( __snzi_node_t & this ) {438 int undoArr = 0;439 bool success = false;440 while(!success) {441 __snzi_val_t x = { this.value };442 /* paranoid */ verify(x.cnt <= 120);443 if( x.cnt >= 1 ) {444 if( cas( this.value, x, x.cnt + 1, x.ver ) ) {445 success = true;446 }447 }448 /* paranoid */ verify(x.cnt <= 120);449 if( x.cnt == 0 ) {450 if( cas( this.value, x, __snzi_half, x.ver + 1) ) {451 success = true;452 x.cnt = __snzi_half;453 x.ver = x.ver + 1;454 }455 }456 /* paranoid */ verify(x.cnt <= 120);457 if( x.cnt == __snzi_half ) {458 /* paranoid */ verify( this.parent);459 arrive( *this.parent );460 if( !cas( this.value, x, 1, x.ver) ) {461 undoArr = undoArr + 1;462 }463 }464 }465 466 for(int i = 0; i < undoArr; i++) {467 /* paranoid */ verify( this.parent );468 depart( *this.parent );469 }470 }471 472 static void depart_h( __snzi_node_t & this ) {473 while(true) {474 const __snzi_val_t x = { this.value };475 /* paranoid */ verifyf(x.cnt >= 1, "%d", x.cnt);476 if( cas( this.value, x, x.cnt - 1, x.ver ) ) {477 if( x.cnt == 1 ) {478 /* paranoid */ verify( this.parent );479 depart( *this.parent );480 }481 return;482 }483 }484 }485 486 //--------------------------------------------------487 // All nodes488 static inline void arrive( __snzi_node_t & this ) {489 if(this.is_root) arrive_r( this );490 else arrive_h( this );491 }492 493 static inline void depart( __snzi_node_t & this ) {494 if(this.is_root) depart_r( this );495 else depart_h( this );496 }497 498 static inline bool query( __snzi_node_t & this ) {499 /* paranoid */ verify( this.is_root );500 return this.value._all > 0;501 }502 503 //--------------------------------------------------504 // SNZI object505 void ?{}( __snzi_t & this, unsigned depth ) with( this ) {506 mask = (1 << depth) - 1;507 root = (1 << (depth + 1)) - 2;508 nodes = alloc( root + 1 );509 510 int width = 1 << depth;511 for(int i = 0; i < root; i++) {512 nodes[i].value._all = 0;513 nodes[i].parent = &nodes[(i / 2) + width ];514 nodes[i].is_root = false;515 }516 517 nodes[ root ].value._all = 0;518 nodes[ root ].parent = 0p;519 nodes[ root ].is_root = true;520 }521 522 void ^?{}( __snzi_t & this ) {523 free( this.nodes );524 }525 526 static inline void arrive( __snzi_t & this, int idx) {527 idx &= this.mask;528 arrive( this.nodes[idx] );529 }530 531 static inline void depart( __snzi_t & this, int idx) {532 idx &= this.mask;533 depart( this.nodes[idx] );534 }535 536 static inline bool query( const __snzi_t & this ) {537 return query( this.nodes[ this.root ] );538 }539 540 //=======================================================================541 // Cforall Reqdy Queue used by ready queue542 //=======================================================================543 544 187 void ?{}(__ready_queue_t & this) with (this) { 545 188 … … 584 227 unsigned rlow = r % BIAS; 585 228 unsigned rhigh = r / BIAS; 586 if( 0 != (rlow % BIAS) && kernelTLS.this_processor) {229 if((0 != rlow) && kernelTLS.this_processor) { 587 230 // (BIAS - 1) out of BIAS chances 588 231 // Use perferred queues 589 i = (kernelTLS.this_processor->id * 4) + (rhigh % 4); 232 unsigned pid = kernelTLS.this_processor->id * 4; 233 i = pid + (rhigh % 4); 234 235 #if !defined(__CFA_NO_STATISTICS__) 236 __tls_stats()->ready.pick.push.local++; 237 #endif 590 238 } 591 239 else { … … 635 283 } 636 284 285 static struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j); 286 static struct $thread * try_pop(struct cluster * cltr, unsigned i); 287 288 // Pop from the ready queue from a given cluster 289 __attribute__((hot)) $thread * pop(struct cluster * cltr) with (cltr->ready_queue) { 290 /* paranoid */ verify( lanes.count > 0 ); 291 #if defined(BIAS) 292 // Don't bother trying locally too much 293 int local_tries = 8; 294 #endif 295 296 // As long as the list is not empty, try finding a lane that isn't empty and pop from it 297 while( query(snzi) ) { 298 // Pick two lists at random 299 unsigned i,j; 300 #if defined(BIAS) 301 uint64_t r = __tls_rand(); 302 unsigned rlow = r % BIAS; 303 uint64_t rhigh = r / BIAS; 304 if(local_tries && 0 != rlow) { 305 // (BIAS - 1) out of BIAS chances 306 // Use perferred queues 307 unsigned pid = kernelTLS.this_processor->id * 4; 308 i = pid + (rhigh % 4); 309 j = pid + ((rhigh >> 32ull) % 4); 310 311 // count the tries 312 local_tries--; 313 314 #if !defined(__CFA_NO_STATISTICS__) 315 __tls_stats()->ready.pick.pop.local++; 316 #endif 317 } 318 else { 319 // 1 out of BIAS chances 320 // Use all queues 321 i = rhigh; 322 j = rhigh >> 32ull; 323 } 324 #else 325 i = __tls_rand(); 326 j = __tls_rand(); 327 #endif 328 329 i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 330 j %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 331 332 // try popping from the 2 picked lists 333 struct $thread * thrd = try_pop(cltr, i, j); 334 if(thrd) return thrd; 335 } 336 337 // All lanes where empty return 0p 338 return 0p; 339 } 340 637 341 //----------------------------------------------------------------------- 638 342 // Given 2 indexes, pick the list with the oldest push an try to pop from it 639 static struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j) with (cltr->ready_queue) {343 static inline struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j) with (cltr->ready_queue) { 640 344 #if !defined(__CFA_NO_STATISTICS__) 641 345 __tls_stats()->ready.pick.pop.attempt++; … … 648 352 } 649 353 354 return try_pop(cltr, w); 355 } 356 357 static inline struct $thread * try_pop(struct cluster * cltr, unsigned w) with (cltr->ready_queue) { 650 358 // Get relevant elements locally 651 359 __intrusive_lane_t & lane = lanes.data[w]; … … 688 396 return thrd; 689 397 } 690 691 // Pop from the ready queue from a given cluster 692 __attribute__((hot)) $thread * pop(struct cluster * cltr) with (cltr->ready_queue) { 693 /* paranoid */ verify( lanes.count > 0 ); 694 695 // As long as the list is not empty, try finding a lane that isn't empty and pop from it 696 while( query(snzi) ) { 697 // Pick two lists at random 698 #if defined(BIAS) 699 unsigned i = __tls_rand(); 700 unsigned j = __tls_rand(); 701 702 if(0 == (i % BIAS)) { 703 i = i / BIAS; 704 } 705 else { 706 i = ((kernelTLS.this_processor->id * 4) + ((i / BIAS) % 4)); 707 j = ((kernelTLS.this_processor->id * 4) + ((j / BIAS) % 4)); 708 } 709 #else 710 unsigned i = __tls_rand(); 711 unsigned j = __tls_rand(); 712 #endif 713 714 i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 715 j %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 716 717 // try popping from the 2 picked lists 718 struct $thread * thrd = try_pop(cltr, i, j); 719 if(thrd) return thrd; 720 } 721 722 // All lanes where empty return 0p 723 return 0p; 398 //----------------------------------------------------------------------- 399 400 bool remove_head(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) { 401 for(i; lanes.count) { 402 __intrusive_lane_t & lane = lanes.data[i]; 403 404 bool removed = false; 405 406 __atomic_acquire(&lane.lock); 407 if(head(lane)->link.next == thrd) { 408 $thread * pthrd; 409 bool emptied; 410 [pthrd, emptied] = pop(lane); 411 412 /* paranoid */ verify( pthrd == thrd ); 413 414 removed = true; 415 if(emptied) { 416 depart( snzi, i ); 417 } 418 } 419 __atomic_unlock(&lane.lock); 420 421 if( removed ) return true; 422 } 423 return false; 724 424 } 725 425
Note: See TracChangeset
for help on using the changeset viewer.