Changeset 9cc3a18 for libcfa/src
- Timestamp: Apr 15, 2021, 5:02:04 PM (3 years ago)
- Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children: 6abcb4d
- Parents: 57b3675
- Location: libcfa/src/concurrency
- Files: 6 edited
libcfa/src/concurrency/invoke.h
r57b3675 → r9cc3a18

      struct $thread * prev;
      volatile unsigned long long ts;
-     int preferred;
  };
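The field removed here carried a per-thread queue preference that survived across processors. After this changeset the locality hint lives on the processor (its cltr_id, see kernel_private.hfa and ready_queue.cfa below) rather than on the thread. A before/after sketch in plain C, with $thread reduced to a hypothetical thread_desc and unrelated fields elided:

    struct thread_desc;

    /* before 9cc3a18: the thread remembers roughly where it last sat */
    struct thread_link_before {
        struct thread_desc * next, * prev;
        volatile unsigned long long ts; /* timestamp of the last push */
        int preferred;                  /* lane hint, dropped by this change */
    };

    /* after 9cc3a18: only the queue links and the push timestamp remain */
    struct thread_link_after {
        struct thread_desc * next, * prev;
        volatile unsigned long long ts;
    };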
libcfa/src/concurrency/kernel.hfa
r57b3675 → r9cc3a18

      // Cluster Tools

-     // Intrusive lanes which are used by the relaxed ready queue
+     // Intrusive lanes which are used by the ready queue
      struct __attribute__((aligned(128))) __intrusive_lane_t;
      void  ?{}(__intrusive_lane_t & this);
      void ^?{}(__intrusive_lane_t & this);
+
+     // Aligned timestamps which are used by the relaxed ready queue
+     struct __attribute__((aligned(128))) __timestamp_t;
+     void  ?{}(__timestamp_t & this);
+     void ^?{}(__timestamp_t & this);

      //TODO adjust cache size to ARCHITECTURE
…
      // Array of lanes
      __intrusive_lane_t * volatile data;
+
+     // Array of times
+     __timestamp_t * volatile tscs;

      // Number of lanes (empty or not)
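The new __timestamp_t follows the same recipe as __intrusive_lane_t: one 128-byte-aligned slot per lane, held in a separate array, so processors that scan timestamps touch neither the lane data nor a neighbouring slot's cache line. A minimal sketch of that layout in plain C (names are illustrative, not the CFA API):

    #include <stdlib.h>

    /* one padded timestamp per lane; aligned(128) matches the diff and
       also defeats adjacent-cache-line prefetching */
    struct __attribute__((aligned(128))) lane_timestamp {
        volatile unsigned long long tv;
    };

    struct ready_queue_sketch {
        struct lane_timestamp * tscs;  /* one slot per lane */
        size_t                  count; /* number of lanes */
    };

    /* sizeof(struct lane_timestamp) is 128, so the total size is a
       multiple of the alignment, as aligned_alloc requires.
       Error handling elided. */
    static void rq_init(struct ready_queue_sketch * q, size_t lanes) {
        q->count = lanes;
        q->tscs  = aligned_alloc(128, lanes * sizeof(struct lane_timestamp));
        for (size_t i = 0; i < lanes; i++) q->tscs[i].tv = 0;
    }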
libcfa/src/concurrency/kernel_private.hfa
r57b3675 → r9cc3a18

      // push thread onto a ready queue for a cluster
      // returns true if the list was previously empty, false otherwise
-     __attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
+     __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd);

      //-----------------------------------------------------------------------
…

      //-----------------------------------------------------------------------
-     // remove thread from the ready queue of a cluster
-     // returns bool if it wasn't found
-     bool remove_head(struct cluster * cltr, struct $thread * thrd);
-
-     //-----------------------------------------------------------------------
      // Increase the width of the ready queue (number of lanes) by 4
      void ready_queue_grow  (struct cluster * cltr);
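For orientation, the scheduler-facing surface after this hunk, transcribed as plain C declarations (a reading aid, not the real header: $thread is written as thread_desc and the CFA types are opaque here). push presumably stopped returning the was-empty flag because, with a sharded queue, that answer is stale the moment the lane lock is released; remove_head disappears along with its uses.

    struct cluster;
    struct thread_desc;

    /* enqueue a ready thread; no longer reports prior emptiness */
    __attribute__((hot)) void push(struct cluster * cltr, struct thread_desc * thrd);

    /* fast-path dequeue: sample two lanes, take the older head */
    __attribute__((hot)) struct thread_desc * pop(struct cluster * cltr);

    /* slow-path dequeue: sweeps every lane */
    struct thread_desc * pop_slow(struct cluster * cltr);

    /* resize the lane array when processors join or leave */
    void ready_queue_grow  (struct cluster * cltr);
    void ready_queue_shrink(struct cluster * cltr);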
libcfa/src/concurrency/ready_queue.cfa
r57b3675 → r9cc3a18

  // #define USE_MPSC

+ #define USE_RELAXED_FIFO
+ // #define USE_WORK_STEALING
+
  #include "bits/defs.hfa"
  #include "kernel_private.hfa"
…
  #endif

- #define BIAS 4
+ #if   defined(USE_RELAXED_FIFO)
+     #define BIAS 4
+     #define READYQ_SHARD_FACTOR 4
+ #elif defined(USE_WORK_STEALING)
+     #define READYQ_SHARD_FACTOR 2
+ #else
+     #error no scheduling strategy selected
+ #endif
+
+ static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred);
+ static inline struct $thread * try_pop(struct cluster * cltr, unsigned w);
+ static struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j);
+

  // returns the maximum number of processors the RWLock support
…

  //=======================================================================
- // Cforall Reqdy Queue used for scheduling
+ // Cforall Ready Queue used for scheduling
  //=======================================================================
  void ?{}(__ready_queue_t & this) with (this) {
      lanes.data  = 0p;
+     lanes.tscs  = 0p;
      lanes.count = 0;
  }
…
      verify( 1 == lanes.count );
      free(lanes.data);
+     free(lanes.tscs);
  }

  //-----------------------------------------------------------------------
- static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred) {
-     unsigned i;
-     bool local;
-     #if defined(BIAS)
-         unsigned rlow  = r % BIAS;
-         unsigned rhigh = r / BIAS;
-         if((0 != rlow) && preferred >= 0) {
-             // (BIAS - 1) out of BIAS chances
-             // Use preferred queues
-             i = preferred + (rhigh % 4);
-             local = true;
-         }
-         else {
-             // 1 out of BIAS chances
-             // Use all queues
-             i = rhigh;
-             local = false;
-         }
-     #else
-         i = r;
-         local = false;
-     #endif
-     return [i, local];
- }
-
- //-----------------------------------------------------------------------
- __attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
+ __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
      __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);

      const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
+     /* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );

      // write timestamp
      thrd->link.ts = rdtscl();

-     bool first = false;
-     __attribute__((unused)) bool local;
-     __attribute__((unused)) int preferred;
-     #if defined(BIAS)
-         /* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );
-         preferred =
-             //*
-             external ? -1 : kernelTLS().this_processor->cltr_id;
-             /*/
-             thrd->link.preferred * 4;
-             //*/
-     #endif
+     bool local;
+     int preferred = external ? -1 : kernelTLS().this_processor->cltr_id;

      // Try to pick a lane and lock it
…
      do {
          // Pick the index of a lane
-         // unsigned r = __tls_rand();
          unsigned r = __tls_rand_fwd();
          [i, local] = idx_from_r(r, preferred);
…
      }
      #endif
-
-     // return whether or not the list was empty before this push
-     return first;
- }
-
- static struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j);
- static struct $thread * try_pop(struct cluster * cltr, unsigned i);
+ }

  // Pop from the ready queue of a given cluster
  __attribute__((hot)) $thread * pop(struct cluster * cltr) with (cltr->ready_queue) {
      /* paranoid */ verify( lanes.count > 0 );
+     /* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
+
      unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
-     int preferred;
-     #if defined(BIAS)
-         /* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
-         preferred = kernelTLS().this_processor->cltr_id;
-     #endif
+     int preferred = kernelTLS().this_processor->cltr_id;

…
      for(25) {
          // Pick two lists at random
-         // unsigned ri = __tls_rand();
-         // unsigned rj = __tls_rand();
          unsigned ri = __tls_rand_bck();
          unsigned rj = __tls_rand_bck();
…
          struct $thread * thrd = try_pop(cltr, i, j);
          if(thrd) {
-             #if defined(BIAS) && !defined(__CFA_NO_STATISTICS__)
+             #if !defined(__CFA_NO_STATISTICS__)
                  if( locali || localj ) __tls_stats()->ready.pick.pop.lsuccess++;
              #endif
…
  }

+ static void fix_times( struct cluster * cltr ) with( cltr->ready_queue ) {
+     lanes.tscs = alloc(lanes.count, lanes.tscs`realloc);
+     for(i; lanes.count) {
+         lanes.tscs[i].tv = ts(lanes.data[i]);
+     }
+
+ }
+
+ //=======================================================================
+ // Various Ready Queue utilities
+ //=======================================================================
+ // these functions work the same or almost the same
+ // whether they use work-stealing or relaxed fifo scheduling
+
+ //-----------------------------------------------------------------------
+ // get index from random number with or without bias towards queues
+ static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred) {
+     unsigned i;
+     bool local;
+     unsigned rlow  = r % BIAS;
+     unsigned rhigh = r / BIAS;
+     if((0 != rlow) && preferred >= 0) {
+         // (BIAS - 1) out of BIAS chances
+         // Use preferred queues
+         i = preferred + (rhigh % READYQ_SHARD_FACTOR);
+         local = true;
+     }
+     else {
+         // 1 out of BIAS chances
+         // Use all queues
+         i = rhigh;
+         local = false;
+     }
+     return [i, local];
+ }
+
+ //-----------------------------------------------------------------------
+ // try to pop from a lane given by index w
+ static inline struct $thread * try_pop(struct cluster * cltr, unsigned w) with (cltr->ready_queue) {
+     // Get relevant elements locally
+     __intrusive_lane_t & lane = lanes.data[w];
+
+     // If list looks empty retry
+     if( is_empty(lane) ) return 0p;
+
+     // If we can't get the lock retry
+     if( !__atomic_try_acquire(&lane.lock) ) return 0p;
+
+     // If list is empty, unlock and retry
+     if( is_empty(lane) ) {
+         __atomic_unlock(&lane.lock);
+         return 0p;
+     }
+
+     // Actually pop the list
+     struct $thread * thrd;
+     thrd = pop(lane);
+
+     /* paranoid */ verify(thrd);
+     /* paranoid */ verify(lane.lock);
+
+     // Unlock and return
+     __atomic_unlock(&lane.lock);
+
+     // Update statistics
+     #if !defined(__CFA_NO_STATISTICS__)
+         __tls_stats()->ready.pick.pop.success++;
+     #endif
+
+     #if defined(USE_WORK_STEALING)
+         lanes.tscs[w].tv = thrd->link.ts;
+     #endif
+
+     // return the popped thread
+     return thrd;
+ }
+
+ //-----------------------------------------------------------------------
+ // try to pop from any lane, making sure you don't miss any threads pushed
+ // before the start of the function
  __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
      /* paranoid */ verify( lanes.count > 0 );
…
  }

-
  //-----------------------------------------------------------------------
- // Given 2 indexes, pick the list with the oldest push and try to pop from it
- static inline struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j) with (cltr->ready_queue) {
-     #if !defined(__CFA_NO_STATISTICS__)
-         __tls_stats()->ready.pick.pop.attempt++;
-     #endif
-
-     // Pick the best list
-     int w = i;
-     if( __builtin_expect(!is_empty(lanes.data[j]), true) ) {
-         w = (ts(lanes.data[i]) < ts(lanes.data[j])) ? i : j;
-     }
-
-     return try_pop(cltr, w);
- }
-
- static inline struct $thread * try_pop(struct cluster * cltr, unsigned w) with (cltr->ready_queue) {
-     // Get relevant elements locally
-     __intrusive_lane_t & lane = lanes.data[w];
-
-     // If list looks empty retry
-     if( is_empty(lane) ) return 0p;
-
-     // If we can't get the lock retry
-     if( !__atomic_try_acquire(&lane.lock) ) return 0p;
-
-     // If list is empty, unlock and retry
-     if( is_empty(lane) ) {
-         __atomic_unlock(&lane.lock);
-         return 0p;
-     }
-
-     // Actually pop the list
-     struct $thread * thrd;
-     thrd = pop(lane);
-
-     /* paranoid */ verify(thrd);
-     /* paranoid */ verify(lane.lock);
-
-     // Unlock and return
-     __atomic_unlock(&lane.lock);
-
-     // Update statistics
-     #if !defined(__CFA_NO_STATISTICS__)
-         __tls_stats()->ready.pick.pop.success++;
-     #endif
-
-     // Update the thread bias
-     thrd->link.preferred = w / 4;
-
-     // return the popped thread
-     return thrd;
- }
- //-----------------------------------------------------------------------
-
- bool remove_head(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
-     for(i; lanes.count) {
-         __intrusive_lane_t & lane = lanes.data[i];
-
-         bool removed = false;
-
-         __atomic_acquire(&lane.lock);
-             if(head(lane)->link.next == thrd) {
-                 $thread * pthrd;
-                 pthrd = pop(lane);
-
-                 /* paranoid */ verify( pthrd == thrd );
-
-                 removed = true;
-             }
-         __atomic_unlock(&lane.lock);
-
-         if( removed ) return true;
-     }
-     return false;
- }
-
- //-----------------------------------------------------------------------
  // Check that all the intrusive queues in the data structure are still consistent
  static void check( __ready_queue_t & q ) with (q) {
      #if defined(__CFA_WITH_VERIFY__) && !defined(USE_MPSC)
…
  }

+ //-----------------------------------------------------------------------
+ // Given 2 indexes, pick the list with the oldest push and try to pop from it
+ static inline struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j) with (cltr->ready_queue) {
+     #if !defined(__CFA_NO_STATISTICS__)
+         __tls_stats()->ready.pick.pop.attempt++;
+     #endif
+
+     // Pick the best list
+     int w = i;
+     if( __builtin_expect(!is_empty(lanes.data[j]), true) ) {
+         w = (ts(lanes.data[i]) < ts(lanes.data[j])) ? i : j;
+     }
+
+     return try_pop(cltr, w);
+ }
+
  // Call this function if the intrusive list was moved using memcpy
  // fixes the list so that the pointers back to anchors aren't left dangling
…
  }

- static void assign_list(unsigned & value, const int inc, dlist(processor, processor) & list, unsigned count) {
+ static void assign_list(unsigned & value, dlist(processor, processor) & list, unsigned count) {
      processor * it = &list`first;
      for(unsigned i = 0; i < count; i++) {
          /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
          it->cltr_id = value;
-         value += inc;
+         value += READYQ_SHARD_FACTOR;
          it = &(*it)`next;
      }
  }

- static void reassign_cltr_id(struct cluster * cltr, const int inc) {
+ static void reassign_cltr_id(struct cluster * cltr) {
      unsigned preferred = 0;
-     assign_list(preferred, inc, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
-     assign_list(preferred, inc, cltr->procs.idles  , cltr->procs.idle );
+     assign_list(preferred, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
+     assign_list(preferred, cltr->procs.idles  , cltr->procs.idle );

…
      // Make sure we always have at least 1 list
      if(target >= 2) {
-         ncount = target * 4;
+         ncount = target * READYQ_SHARD_FACTOR;
      } else {
          ncount = 1;
…
  }

-     reassign_cltr_id(cltr, 4);
+     fix_times(cltr);
+
+     reassign_cltr_id(cltr);

      // Make sure that everything is consistent
…
      // Find new count
      // Make sure we always have at least 1 list
-     lanes.count = target >= 2 ? target * READYQ_SHARD_FACTOR : 1;
+     lanes.count = target >= 2 ? target * READYQ_SHARD_FACTOR : 1;
      /* paranoid */ verify( ocount >= lanes.count );
-     /* paranoid */ verify( lanes.count == target * 4 || target < 2 );
+     /* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 );

      // for printing count the number of displaced threads
…
  }

-     reassign_cltr_id(cltr, 4);
+     fix_times(cltr);
+
+     reassign_cltr_id(cltr);

      // Make sure that everything is consistent
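The relocated idx_from_r is the heart of the bias: out of every BIAS random draws, BIAS - 1 stay on the calling processor's own shard of READYQ_SHARD_FACTOR lanes (starting at its cltr_id) and one roams over all lanes, which keeps the queue connected and lets external pushes land anywhere. The same arithmetic as a standalone, compilable C function (CFA's tuple return becomes a struct; the caller still reduces idx modulo the lane count):

    #define BIAS                4
    #define READYQ_SHARD_FACTOR 4

    struct pick { unsigned idx; int local; };

    /* r is thread-local randomness; preferred is the processor's cltr_id,
       or -1 for external pushes with no home shard */
    static struct pick idx_from_r(unsigned r, int preferred) {
        struct pick p;
        unsigned rlow  = r % BIAS; /* decides local vs. global */
        unsigned rhigh = r / BIAS; /* decides which lane */
        if (rlow != 0 && preferred >= 0) {
            /* (BIAS - 1) out of BIAS draws: stay on the home shard */
            p.idx   = (unsigned)preferred + (rhigh % READYQ_SHARD_FACTOR);
            p.local = 1;
        } else {
            /* 1 out of BIAS draws: any lane at all */
            p.idx   = rhigh;
            p.local = 0;
        }
        return p;
    }

Because reassign_cltr_id hands out cltr_id values in steps of READYQ_SHARD_FACTOR, the home shards tile the lane array without overlap.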
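The fast path that drives try_pop(cltr, i, j) is a power-of-two-choices scheme: sample two lanes, prefer the one whose head was pushed longest ago (the smaller timestamp), then trylock and recheck. Choosing the older of two random heads approximates global FIFO while only ever locking one lane. A compressed C sketch of just the decision, with lane_empty, lane_ts, and lane_try_pop as hypothetical stand-ins for is_empty, ts, and the locked pop:

    struct thread_desc;

    extern int                  lane_empty(unsigned lane);
    extern unsigned long long   lane_ts(unsigned lane);    /* head's push time */
    extern struct thread_desc * lane_try_pop(unsigned lane);

    /* pick the lane with the oldest head, mirroring the diff's logic:
       default to i, consider j only when j is non-empty */
    static struct thread_desc * try_pop2(unsigned i, unsigned j) {
        unsigned w = i;
        if (!lane_empty(j))
            w = (lane_ts(i) < lane_ts(j)) ? i : j;
        return lane_try_pop(w); /* NULL on a lost race; caller retries */
    }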
libcfa/src/concurrency/ready_subqueue.hfa
r57b3675 → r9cc3a18

      #endif
  }
+
+ // Aligned timestamps which are used by the relaxed ready queue
+ struct __attribute__((aligned(128))) __timestamp_t {
+     volatile unsigned long long tv;
+ };
+
+ void  ?{}(__timestamp_t & this) { this.tv = 0; }
+ void ^?{}(__timestamp_t & this) {}
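When ready_queue_grow or ready_queue_shrink changes the lane count, fix_times in ready_queue.cfa resizes this timestamp array to match and reseeds each slot from the push time of that lane's current head. Roughly, in plain C, with ts_of_head standing in for ts(lanes.data[i]); note that plain realloc does not preserve the 128-byte alignment the CFA allocator gives the real array, so this is only a sketch of the logic:

    #include <stdlib.h>

    struct __attribute__((aligned(128))) timestamp_slot {
        volatile unsigned long long tv;
    };

    /* hypothetical: push timestamp of the thread at the head of a lane,
       or 0 when the lane is empty */
    extern unsigned long long ts_of_head(size_t lane);

    /* resize, then reseed every slot so the next two-choices comparison
       sees fresh values instead of leftovers from the old layout;
       error handling elided */
    static struct timestamp_slot * fix_times_sketch(struct timestamp_slot * tscs,
                                                    size_t count) {
        tscs = realloc(tscs, count * sizeof *tscs);
        for (size_t i = 0; i < count; i++)
            tscs[i].tv = ts_of_head(i);
        return tscs;
    }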
libcfa/src/concurrency/thread.cfa
r57b3675 → r9cc3a18

      link.next = 0p;
      link.prev = 0p;
-     link.preferred = -1;
      #if defined( __CFA_WITH_VERIFY__ )
          canary = 0x0D15EA5E0D15EA5Ep;