Changeset 431cd4f for libcfa/src
- Timestamp: Apr 16, 2021, 12:40:27 PM
- Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children: 6528d75
- Parents: 6abcb4d
- Location: libcfa/src/concurrency
- Files: 5 edited
Legend: lines prefixed with "-" were removed, lines prefixed with "+" were added; unprefixed lines are unchanged context, and "…" separates hunks.
libcfa/src/concurrency/kernel.cfa (r6abcb4d → r431cd4f)

…
 
     ready_schedule_lock();
-    $thread * thrd = pop( this );
+    $thread * thrd = pop_fast( this );
     ready_schedule_unlock();
…
libcfa/src/concurrency/kernel.hfa (r6abcb4d → r431cd4f)

…
     struct cluster * cltr;
 
-    // Id within the cluster
-    unsigned cltr_id;
+    // Ready Queue state per processor
+    struct {
+        unsigned short its;
+        unsigned short itr;
+        unsigned id;
+        unsigned target;
+        unsigned long long int cutoff;
+    } rdq;
 
     // Set to true to notify the processor should terminate
…
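The new rdq block replaces the single cltr_id with per-processor ready-queue state. As a rough map of how these fields are used later in this changeset (in ready_queue.cfa), here is a plain-C sketch; the field names come from the diff, but the role comments are inferred from the call sites rather than taken from the authors' documentation:

    // Plain-C sketch of the per-processor ready-queue state.
    struct rdq_state {
        unsigned short its;        // push iteration counter: push lane = id + (its++ % READYQ_SHARD_FACTOR)
        unsigned short itr;        // pop iteration counter:  pop lane  = id + (--itr % READYQ_SHARD_FACTOR)
        unsigned id;               // first lane owned by this processor; it owns
                                   // lanes [id, id + READYQ_SHARD_FACTOR)
        unsigned target;           // lane currently considered for stealing; -1u = none
        unsigned long long cutoff; // timestamp bound: steal from `target` only while its
                                   // oldest entry is older than this
    };

The -1u/-1ull sentinels written in startup.cfa below mean "not assigned / not chosen yet"; rdq.id is assigned in assign_list() in steps of READYQ_SHARD_FACTOR, as the last ready_queue.cfa hunk shows.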
libcfa/src/concurrency/kernel/startup.cfa (r6abcb4d → r431cd4f)

…
     this.name = name;
     this.cltr = &_cltr;
-    this.cltr_id = -1u;
+    this.rdq.its = 0;
+    this.rdq.itr = 0;
+    this.rdq.id  = -1u;
+    this.rdq.target = -1u;
+    this.rdq.cutoff = -1ull;
     do_terminate = false;
     preemption_alarm = 0p;
…
libcfa/src/concurrency/kernel_private.hfa (r6abcb4d → r431cd4f)

…
 // returns 0p if empty
 // May return 0p spuriously
-__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
+__attribute__((hot)) struct $thread * pop_fast(struct cluster * cltr);
…
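The rename reflects a split of the consumer interface into a fast and a slow path: pop_fast() is biased toward the calling processor's own lanes and, per the header comment above, may spuriously return 0p even when work exists, while pop_slow() probes more widely and ultimately falls back to search(), which scans every lane. A hypothetical caller, sketched in plain C (thread_desc stands in for CFA's $thread, NULL for 0p; the real scheduler loop in kernel.cfa is more involved):

    struct cluster;                  // opaque, as in the CFA headers
    struct thread_desc;              // stand-in for CFA's $thread

    extern struct thread_desc * pop_fast(struct cluster * cltr); // cheap, may spuriously miss
    extern struct thread_desc * pop_slow(struct cluster * cltr); // wider probe, then full search

    // Hypothetical consumer illustrating the intended division of labour:
    static struct thread_desc * next_thread(struct cluster * cltr) {
        struct thread_desc * thrd = pop_fast(cltr);  // shard-local fast path
        if (thrd) return thrd;
        return pop_slow(cltr);                       // pay for thoroughness only when needed
    }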
libcfa/src/concurrency/ready_queue.cfa (r6abcb4d → r431cd4f)

…
 static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred);
 static inline struct $thread * try_pop(struct cluster * cltr, unsigned w);
-static struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j);
+static inline struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j);
+static inline struct $thread * search(struct cluster * cltr);
…
 
 //-----------------------------------------------------------------------
-__attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
-    __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
-
-    const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
-    /* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );
-
-    // write timestamp
-    thrd->link.ts = rdtscl();
-
-    bool local;
-    int preferred = external ? -1 : kernelTLS().this_processor->cltr_id;
-
-    // Try to pick a lane and lock it
-    unsigned i;
-    do {
-        // Pick the index of a lane
-        unsigned r = __tls_rand_fwd();
-        [i, local] = idx_from_r(r, preferred);
-
-        i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
-
-        #if !defined(__CFA_NO_STATISTICS__)
-            if(external) {
-                if(local) __atomic_fetch_add(&cltr->stats->ready.pick.ext.local, 1, __ATOMIC_RELAXED);
-                __atomic_fetch_add(&cltr->stats->ready.pick.ext.attempt, 1, __ATOMIC_RELAXED);
-            }
-            else {
-                if(local) __tls_stats()->ready.pick.push.local++;
-                __tls_stats()->ready.pick.push.attempt++;
-            }
-        #endif
-
-    #if defined(USE_MPSC)
-        // mpsc always succeeds
-    } while( false );
-    #else
-        // If we can't lock it retry
-    } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
-    #endif
-
-    // Actually push it
-    push(lanes.data[i], thrd);
-
-    #if !defined(USE_MPSC)
-        // Unlock and return
-        __atomic_unlock( &lanes.data[i].lock );
-    #endif
-
-    // Mark the current index in the tls rng instance as having an item
-    __tls_rand_advance_bck();
-
-    __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first);
-
-    // Update statistics
-    #if !defined(__CFA_NO_STATISTICS__)
-        if(external) {
-            if(local) __atomic_fetch_add(&cltr->stats->ready.pick.ext.lsuccess, 1, __ATOMIC_RELAXED);
-            __atomic_fetch_add(&cltr->stats->ready.pick.ext.success, 1, __ATOMIC_RELAXED);
-        }
-        else {
-            if(local) __tls_stats()->ready.pick.push.lsuccess++;
-            __tls_stats()->ready.pick.push.success++;
-        }
-    #endif
-}
-
-// Pop from the ready queue from a given cluster
-__attribute__((hot)) $thread * pop(struct cluster * cltr) with (cltr->ready_queue) {
-    /* paranoid */ verify( lanes.count > 0 );
-    /* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
-
-    unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
-    int preferred = kernelTLS().this_processor->cltr_id;
-
-    // As long as the list is not empty, try finding a lane that isn't empty and pop from it
-    for(25) {
-        // Pick two lists at random
-        unsigned ri = __tls_rand_bck();
-        unsigned rj = __tls_rand_bck();
-
-        unsigned i, j;
-        __attribute__((unused)) bool locali, localj;
-        [i, locali] = idx_from_r(ri, preferred);
-        [j, localj] = idx_from_r(rj, preferred);
-
-        #if !defined(__CFA_NO_STATISTICS__)
-            if(locali && localj) {
-                __tls_stats()->ready.pick.pop.local++;
-            }
-        #endif
-
-        i %= count;
-        j %= count;
-
-        // try popping from the 2 picked lists
-        struct $thread * thrd = try_pop(cltr, i, j);
-        if(thrd) {
-            #if !defined(__CFA_NO_STATISTICS__)
-                if( locali || localj ) __tls_stats()->ready.pick.pop.lsuccess++;
-            #endif
-            return thrd;
-        }
-    }
-
-    // All lanes were empty, return 0p
-    return 0p;
-}
-
-static void fix_times( struct cluster * cltr ) with( cltr->ready_queue ) {
-    lanes.tscs = alloc(lanes.count, lanes.tscs`realloc);
-    for(i; lanes.count) {
-        lanes.tscs[i].tv = ts(lanes.data[i]);
-    }
-}
+#if defined(USE_RELAXED_FIFO)
+    //-----------------------------------------------------------------------
+    // get index from random number with or without bias towards queues
+    static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred) {
+        unsigned i;
+        bool local;
+        unsigned rlow  = r % BIAS;
+        unsigned rhigh = r / BIAS;
+        if((0 != rlow) && preferred >= 0) {
+            // (BIAS - 1) out of BIAS chances
+            // Use preferred queues
+            i = preferred + (rhigh % READYQ_SHARD_FACTOR);
+            local = true;
+        }
+        else {
+            // 1 out of BIAS chances
+            // Use all queues
+            i = rhigh;
+            local = false;
+        }
+        return [i, local];
+    }
+
+    __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
+        __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
+
+        const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
+        /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count );
+
+        // write timestamp
+        thrd->link.ts = rdtscl();
+
+        bool local;
+        int preferred = external ? -1 : kernelTLS().this_processor->rdq.id;
+
+        // Try to pick a lane and lock it
+        unsigned i;
+        do {
+            // Pick the index of a lane
+            unsigned r = __tls_rand_fwd();
+            [i, local] = idx_from_r(r, preferred);
+
+            i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
+
+            #if !defined(__CFA_NO_STATISTICS__)
+                if(external) {
+                    if(local) __atomic_fetch_add(&cltr->stats->ready.pick.ext.local, 1, __ATOMIC_RELAXED);
+                    __atomic_fetch_add(&cltr->stats->ready.pick.ext.attempt, 1, __ATOMIC_RELAXED);
+                }
+                else {
+                    if(local) __tls_stats()->ready.pick.push.local++;
+                    __tls_stats()->ready.pick.push.attempt++;
+                }
+            #endif
+
+        #if defined(USE_MPSC)
+            // mpsc always succeeds
+        } while( false );
+        #else
+            // If we can't lock it retry
+        } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
+        #endif
+
+        // Actually push it
+        push(lanes.data[i], thrd);
+
+        #if !defined(USE_MPSC)
+            // Unlock and return
+            __atomic_unlock( &lanes.data[i].lock );
+        #endif
+
+        // Mark the current index in the tls rng instance as having an item
+        __tls_rand_advance_bck();
+
+        __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first);
+
+        // Update statistics
+        #if !defined(__CFA_NO_STATISTICS__)
+            if(external) {
+                if(local) __atomic_fetch_add(&cltr->stats->ready.pick.ext.lsuccess, 1, __ATOMIC_RELAXED);
+                __atomic_fetch_add(&cltr->stats->ready.pick.ext.success, 1, __ATOMIC_RELAXED);
+            }
+            else {
+                if(local) __tls_stats()->ready.pick.push.lsuccess++;
+                __tls_stats()->ready.pick.push.success++;
+            }
+        #endif
+    }
+
+    // Pop from the ready queue from a given cluster
+    __attribute__((hot)) $thread * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
+        /* paranoid */ verify( lanes.count > 0 );
+        /* paranoid */ verify( kernelTLS().this_processor );
+        /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count );
+
+        unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
+        int preferred = kernelTLS().this_processor->rdq.id;
+
+        // As long as the list is not empty, try finding a lane that isn't empty and pop from it
+        for(25) {
+            // Pick two lists at random
+            unsigned ri = __tls_rand_bck();
+            unsigned rj = __tls_rand_bck();
+
+            unsigned i, j;
+            __attribute__((unused)) bool locali, localj;
+            [i, locali] = idx_from_r(ri, preferred);
+            [j, localj] = idx_from_r(rj, preferred);
+
+            #if !defined(__CFA_NO_STATISTICS__)
+                if(locali && localj) {
+                    __tls_stats()->ready.pick.pop.local++;
+                }
+            #endif
+
+            i %= count;
+            j %= count;
+
+            // try popping from the 2 picked lists
+            struct $thread * thrd = try_pop(cltr, i, j);
+            if(thrd) {
+                #if !defined(__CFA_NO_STATISTICS__)
+                    if( locali || localj ) __tls_stats()->ready.pick.pop.lsuccess++;
+                #endif
+                return thrd;
+            }
+        }
+
+        // All lanes were empty, return 0p
+        return 0p;
+    }
+
+    __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) {
+        return search(cltr);
+    }
+#endif
+#if defined(USE_WORK_STEALING)
+    __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
+        __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
+
+        const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
+        /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count );
+
+        // write timestamp
+        thrd->link.ts = rdtscl();
+
+        // Try to pick a lane and lock it
+        unsigned i;
+        do {
+            if(unlikely(external)) {
+                i = __tls_rand() % lanes.count;
+            }
+            else {
+                processor * proc = kernelTLS().this_processor;
+                unsigned r = proc->rdq.its++;
+                i = proc->rdq.id + (r % READYQ_SHARD_FACTOR);
+            }
+
+        #if defined(USE_MPSC)
+            // mpsc always succeeds
+        } while( false );
+        #else
+            // If we can't lock it retry
+        } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
+        #endif
+
+        // Actually push it
+        push(lanes.data[i], thrd);
+
+        #if !defined(USE_MPSC)
+            // Unlock and return
+            __atomic_unlock( &lanes.data[i].lock );
+        #endif
+
+        __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first);
+    }
+
+    // Pop from the ready queue from a given cluster
+    __attribute__((hot)) $thread * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
+        /* paranoid */ verify( lanes.count > 0 );
+        /* paranoid */ verify( kernelTLS().this_processor );
+        /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count );
+
+        processor * proc = kernelTLS().this_processor;
+
+        if(proc->rdq.target == -1u) {
+            proc->rdq.target = __tls_rand() % lanes.count;
+            unsigned it1  = proc->rdq.itr;
+            unsigned it2  = proc->rdq.itr + 1;
+            unsigned idx1 = proc->rdq.id + (it1 % READYQ_SHARD_FACTOR);
+            unsigned idx2 = proc->rdq.id + (it1 % READYQ_SHARD_FACTOR);
+            unsigned long long tsc1 = ts(lanes.data[idx1]);
+            unsigned long long tsc2 = ts(lanes.data[idx2]);
+            proc->rdq.cutoff = min(tsc1, tsc2);
+        }
+        else if(lanes.tscs[proc->rdq.target].tv < proc->rdq.cutoff) {
+            $thread * t = try_pop(cltr, proc->rdq.target);
+            proc->rdq.target = -1u;
+            if(t) return t;
+        }
+
+        for(READYQ_SHARD_FACTOR) {
+            unsigned i = proc->rdq.id + (--proc->rdq.itr % READYQ_SHARD_FACTOR);
+            if($thread * t = try_pop(cltr, i)) return t;
+        }
+        return 0p;
+    }
+
+    __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
+        for(25) {
+            unsigned i = __tls_rand() % lanes.count;
+            $thread * t = try_pop(cltr, i);
+            if(t) return t;
+        }
+
+        return search(cltr);
+    }
+#endif
 
 //=======================================================================
…
 
 //-----------------------------------------------------------------------
-// get index from random number with or without bias towards queues
-static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred) {
-    unsigned i;
-    bool local;
-    unsigned rlow = r % BIAS;
-    unsigned rhigh = r / BIAS;
-    if((0 != rlow) && preferred >= 0) {
-        // (BIAS - 1) out of BIAS chances
-        // Use preferred queues
-        i = preferred + (rhigh % READYQ_SHARD_FACTOR);
-        local = true;
-    }
-    else {
-        // 1 out of BIAS chances
-        // Use all queues
-        i = rhigh;
-        local = false;
-    }
-    return [i, local];
-}
-
-//-----------------------------------------------------------------------
 // try to pop from a lane given by index w
 static inline struct $thread * try_pop(struct cluster * cltr, unsigned w) with (cltr->ready_queue) {
…
     #endif
 
-    #if defined(USE_WORKSTEALING)
-        lanes.times[i].val = thrd->links.ts;
+    #if defined(USE_WORK_STEALING)
+        lanes.tscs[w].tv = thrd->link.ts;
     #endif
…
 // try to pop from any lanes making sure you don't miss any threads push
 // before the start of the function
-__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
+static inline struct $thread * search(struct cluster * cltr) with (cltr->ready_queue) {
     /* paranoid */ verify( lanes.count > 0 );
     unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
…
     for(unsigned i = 0; i < count; i++) {
         /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
-        it->cltr_id = value;
+        it->rdq.id = value;
+        it->rdq.target = -1u;
         value += READYQ_SHARD_FACTOR;
         it = &(*it)`next;
…
     assign_list(preferred, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
     assign_list(preferred, cltr->procs.idles  , cltr->procs.idle );
+}
+
+static void fix_times( struct cluster * cltr ) with( cltr->ready_queue ) {
+    #if defined(USE_WORK_STEALING)
+        lanes.tscs = alloc(lanes.count, lanes.tscs`realloc);
+        for(i; lanes.count) {
+            lanes.tscs[i].tv = ts(lanes.data[i]);
+        }
+    #endif
 }
…
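The two conditional sections implement different scheduling policies. Under USE_RELAXED_FIFO, idx_from_r() routes (BIAS - 1) out of BIAS operations to the calling processor's preferred lanes and 1 out of BIAS to the cluster at large. The USE_WORK_STEALING section is the substance of this changeset: each processor pushes and pops almost exclusively on its own READYQ_SHARD_FACTOR lanes, and only steals from a randomly chosen victim lane while that lane holds work older (by timestamp) than the processor's own. A plain-C sketch of that pop logic follows; lanes_count, lane_oldest_ts(), lane_try_pop(), and tls_rand() are hypothetical stand-ins for the runtime's lanes.count, ts()/lanes.tscs[].tv, try_pop(), and __tls_rand(), and READYQ_SHARD_FACTOR is assumed to be 2:

    #include <stddef.h>

    #define READYQ_SHARD_FACTOR 2   /* assumed value; the real constant lives in the CFA headers */

    struct thread_desc;              /* stand-in for CFA's $thread */

    /* Hypothetical stand-ins for the CFA runtime facilities used by pop_fast(): */
    extern unsigned             lanes_count;                    /* lanes.count            */
    extern unsigned long long   lane_oldest_ts(unsigned lane);  /* ts() / lanes.tscs[].tv */
    extern struct thread_desc * lane_try_pop(unsigned lane);    /* try_pop(cltr, lane)    */
    extern unsigned             tls_rand(void);                 /* __tls_rand()           */

    struct rdq_state {
        unsigned short its, itr;
        unsigned id, target;
        unsigned long long cutoff;
    };

    struct thread_desc * pop_fast_sketch(struct rdq_state * rdq) {
        if (rdq->target == (unsigned)-1) {
            /* No steal target yet: pick a random victim lane and record a cutoff
               timestamp sampled from two of this processor's own lanes.  (The
               changeset derives both sample indices from it1; this sketch uses
               consecutive iterations it1 and it1 + 1.) */
            rdq->target = tls_rand() % lanes_count;
            unsigned it1 = rdq->itr, it2 = rdq->itr + 1u;
            unsigned long long tsc1 = lane_oldest_ts(rdq->id + (it1 % READYQ_SHARD_FACTOR));
            unsigned long long tsc2 = lane_oldest_ts(rdq->id + (it2 % READYQ_SHARD_FACTOR));
            rdq->cutoff = tsc1 < tsc2 ? tsc1 : tsc2;
        }
        else if (lane_oldest_ts(rdq->target) < rdq->cutoff) {
            /* The victim's oldest work predates our cutoff: try to steal once,
               then forget the target either way. */
            unsigned victim = rdq->target;
            rdq->target = (unsigned)-1;
            struct thread_desc * t = lane_try_pop(victim);
            if (t) return t;
        }

        /* Default: cycle through this processor's own READYQ_SHARD_FACTOR lanes. */
        for (unsigned n = 0; n < READYQ_SHARD_FACTOR; n++) {
            unsigned i = rdq->id + (--rdq->itr % READYQ_SHARD_FACTOR);
            struct thread_desc * t = lane_try_pop(i);
            if (t) return t;
        }
        return NULL;   /* 0p in CFA */
    }

Note that a freshly picked target is not probed on the same call: the cutoff is recorded first, and the steal attempt happens on a later call, once the local lanes have had a chance to be drained.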