Changes in / [a77f25b:f55f110]
- Location:
- libcfa/src/concurrency
- Files:
-
- 2 edited
-
kernel.hfa (modified) (2 diffs)
-
ready_queue.cfa (modified) (8 diffs)
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/concurrency/kernel.hfa
ra77f25b rf55f110 68 68 unsigned last; 69 69 signed cpu; 70 // unsigned long long int cutoff;71 70 } rdq; 72 71 … … 154 153 }; 155 154 156 struct __attribute__((aligned(128))) __cache_id_t {155 struct __attribute__((aligned(16))) __cache_id_t { 157 156 volatile unsigned id; 158 157 };
libcfa/src/concurrency/ready_queue.cfa
ra77f25b rf55f110 303 303 lanes.help[idx].dst = 0; 304 304 lanes.help[idx].tri = 0; 305 }306 307 caches = alloc( cpu_info.llc_count );308 for( idx; (size_t)cpu_info.llc_count ) {309 (caches[idx]){};310 305 } 311 306 #else … … 404 399 /* paranoid */ verify(cpu < cpu_info.hthrd_count); 405 400 unsigned this_cache = cpu_info.llc_map[cpu].cache; 406 __atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED); 401 402 // Super important: don't write the same value over and over again 403 // We want to maximise our chances that his particular values stays in cache 404 if(lanes.caches[this / READYQ_SHARD_FACTOR].id != this_cache) 405 __atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED); 407 406 408 407 const unsigned long long ctsc = rdtscl(); … … 506 505 } 507 506 508 static inline int pop_getcpu(processor * proc, __ready_queue_caches_t * caches) {509 const int prv = proc->rdq.cpu;510 const int cpu = __kernel_getcpu();511 if( prv != proc->rdq.cpu ) {512 unsigned pidx = cpu_info.llc_map[prv].cache;513 /* paranoid */ verify(pidx < cpu_info.llc_count);514 515 unsigned nidx = cpu_info.llc_map[cpu].cache;516 /* paranoid */ verify(pidx < cpu_info.llc_count);517 518 depart(caches[pidx]);519 arrive(caches[nidx]);520 521 __STATS( /* cpu migs++ */ )522 }523 return proc->rdq.cpu = cpu;524 }525 526 507 // Pop from the ready queue from a given cluster 527 508 __attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) { … … 530 511 531 512 processor * const proc = kernelTLS().this_processor; 532 const int cpu = pop_getcpu( proc, caches ); 533 // const int cpu = __kernel_getcpu(); 513 const int cpu = __kernel_getcpu(); 534 514 /* paranoid */ verify(cpu >= 0); 535 515 /* paranoid */ verify(cpu < cpu_info.hthrd_count); … … 548 528 unsigned long long max = 0; 549 529 for(i; READYQ_SHARD_FACTOR) { 550 unsigned long long tsc = moving_average(ctsc -ts(lanes.data[start + i]), lanes.tscs[start 
+ i].ma);530 unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma); 551 531 if(tsc > max) max = tsc; 552 532 } … … 569 549 unsigned long long max = 0; 570 550 for(i; READYQ_SHARD_FACTOR) { 571 unsigned long long tsc = moving_average(ctsc -ts(lanes.data[start + i]), lanes.tscs[start + i].ma);551 unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma); 572 552 if(tsc > max) max = tsc; 573 553 } … … 577 557 proc->rdq.target = MAX; 578 558 lanes.help[target / READYQ_SHARD_FACTOR].tri++; 579 if(moving_average(ctsc - lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) { 580 __STATS( __tls_stats()->ready.pop.helped[target]++; ) 559 if(moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) { 581 560 thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help)); 582 561 proc->rdq.last = target; … … 587 566 588 567 unsigned last = proc->rdq.last; 589 if(last != MAX && moving_average(ctsc - lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) { 590 __STATS( __tls_stats()->ready.pop.helped[last]++; ) 568 if(last != MAX && moving_average(ctsc, lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) { 591 569 thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help)); 592 570 if(t) return t;
Note:
See TracChangeset
for help on using the changeset viewer.