Changeset 038a0bd for libcfa/src
- Timestamp: Jan 18, 2022, 8:49:43 PM (4 years ago)
- Branches: ADT, ast-experimental, enum, forall-pointer-decay, master, pthread-emulation, qualifiedEnum
- Children: 6a33e40
- Parents: b5f17e14 (diff), adfd125 (diff)
- Location: libcfa/src
- Files: 4 edited

Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
libcfa/src/common.hfa
rb5f17e14 r038a0bd

 //
 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
 //
 // The contents of this file are covered under the licence agreement in the
 // file "LICENCE" distributed with Cforall.
 //
-// common --
+// common.hfa --
 //
 // Author : Peter A. Buhr
 // Created On : Wed Jul 11 17:54:36 2018
…
 // Last Modified On : Wed May 5 14:02:04 2021
 // Update Count : 18
 //

 #pragma once
libcfa/src/concurrency/io.cfa
rb5f17e14 r038a0bd

 	__ioarbiter_flush( ctx );

-	__STATS__( true, io.calls.flush++; )
-	int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8);
-	if( ret < 0 ) {
-		switch((int)errno) {
-		case EAGAIN:
-		case EINTR:
-		case EBUSY:
-			// Update statistics
-			__STATS__( false, io.calls.errors.busy ++; )
-			return false;
-		default:
-			abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+	if(ctx.sq.to_submit != 0 || min_comp > 0) {
+
+		__STATS__( true, io.calls.flush++; )
+		int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8);
+		if( ret < 0 ) {
+			switch((int)errno) {
+			case EAGAIN:
+			case EINTR:
+			case EBUSY:
+				// Update statistics
+				__STATS__( false, io.calls.errors.busy ++; )
+				return false;
+			default:
+				abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+			}
 		}
-	}
-
-	__cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd);
-	__STATS__( true, io.calls.submitted += ret; )
-	/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
-	/* paranoid */ verify( ctx.sq.to_submit >= ret );
-
-	ctx.sq.to_submit -= ret;
-
-	/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
-
-	// Release the consumed SQEs
-	__release_sqes( ctx );
-
-	/* paranoid */ verify( ! __preemption_enabled() );
-
-	ctx.proc->io.pending = false;
+
+		__cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd);
+		__STATS__( true, io.calls.submitted += ret; )
+		/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+		/* paranoid */ verify( ctx.sq.to_submit >= ret );
+
+		ctx.sq.to_submit -= ret;
+
+		/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+
+		// Release the consumed SQEs
+		__release_sqes( ctx );
+
+		/* paranoid */ verify( ! __preemption_enabled() );
+
+		ctx.proc->io.pending = false;
+	}
+
 	ready_schedule_lock();
 	bool ret = __cfa_io_drain( proc );
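The io.cfa hunk wraps the `io_uring_enter` call so the flush path only enters the kernel when there are submission entries pending or the caller asked to wait for completions; an idle flush now costs no syscall at all. Below is a minimal stand-alone sketch of the same guard in plain C rather than Cforall; the function name `flush_ring` and its parameters are illustrative stand-ins, not the CFA runtime's actual API.

    #include <errno.h>
    #include <signal.h>
    #include <stdbool.h>
    #include <stdlib.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/io_uring.h>

    // Sketch of the guarded flush: 'fd' is the ring's file descriptor,
    // 'to_submit' mirrors ctx.sq.to_submit, 'min_comp' the completions to wait for.
    static bool flush_ring(int fd, unsigned to_submit, unsigned min_comp) {
        // The key change in this merge: skip the syscall entirely when there
        // is nothing to submit and nothing to wait for.
        if (to_submit == 0 && min_comp == 0) return true;

        int ret = syscall(__NR_io_uring_enter, fd, to_submit, min_comp,
                          min_comp > 0 ? IORING_ENTER_GETEVENTS : 0,
                          (sigset_t *)NULL, _NSIG / 8);
        if (ret < 0) {
            switch (errno) {
            case EAGAIN: case EINTR: case EBUSY:
                return false;  // transient failure: caller retries on the next flush
            default:
                abort();       // unrecoverable ring error, mirrors the runtime's abort()
            }
        }
        // On success, 'ret' SQEs were consumed by the kernel; the runtime
        // subtracts it from to_submit and releases the corresponding SQE slots.
        return true;
    }

Since `io_uring_enter` is the only per-flush syscall, guarding it removes the kernel transition from the common case where a processor flushes with an empty submission queue.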
libcfa/src/concurrency/kernel.hfa
rb5f17e14 r038a0bd

 	unsigned last;
 	signed cpu;
-	// unsigned long long int cutoff;
 } rdq;
…
 };

-struct __attribute__((aligned(128))) __cache_id_t {
+struct __attribute__((aligned(16))) __cache_id_t {
 	volatile unsigned id;
 };
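The kernel.hfa hunk drops a dead, commented-out `cutoff` field and relaxes `__cache_id_t` from 128-byte to 16-byte alignment, so an array of cache ids packs eight entries into the space one previously occupied alone. A small C sketch of what the attribute changes, using hypothetical struct names since only the alignment value is at issue:

    #include <stdio.h>
    #include <stdalign.h>

    // Same payload, two alignment choices: sizeof is rounded up to the
    // declared alignment, so arrays inherit all of the padding.
    struct __attribute__((aligned(128))) cache_id_wide   { volatile unsigned id; };
    struct __attribute__((aligned(16)))  cache_id_narrow { volatile unsigned id; };

    int main(void) {
        printf("aligned(128): size %zu, align %zu\n",
               sizeof(struct cache_id_wide), alignof(struct cache_id_wide));
        printf("aligned(16):  size %zu, align %zu\n",
               sizeof(struct cache_id_narrow), alignof(struct cache_id_narrow));
        return 0;
    }

The wide variant reports size 128, the narrow one size 16: the merge trades per-entry false-sharing padding for a much smaller footprint.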
libcfa/src/concurrency/ready_queue.cfa
rb5f17e14 r038a0bd

 		lanes.help[idx].dst = 0;
 		lanes.help[idx].tri = 0;
 	}
-
-	caches = alloc( cpu_info.llc_count );
-	for( idx; (size_t)cpu_info.llc_count ) {
-		(caches[idx]){};
-	}
 	#else
…
 	/* paranoid */ verify(cpu < cpu_info.hthrd_count);
 	unsigned this_cache = cpu_info.llc_map[cpu].cache;
-	__atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED);
+
+	// Super important: don't write the same value over and over again
+	// We want to maximise our chances that this particular value stays in cache
+	if(lanes.caches[this / READYQ_SHARD_FACTOR].id != this_cache)
+		__atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED);

 	const unsigned long long ctsc = rdtscl();
…
-static inline int pop_getcpu(processor * proc, __ready_queue_caches_t * caches) {
-	const int prv = proc->rdq.cpu;
-	const int cpu = __kernel_getcpu();
-	if( prv != proc->rdq.cpu ) {
-		unsigned pidx = cpu_info.llc_map[prv].cache;
-		/* paranoid */ verify(pidx < cpu_info.llc_count);
-
-		unsigned nidx = cpu_info.llc_map[cpu].cache;
-		/* paranoid */ verify(pidx < cpu_info.llc_count);
-
-		depart(caches[pidx]);
-		arrive(caches[nidx]);
-
-		__STATS( /* cpu migs++ */ )
-	}
-	return proc->rdq.cpu = cpu;
-}
-
 // Pop from the ready queue from a given cluster
 __attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
…
 	processor * const proc = kernelTLS().this_processor;
-	const int cpu = pop_getcpu( proc, caches );
-	// const int cpu = __kernel_getcpu();
+	const int cpu = __kernel_getcpu();
 	/* paranoid */ verify(cpu >= 0);
 	/* paranoid */ verify(cpu < cpu_info.hthrd_count);
…
 	unsigned long long max = 0;
 	for(i; READYQ_SHARD_FACTOR) {
-		unsigned long long tsc = moving_average(ctsc - ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
+		unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
 		if(tsc > max) max = tsc;
 	}
…
 	unsigned long long max = 0;
 	for(i; READYQ_SHARD_FACTOR) {
-		unsigned long long tsc = moving_average(ctsc - ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
+		unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
 		if(tsc > max) max = tsc;
 	}
…
 		proc->rdq.target = MAX;
 		lanes.help[target / READYQ_SHARD_FACTOR].tri++;
-		if(moving_average(ctsc - lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) {
-			__STATS( __tls_stats()->ready.pop.helped[target]++; )
+		if(moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) {
 			thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
 			proc->rdq.last = target;
…
 	unsigned last = proc->rdq.last;
-	if(last != MAX && moving_average(ctsc - lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) {
-		__STATS( __tls_stats()->ready.pop.helped[last]++; )
+	if(last != MAX && moving_average(ctsc, lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) {
 		thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help));
 		if(t) return t;
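The most instructive hunk here is the guarded `__atomic_store_n`: repeatedly storing an unchanged cache id still dirties the line and forces every other core's copy to be invalidated, so the code now reads first and writes only when the value actually changed. A compact sketch of the idiom in portable C11; the function name `publish_cache_id` is hypothetical:

    #include <stdatomic.h>

    // "Check before store": a relaxed store of an unchanged value still
    // acquires the cache line in exclusive state, so readers on other cores
    // lose their copy. Reading first keeps the line shared in the common case.
    void publish_cache_id(_Atomic unsigned *slot, unsigned this_cache) {
        // Cheap relaxed load, usually a hit on a line in shared state.
        if (atomic_load_explicit(slot, memory_order_relaxed) != this_cache) {
            // Only pay for the invalidation when the topology actually changed.
            atomic_store_explicit(slot, this_cache, memory_order_relaxed);
        }
    }

The remaining hunks follow from one simplification: the per-LLC `caches` array and the `pop_getcpu` helper are removed in favour of calling `__kernel_getcpu()` directly, and `moving_average` now takes the current timestamp and the sampled timestamp as separate arguments instead of a precomputed difference.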