Context Navigation

← Previous Change
Next Change →

Changeset 038a0bd for libcfa

Timestamp:

Jan 18, 2022, 8:49:43 PM (4 years ago)

Author:

Peter A. Buhr <pabuhr@…>

Branches:

ADT, ast-experimental, enum, forall-pointer-decay, master, pthread-emulation, qualifiedEnum

Children:

6a33e40

Parents:

b5f17e14 (diff), adfd125 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.

Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Location:

libcfa/src

Files:

4 edited

common.hfa (modified) (2 diffs)
concurrency/io.cfa (modified) (1 diff)
concurrency/kernel.hfa (modified) (2 diffs)
concurrency/ready_queue.cfa (modified) (8 diffs)

Legend:

Unmodified
Added
Removed

libcfa/src/common.hfa

-              rb5f17e14
+              r038a0bd
 //
+//
 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
 //
 // The contents of this file are covered under the licence agreement in the
 // file "LICENCE" distributed with Cforall.
 //
 // common --
 //
+//
+// common.hfa --
+//
 // Author           : Peter A. Buhr
 // Created On       : Wed Jul 11 17:54:36 2018
 …
 // Last Modified On : Wed May  5 14:02:04 2021
 // Update Count     : 18
 //
+//
 #pragma once

libcfa/src/concurrency/io.cfa

-              rb5f17e14
+              r038a0bd
                 __ioarbiter_flush( ctx );
+                __STATS__( true, io.calls.flush++; )
+                int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8);
+                if( ret < 0 ) {
+                        switch((int)errno) {
+                        case EAGAIN:
+                        case EINTR:
+                        case EBUSY:
+                                // Update statistics
+                                __STATS__( false, io.calls.errors.busy ++; )
+                                return false;
+                        default:
+                                abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+                if(ctx.sq.to_submit != 0 || min_comp > 0) {
+                        __STATS__( true, io.calls.flush++; )
+                        int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8);
+                        if( ret < 0 ) {
+                                switch((int)errno) {
+                                case EAGAIN:
+                                case EINTR:
+                                case EBUSY:
+                                        // Update statistics
+                                        __STATS__( false, io.calls.errors.busy ++; )
+                                        return false;
+                                default:
+                                        abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+                                }
+                        }
+                }
+                __cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd);
+                __STATS__( true, io.calls.submitted += ret; )
+                /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+                /* paranoid */ verify( ctx.sq.to_submit >= ret );
+                ctx.sq.to_submit -= ret;
+                /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+                // Release the consumed SQEs
+                __release_sqes( ctx );
+                /* paranoid */ verify( ! __preemption_enabled() );
+                ctx.proc->io.pending = false;
+                        __cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd);
+                        __STATS__( true, io.calls.submitted += ret; )
+                        /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+                        /* paranoid */ verify( ctx.sq.to_submit >= ret );
+                        ctx.sq.to_submit -= ret;
+                        /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+                        // Release the consumed SQEs
+                        __release_sqes( ctx );
+                        /* paranoid */ verify( ! __preemption_enabled() );
+                        ctx.proc->io.pending = false;
+                }
                 ready_schedule_lock();
                 bool ret = __cfa_io_drain( proc );

libcfa/src/concurrency/kernel.hfa

-              rb5f17e14
+              r038a0bd
                 unsigned last;
                 signed   cpu;
-                // unsigned long long int cutoff;
         } rdq;
 …
 };
 struct __attribute__((aligned(128))) __cache_id_t {
+struct __attribute__((aligned(16))) __cache_id_t {
         volatile unsigned id;
 };

libcfa/src/concurrency/ready_queue.cfa

-              rb5f17e14
+              r038a0bd
                         lanes.help[idx].dst = 0;
                         lanes.help[idx].tri = 0;
+                }
-                caches = alloc( cpu_info.llc_count );
-                for( idx; (size_t)cpu_info.llc_count ) {
-                        (caches[idx]){};
+                }
         #else
 …
                 /* paranoid */ verify(cpu < cpu_info.hthrd_count);
                 unsigned this_cache = cpu_info.llc_map[cpu].cache;
+                __atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED);
+                // Super important: don't write the same value over and over again
+                // We want to maximise our chances that this particular value stays in cache
+                if(lanes.caches[this / READYQ_SHARD_FACTOR].id != this_cache)
+                        __atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED);
                 const unsigned long long ctsc = rdtscl();
 …
+        }
-        static inline int pop_getcpu(processor * proc, __ready_queue_caches_t * caches) {
-                const int prv = proc->rdq.cpu;
-                const int cpu = __kernel_getcpu();
-                if( prv != proc->rdq.cpu ) {
-                        unsigned pidx = cpu_info.llc_map[prv].cache;
-                        /* paranoid */ verify(pidx < cpu_info.llc_count);
-                        unsigned nidx = cpu_info.llc_map[cpu].cache;
-                        /* paranoid */ verify(pidx < cpu_info.llc_count);
-                        depart(caches[pidx]);
-                        arrive(caches[nidx]);
-                        __STATS( /* cpu migs++ */ )
+                }
-                return proc->rdq.cpu = cpu;
+        }
         // Pop from the ready queue from a given cluster
         __attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
 …
                 processor * const proc = kernelTLS().this_processor;
+                const int cpu = pop_getcpu( proc, caches );
+                // const int cpu = __kernel_getcpu();
+                const int cpu = __kernel_getcpu();
                 /* paranoid */ verify(cpu >= 0);
                 /* paranoid */ verify(cpu < cpu_info.hthrd_count);
 …
                         unsigned long long max = 0;
                         for(i; READYQ_SHARD_FACTOR) {
                                 unsigned long long tsc = moving_average(ctsc - ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
+                                unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
                                 if(tsc > max) max = tsc;
+                        }
 …
                         unsigned long long max = 0;
                         for(i; READYQ_SHARD_FACTOR) {
                                 unsigned long long tsc = moving_average(ctsc - ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
+                                unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
                                 if(tsc > max) max = tsc;
+                        }
 …
                                 proc->rdq.target = MAX;
                                 lanes.help[target / READYQ_SHARD_FACTOR].tri++;
+                                if(moving_average(ctsc - lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) {
+                                        __STATS( __tls_stats()->ready.pop.helped[target]++; )
+                                if(moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) {
                                         thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
                                         proc->rdq.last = target;
 …
                         unsigned last = proc->rdq.last;
+                        if(last != MAX && moving_average(ctsc - lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) {
+                                __STATS( __tls_stats()->ready.pop.helped[last]++; )
+                        if(last != MAX && moving_average(ctsc, lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) {
                                 thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help));
                                 if(t) return t;

Note: See TracChangeset for help on using the changeset viewer.