Changeset 038a0bd
- Timestamp:
- Jan 18, 2022, 8:49:43 PM (3 years ago)
- Branches:
- ADT, ast-experimental, enum, forall-pointer-decay, master, pthread-emulation, qualifiedEnum
- Children:
- 6a33e40
- Parents:
- b5f17e1 (diff), adfd125 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff)
links above to see all the changes relative to each parent. - Files:
-
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
benchmark/readyQ/transfer.cfa
rb5f17e1 r038a0bd 1 1 #include "rq_bench.hfa" 2 2 #include <fstream.hfa> 3 #include <locale.h> 3 4 4 5 Duration default_preemption() { … … 8 9 #define PRINT(...) 9 10 10 __ lehmer64_state_t lead_seed;11 __uint128_t lead_seed; 11 12 volatile unsigned leader; 12 13 volatile size_t lead_idx; … … 68 69 waitgroup(); 69 70 70 unsigned nleader = __lehmer64( lead_seed ) % nthreads;71 unsigned nleader = lehmer64( lead_seed ) % nthreads; 71 72 __atomic_store_n( &leader, nleader, __ATOMIC_SEQ_CST ); 72 73 … … 105 106 // ================================================== 106 107 int main(int argc, char * argv[]) { 107 __lehmer64_state_t lead_seed = getpid();108 for(10) __lehmer64( lead_seed );108 uint64_t lead_seed = getpid(); 109 for(10) lehmer64( lead_seed ); 109 110 unsigned nprocs = 2; 110 111 … … 126 127 127 128 lead_idx = 0; 128 leader = __lehmer64( lead_seed ) % nthreads;129 leader = lehmer64( lead_seed ) % nthreads; 129 130 130 131 size_t rechecks = 0; … … 167 168 } 168 169 170 setlocale( LC_NUMERIC, getenv( "LANG" ) ); 169 171 sout | "Duration (ms) : " | ws(3, 3, unit(eng((end - start)`dms))); 170 172 sout | "Number of processors : " | nprocs; -
libcfa/src/common.hfa
rb5f17e1 r038a0bd 1 // 1 // 2 2 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo 3 3 // 4 4 // The contents of this file are covered under the licence agreement in the 5 5 // file "LICENCE" distributed with Cforall. 6 // 7 // common --8 // 6 // 7 // common.hfa -- 8 // 9 9 // Author : Peter A. Buhr 10 10 // Created On : Wed Jul 11 17:54:36 2018 … … 12 12 // Last Modified On : Wed May 5 14:02:04 2021 13 13 // Update Count : 18 14 // 14 // 15 15 16 16 #pragma once -
libcfa/src/concurrency/io.cfa
rb5f17e1 r038a0bd 144 144 __ioarbiter_flush( ctx ); 145 145 146 __STATS__( true, io.calls.flush++; ) 147 int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8); 148 if( ret < 0 ) { 149 switch((int)errno) { 150 case EAGAIN: 151 case EINTR: 152 case EBUSY: 153 // Update statistics 154 __STATS__( false, io.calls.errors.busy ++; ) 155 return false; 156 default: 157 abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) ); 146 if(ctx.sq.to_submit != 0 || min_comp > 0) { 147 148 __STATS__( true, io.calls.flush++; ) 149 int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8); 150 if( ret < 0 ) { 151 switch((int)errno) { 152 case EAGAIN: 153 case EINTR: 154 case EBUSY: 155 // Update statistics 156 __STATS__( false, io.calls.errors.busy ++; ) 157 return false; 158 default: 159 abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) ); 160 } 158 161 } 159 } 160 161 __cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd); 162 __STATS__( true, io.calls.submitted += ret; ) 163 /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num ); 164 /* paranoid */ verify( ctx.sq.to_submit >= ret ); 165 166 ctx.sq.to_submit -= ret; 167 168 /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num ); 169 170 // Release the consumed SQEs 171 __release_sqes( ctx ); 172 173 /* paranoid */ verify( ! 
__preemption_enabled() ); 174 175 ctx.proc->io.pending = false; 162 163 __cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd); 164 __STATS__( true, io.calls.submitted += ret; ) 165 /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num ); 166 /* paranoid */ verify( ctx.sq.to_submit >= ret ); 167 168 ctx.sq.to_submit -= ret; 169 170 /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num ); 171 172 // Release the consumed SQEs 173 __release_sqes( ctx ); 174 175 /* paranoid */ verify( ! __preemption_enabled() ); 176 177 ctx.proc->io.pending = false; 178 } 179 176 180 ready_schedule_lock(); 177 181 bool ret = __cfa_io_drain( proc ); -
libcfa/src/concurrency/kernel.hfa
rb5f17e1 r038a0bd 68 68 unsigned last; 69 69 signed cpu; 70 // unsigned long long int cutoff;71 70 } rdq; 72 71 … … 154 153 }; 155 154 156 struct __attribute__((aligned(1 28))) __cache_id_t {155 struct __attribute__((aligned(16))) __cache_id_t { 157 156 volatile unsigned id; 158 157 }; -
libcfa/src/concurrency/ready_queue.cfa
rb5f17e1 r038a0bd 303 303 lanes.help[idx].dst = 0; 304 304 lanes.help[idx].tri = 0; 305 }306 307 caches = alloc( cpu_info.llc_count );308 for( idx; (size_t)cpu_info.llc_count ) {309 (caches[idx]){};310 305 } 311 306 #else … … 404 399 /* paranoid */ verify(cpu < cpu_info.hthrd_count); 405 400 unsigned this_cache = cpu_info.llc_map[cpu].cache; 406 __atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED); 401 402 // Super important: don't write the same value over and over again 403 // We want to maximise our chances that his particular values stays in cache 404 if(lanes.caches[this / READYQ_SHARD_FACTOR].id != this_cache) 405 __atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED); 407 406 408 407 const unsigned long long ctsc = rdtscl(); … … 506 505 } 507 506 508 static inline int pop_getcpu(processor * proc, __ready_queue_caches_t * caches) {509 const int prv = proc->rdq.cpu;510 const int cpu = __kernel_getcpu();511 if( prv != proc->rdq.cpu ) {512 unsigned pidx = cpu_info.llc_map[prv].cache;513 /* paranoid */ verify(pidx < cpu_info.llc_count);514 515 unsigned nidx = cpu_info.llc_map[cpu].cache;516 /* paranoid */ verify(pidx < cpu_info.llc_count);517 518 depart(caches[pidx]);519 arrive(caches[nidx]);520 521 __STATS( /* cpu migs++ */ )522 }523 return proc->rdq.cpu = cpu;524 }525 526 507 // Pop from the ready queue from a given cluster 527 508 __attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) { … … 530 511 531 512 processor * const proc = kernelTLS().this_processor; 532 const int cpu = pop_getcpu( proc, caches ); 533 // const int cpu = __kernel_getcpu(); 513 const int cpu = __kernel_getcpu(); 534 514 /* paranoid */ verify(cpu >= 0); 535 515 /* paranoid */ verify(cpu < cpu_info.hthrd_count); … … 548 528 unsigned long long max = 0; 549 529 for(i; READYQ_SHARD_FACTOR) { 550 unsigned long long tsc = moving_average(ctsc -ts(lanes.data[start + i]), lanes.tscs[start 
+ i].ma);530 unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma); 551 531 if(tsc > max) max = tsc; 552 532 } … … 569 549 unsigned long long max = 0; 570 550 for(i; READYQ_SHARD_FACTOR) { 571 unsigned long long tsc = moving_average(ctsc -ts(lanes.data[start + i]), lanes.tscs[start + i].ma);551 unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma); 572 552 if(tsc > max) max = tsc; 573 553 } … … 577 557 proc->rdq.target = MAX; 578 558 lanes.help[target / READYQ_SHARD_FACTOR].tri++; 579 if(moving_average(ctsc - lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) { 580 __STATS( __tls_stats()->ready.pop.helped[target]++; ) 559 if(moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) { 581 560 thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help)); 582 561 proc->rdq.last = target; … … 587 566 588 567 unsigned last = proc->rdq.last; 589 if(last != MAX && moving_average(ctsc - lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) { 590 __STATS( __tls_stats()->ready.pop.helped[last]++; ) 568 if(last != MAX && moving_average(ctsc, lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) { 591 569 thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help)); 592 570 if(t) return t; -
src/AST/Decl.hpp
rb5f17e1 r038a0bd 34 34 // Must be included in *all* AST classes; should be #undef'd at the end of the file 35 35 #define MUTATE_FRIEND \ 36 36 template<typename node_t> friend node_t * mutate(const node_t * node); \ 37 37 template<typename node_t> friend node_t * shallowCopy(const node_t * node); 38 38 … … 135 135 std::vector< ptr<Expr> > withExprs; 136 136 137 138 137 FunctionDecl( const CodeLocation & loc, const std::string & name, std::vector<ptr<TypeDecl>>&& forall, 139 138 std::vector<ptr<DeclWithType>>&& params, std::vector<ptr<DeclWithType>>&& returns, -
src/AST/Eval.hpp
rb5f17e1 r038a0bd 24 24 template< typename... Args > 25 25 UntypedExpr * call( const CodeLocation & loc, const std::string & name, Args &&... args ) { 26 return new UntypedExpr { 27 loc, new NameExpr { loc, name }, 26 return new UntypedExpr { 27 loc, new NameExpr { loc, name }, 28 28 std::vector< ptr< Expr > > { std::forward< Args >( args )... } }; 29 29 } -
src/Validate/InitializerLength.cpp
rb5f17e1 r038a0bd 14 14 // 15 15 16 //#include "InitializerLength.hpp"16 #include "InitializerLength.hpp" 17 17 18 18 #include "AST/Expr.hpp" -
src/Validate/InitializerLength.hpp
rb5f17e1 r038a0bd 14 14 // 15 15 16 #pragma once 17 18 namespace ast { 19 class TranslationUnit; 20 } 21 16 22 namespace Validate { 17 23
Note: See TracChangeset
for help on using the changeset viewer.