Changeset 7a80113


Ignore:
Timestamp:
Sep 22, 2020, 11:29:12 AM (13 months ago)
Author:
Thierry Delisle <tdelisle@…>
Branches:
arm-eh, jacob/cs343-translation, master, new-ast-unique-expr
Children:
0a945fd
Parents:
1c507eb (diff), 08f3ad3 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Files:
18 added
57 edited
1 moved

Legend:

Unmodified
Added
Removed
  • Jenkinsfile

    r1c507eb r7a80113  
    102102
    103103                echo GitLogMessage()
    104 
    105                 // This is a complete hack but it solves problems with automake thinking it needs to regenerate makefiles
    106                 // We fudged automake/missing to handle that but automake stills bakes prints inside the makefiles
    107                 // and these cause more problems.
    108                 sh 'find . -name Makefile.in -exec touch {} +'
    109104        }
    110105}
     
    465460                                        description: 'Which compiler to use',                                   \
    466461                                        name: 'Compiler',                                                                       \
    467                                         choices: 'gcc-9\ngcc-8\ngcc-7\ngcc-6\ngcc-5\ngcc-4.9\nclang',                                   \
     462                                        choices: 'gcc-9\ngcc-8\ngcc-7\ngcc-6\ngcc-5\ngcc-4.9\nclang',   \
    468463                                        defaultValue: 'gcc-8',                                                          \
    469464                                ],                                                                                              \
  • benchmark/io/http/filecache.cfa

    r1c507eb r7a80113  
    7373        cache_line * entries;
    7474        size_t size;
     75        int * rawfds;
     76        int nfds;
    7577} file_cache;
    7678
     
    98100}
    99101
    100 int put_file( cache_line & entry ) {
     102int put_file( cache_line & entry, int fd ) {
    101103        uint32_t idx = murmur3_32( (const uint8_t *)entry.file, strlen(entry.file), options.file_cache.hash_seed ) % file_cache.size;
    102104
     
    108110
    109111        file_cache.entries[idx] = entry;
     112        file_cache.entries[idx].fd = fd;
    110113        return i > 0 ? 1 : 0;
    111114}
     
    121124        size_t fcount = 0;
    122125        size_t fsize = 16;
    123         cache_line * raw = 0p;
    124         raw = alloc(raw, fsize, true);
     126        cache_line * raw = alloc(fsize);
    125127        // Step 1 get a dense array of all files
    126128        int walk(const char *fpath, const struct stat *sb, int typeflag) {
     
    131133                if(fcount > fsize) {
    132134                        fsize *= 2;
    133                         raw = alloc(raw, fsize, true);
     135                        raw = alloc(fsize, raw`realloc);
    134136                }
    135137
     
    162164        file_cache.entries = anew(file_cache.size);
    163165
     166        if(options.file_cache.fixed_fds) {
     167                file_cache.nfds   = fcount;
     168                file_cache.rawfds = alloc(fcount);
     169        }
     170
    164171        // Step 3 fill the cache
    165172        int conflicts = 0;
    166173        for(i; fcount) {
    167                 conflicts += put_file( raw[i] );
     174                int fd;
     175                if(options.file_cache.fixed_fds) {
     176                        file_cache.rawfds[i] = raw[i].fd;
     177                        fd = i;
     178                }
     179                else {
     180                        fd = raw[i].fd;
     181                }
     182                conflicts += put_file( raw[i], fd );
    168183        }
    169184        printf("Filled cache from path \"%s\" with %zu files\n", path, fcount);
     
    197212        }
    198213
    199         return [aalloc(extra), 0];
     214        size_t s = file_cache.nfds + extra;
     215        int * data = alloc(s, file_cache.rawfds`realloc);
     216        return [data, file_cache.nfds];
    200217}
    201218
  • benchmark/io/http/main.cfa

    r1c507eb r7a80113  
    1212#include <kernel.hfa>
    1313#include <stats.hfa>
     14#include <time.hfa>
    1415#include <thread.hfa>
    1516
    16 #include "channel.hfa"
    1717#include "filecache.hfa"
    1818#include "options.hfa"
    1919#include "worker.hfa"
    2020
     21extern void register_fixed_files( cluster &, int *, unsigned count );
     22
     23Duration default_preemption() {
     24        return 0;
     25}
     26
    2127//=============================================================================================
    2228// Globals
    2329//=============================================================================================
    24 channel & wait_connect;
    25 
    2630struct ServerProc {
    2731        processor self;
     
    8488        // Run Server Cluster
    8589        {
    86                 cluster cl = { "Server Cluster", options.clopts.flags };
     90                cluster cl = { "Server Cluster", options.clopts.params };
    8791                #if !defined(__CFA_NO_STATISTICS__)
    8892                        print_stats_at_exit( cl, CFA_STATS_READY_Q | CFA_STATS_IO );
    8993                #endif
    9094                options.clopts.instance = &cl;
    91 
    92                 channel chan = { options.clopts.chan_size };
    93                 &wait_connect = &chan;
    9495
    9596                int pipe_cnt = options.clopts.nworkers * 2;
     
    102103                }
    103104
     105                if(options.file_cache.fixed_fds) {
     106                        register_fixed_files(cl, fds, pipe_off);
     107                }
     108
    104109                {
    105110                        ServerProc procs[options.clopts.nprocs];
     
    107112                                Worker workers[options.clopts.nworkers];
    108113                                for(i; options.clopts.nworkers) {
    109                                         if( options.file_cache.fixed_fds ) {
    110                                                 workers[i].pipe[0] = pipe_off + (i * 2) + 0;
    111                                                 workers[i].pipe[1] = pipe_off + (i * 2) + 1;
    112                                         }
    113                                         else {
     114                                        // if( options.file_cache.fixed_fds ) {
     115                                        //      workers[i].pipe[0] = pipe_off + (i * 2) + 0;
     116                                        //      workers[i].pipe[1] = pipe_off + (i * 2) + 1;
     117                                        // }
     118                                        // else
     119                                        {
    114120                                                workers[i].pipe[0] = fds[pipe_off + (i * 2) + 0];
    115121                                                workers[i].pipe[1] = fds[pipe_off + (i * 2) + 1];
     122                                                workers[i].sockfd  = server_fd;
     123                                                workers[i].addr    = (struct sockaddr *)&address;
     124                                                workers[i].addrlen = (socklen_t*)&addrlen;
     125                                                workers[i].flags   = 0;
    116126                                        }
    117127                                        unpark( workers[i] __cfaabi_dbg_ctx2 );
     
    119129                                printf("%d workers started on %d processors\n", options.clopts.nworkers, options.clopts.nprocs);
    120130                                {
    121                                         Acceptor acceptor = { server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen, 0 };
    122 
    123131                                        char buffer[128];
    124132                                        while(!feof(stdin)) {
     
    127135
    128136                                        printf("Shutting Down\n");
    129                                 }
    130                                 printf("Acceptor Closed\n");
    131 
    132                                 // Clean-up the workers
    133                                 for(options.clopts.nworkers) {
    134                                         put( wait_connect, -1 );
    135137                                }
    136138                        }
  • benchmark/io/http/options.cfa

    r1c507eb r7a80113  
    3131                1,     // nworkers;
    3232                0,     // flags;
    33                 10,    // chan_size;
    3433                false, // procstats
    3534                false, // viewhalts
     
    3938
    4039const char * parse_options( int argc, char * argv[] ) {
    41         bool uthrdpo = false;
    4240        bool subthrd = false;
    4341        bool eagrsub = false;
     
    5250                {'t', "threads",        "Number of worker threads to use", options.clopts.nworkers},
    5351                {'b', "accept-backlog", "Maximum number of pending accepts", options.socket.backlog},
    54                 {'B', "channel-size",   "Maximum number of accepted connection pending", options.clopts.chan_size},
    5552                {'r', "request_len",    "Maximum number of bytes in the http request, requests with more data will be answered with Http Code 414", options.socket.buflen},
    5653                {'S', "seed",           "seed to use for hashing", options.file_cache.hash_seed },
    5754                {'C', "cache-size",     "Size of the cache to use, if set to small, will uses closes power of 2", options.file_cache.size },
    5855                {'l', "list-files",     "List the files in the specified path and exit", options.file_cache.list, parse_settrue },
    59                 {'u', "userthread",     "If set, cluster uses user-thread to poll I/O", uthrdpo, parse_settrue },
    6056                {'s', "submitthread",   "If set, cluster uses polling thread to submit I/O", subthrd, parse_settrue },
    6157                {'e', "eagersubmit",    "If set, cluster submits I/O eagerly but still aggregates submits", eagrsub, parse_settrue},
     
    7167        parse_args( argc, argv, opt, opt_cnt, "[OPTIONS]... [PATH]\ncforall http server", left );
    7268
    73         if( uthrdpo ) {
    74                 options.clopts.flags |= CFA_CLUSTER_IO_POLLER_USER_THREAD;
    75         }
    76 
    77         if( subthrd ) {
    78                 options.clopts.flags |= CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS;
    79         }
    80 
    81         if( eagrsub ) {
    82                 options.clopts.flags |= CFA_CLUSTER_IO_EAGER_SUBMITS;
    83         }
     69        options.clopts.params.poller_submits = subthrd;
     70        options.clopts.params.eager_submits  = eagrsub;
    8471
    8572        if( fixedfd ) {
     
    8875
    8976        if( sqkpoll ) {
    90                 options.clopts.flags |= CFA_CLUSTER_IO_KERNEL_POLL_SUBMITS;
     77                options.clopts.params.poll_submit = true;
    9178                options.file_cache.fixed_fds = true;
    9279        }
    9380
    9481        if( iokpoll ) {
    95                 options.clopts.flags |= CFA_CLUSTER_IO_KERNEL_POLL_COMPLETES;
     82                options.clopts.params.poll_complete = true;
    9683                options.file_cache.open_flags |= O_DIRECT;
    9784        }
    9885
    99         options.clopts.flags |= (sublen << CFA_CLUSTER_IO_BUFFLEN_OFFSET);
     86        options.clopts.params.num_ready = sublen;
    10087
    10188        if( left[0] == 0p ) { return "."; }
  • benchmark/io/http/options.hfa

    r1c507eb r7a80113  
    22
    33#include <stdint.h>
     4
     5#include <kernel.hfa>
    46
    57struct cluster;
     
    2325                int nprocs;
    2426                int nworkers;
    25                 int flags;
    26                 int chan_size;
     27                io_context_params params;
    2728                bool procstats;
    2829                bool viewhalts;
  • benchmark/io/http/protocol.cfa

    r1c507eb r7a80113  
    1111extern "C" {
    1212      int snprintf ( char * s, size_t n, const char * format, ... );
     13        #include <linux/io_uring.h>
    1314}
    1415#include <string.h>
    15 
    1616#include <errno.h>
    1717
     18#include "options.hfa"
    1819
    1920const char * http_msgs[] = {
     
    7475        READ:
    7576        for() {
    76                 int ret = cfa_read(fd, it, count);
    77                 if(ret == 0 ) return [OK200, true, 0p, 0];
     77                int ret = cfa_read(fd, (void*)it, count, 0, -1`s, 0p, 0p);
     78                if(ret == 0 ) return [OK200, true, 0, 0];
    7879                if(ret < 0 ) {
    7980                        if( errno == EAGAIN || errno == EWOULDBLOCK) continue READ;
     
    8889                count -= ret;
    8990
    90                 if( count < 1 ) return [E414, false, 0p, 0];
     91                if( count < 1 ) return [E414, false, 0, 0];
    9192        }
    9293
     
    9596        it = buffer;
    9697        int ret = memcmp(it, "GET /", 5);
    97         if( ret != 0 ) return [E400, false, 0p, 0];
     98        if( ret != 0 ) return [E400, false, 0, 0];
    9899        it += 5;
    99100
     
    106107        ssize_t ret;
    107108        SPLICE1: while(count > 0) {
    108                 ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, SPLICE_F_MOVE | SPLICE_F_MORE);
     109                ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, SPLICE_F_MOVE | SPLICE_F_MORE, 0, -1`s, 0p, 0p);
    109110                if( ret < 0 ) {
    110111                        if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE1;
     
    116117                size_t in_pipe = ret;
    117118                SPLICE2: while(in_pipe > 0) {
    118                         ret = cfa_splice(pipe[0], 0p, fd, 0p, in_pipe, SPLICE_F_MOVE | SPLICE_F_MORE);
     119                        ret = cfa_splice(pipe[0], 0p, fd, 0p, in_pipe, SPLICE_F_MOVE | SPLICE_F_MORE, 0, -1`s, 0p, 0p);
    119120                        if( ret < 0 ) {
    120121                                if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE2;
  • benchmark/io/http/worker.cfa

    r1c507eb r7a80113  
    2828        CONNECTION:
    2929        for() {
    30                 int fd = take(wait_connect);
    31                 if (fd < 0) break;
     30                int fd = cfa_accept4( this.[sockfd, addr, addrlen, flags], 0, -1`s, 0p, 0p );
     31                if(fd < 0) {
     32                        if( errno == ECONNABORTED ) break;
     33                        abort( "accept error: (%d) %s\n", (int)errno, strerror(errno) );
     34                }
    3235
    3336                printf("New connection %d, waiting for requests\n", fd);
     
    8285        }
    8386}
    84 
    85 //=============================================================================================
    86 // Acceptor Thread
    87 //=============================================================================================
    88 void ?{}( Acceptor & this, int sockfd, struct sockaddr * addr, socklen_t * addrlen, int flags ) {
    89         ((thread&)this){ "Acceptor Thread", *options.clopts.instance };
    90         this.sockfd  = sockfd;
    91         this.addr    = addr;
    92         this.addrlen = addrlen;
    93         this.flags   = flags;
    94 }
    95 
    96 void main( Acceptor & this ) {
    97         for() {
    98                 int ret = cfa_accept4( this.[sockfd, addr, addrlen, flags] );
    99                 if(ret < 0) {
    100                         if( errno == ECONNABORTED ) break;
    101                         abort( "accept error: (%d) %s\n", (int)errno, strerror(errno) );
    102                 }
    103 
    104                 printf("New connection accepted\n");
    105                 put( wait_connect, ret );
    106         }
    107 }
  • benchmark/io/http/worker.hfa

    r1c507eb r7a80113  
    77}
    88
    9 #include "channel.hfa"
    10 
    11 extern channel & wait_connect;
    12 
    139//=============================================================================================
    1410// Worker Thread
     
    1713thread Worker {
    1814        int pipe[2];
    19 };
    20 void ?{}( Worker & this );
    21 void main( Worker & );
    22 
    23 //=============================================================================================
    24 // Acceptor Thread
    25 //=============================================================================================
    26 thread Acceptor {
    2715        int sockfd;
    2816        struct sockaddr * addr;
     
    3018        int flags;
    3119};
    32 
    33 void ?{}( Acceptor & this, int sockfd, struct sockaddr * addr, socklen_t * addrlen, int flags );
    34 void main( Acceptor & this );
     20void ?{}( Worker & this);
     21void main( Worker & );
  • doc/theses/thierry_delisle_PhD/comp_II/comp_II.tex

    r1c507eb r7a80113  
    6060\section{Introduction}
    6161\subsection{\CFA and the \CFA concurrency package}
    62 \CFA\cite{Moss18} is a modern, polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language.
     62\CFA~\cite{Moss18} is a modern, polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language.
    6363It aims to add high-productivity features while maintaining the predictable performance of C.
    64 As such, concurrency in \CFA\cite{Delisle19} aims to offer simple and safe high-level tools while still allowing performant code.
    65 \CFA concurrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} in order to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programming.
     64As such, concurrency in \CFA~\cite{Delisle19} aims to offer simple and safe high-level tools while still allowing performant code.
     65\CFA concurrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programming.
    6666As such, the \CFA \newterm{scheduler} is a preemptive user-level scheduler that maps \glspl{uthrd} onto \glspl{kthrd}.
    6767
     68\subsection{Scheduling}
    6869\newterm{Scheduling} occurs when execution switches from one thread to another, where the second thread is implicitly chosen by the scheduler.
    69 This scheduling is an indirect handoff, as opposed to generators and coroutines which explicitly switch to the next generator and coroutine respectively.
     70This scheduling is an indirect handoff, as opposed to generators and coroutines that explicitly switch to the next generator and coroutine respectively.
    7071The cost of switching between two threads for an indirect handoff has two components:
    7172\begin{enumerate}
     
    7576and the cost of scheduling, \ie deciding which thread to run next among all the threads ready to run.
    7677\end{enumerate}
    77 The first cost is generally constant and fixed\footnote{Affecting the constant context-switch cost is whether it is done in one step, after the scheduling, or in two steps, context-switching to a third fixed thread before scheduling.}, while the scheduling cost can vary based on the system state.
    78 Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements, it simply adds new correctness requirements, \ie \newterm{linearizability}\footnote{Meaning, however fast the CPU threads run, there is an equivalent sequential order that gives the same result.}, and a new dimension to performance: scalability, where scheduling cost now also depends on contention.
     78The first cost is generally constant\footnote{Affecting the constant context-switch cost is whether it is done in one step, where the first thread schedules the second, or in two steps, where the first thread context switches to a third scheduler thread.}, while the scheduling cost can vary based on the system state.
     79Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements, it simply adds new correctness requirements, \ie \newterm{linearizability}\footnote{Meaning however fast the CPU threads run, there is an equivalent sequential order that gives the same result.}, and a new dimension to performance: scalability, where scheduling cost also depends on contention.
    7980The more threads switch, the more the administration cost of scheduling becomes noticeable.
    8081It is therefore important to build a scheduler with the lowest possible cost and latency.
    8182Another important consideration is \newterm{fairness}.
    8283In principle, scheduling should give the illusion of perfect fairness, where all threads ready to run are running \emph{simultaneously}.
     84In practice, there can be advantages to unfair scheduling, similar to the express cash register at a grocery store.
    8385While the illusion of simultaneity is easier to reason about, it can break down if the scheduler allows too much unfairness.
    8486Therefore, the scheduler should offer as much fairness as needed to guarantee eventual progress, but use unfairness to help performance.
    85 In practice, threads must wait in turn but there can be advantages to unfair scheduling, similar to the express cash register at a grocery store.
    86 
    87 The goal of this research is to produce a scheduler that is simple for programmers to understand and offers good performance.
     87
     88\subsection{Research Goal}
     89The goal of this research is to produce a scheduler that is simple for programmers to understand and offers good general performance.
    8890Here understandability does not refer to the API but to how much scheduling concerns programmers need to take into account when writing a \CFA concurrent package.
    89 Therefore, the main goal of this proposal is :
     91Therefore, the main consequence of this goal is:
    9092\begin{quote}
    9193The \CFA scheduler should be \emph{viable} for \emph{any} workload.
    9294\end{quote}
    9395
    94 For a general-purpose scheduler, it is impossible to produce an optimal algorithm as it would require knowledge of the future behaviour of threads.
    95 As such, scheduling performance is generally either defined by the best-case scenario, \ie a workload to which the scheduler is tailored, or the worst-case scenario, \ie the scheduler behaves no worse than \emph{X}.
     96For a general-purpose scheduler, it is impossible to produce an optimal algorithm as that requires knowledge of the future behaviour of threads.
     97As such, scheduling performance is generally either defined by a best-case scenario, \ie a workload to which the scheduler is tailored, or a worst-case scenario, \ie the scheduler behaves no worse than \emph{X}.
    9698For this proposal, the performance is evaluated using the second approach to allow \CFA programmers to rely on scheduling performance.
    9799Because there is no optimal scheduler, ultimately \CFA may allow programmers to write their own scheduler; but that is not the subject of this proposal, which considers only the default scheduler.
     
    103105        \item creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily,
    104106        \item scheduling blocking I/O operations,
    105         \item and writing sufficient library tools to allow developers to indirectly use the scheduler, either through tuning knobs or replacing the default scheduler.
     107        \item and writing sufficient library tools to allow developers to indirectly use the scheduler, either through tuning knobs in the default scheduler or replacing the default scheduler.
    106108\end{enumerate}
    107109
     
    119121\paragraph{Performance} The performance of a scheduler can generally be measured in terms of scheduling cost, scalability and latency.
    120122\newterm{Scheduling cost} is the cost to switch from one thread to another, as mentioned above.
    121 For simple applications, where a single kernel thread does most of the scheduling, it is generally the dominating cost.
    122 \newterm{Scalability} is the cost of adding multiple kernel threads because it increases the time for context switching because of contention by multiple threads accessing shared resources, \eg the ready queue.
     123For compute-bound concurrent applications with little context switching, the scheduling cost is negligible.
     124For applications with high context-switch rates, scheduling cost can begin to dominate the cost.
     125\newterm{Scalability} is the cost of adding multiple kernel threads.
     126It can increase the time for scheduling because of contention from the multiple threads accessing shared resources, \eg a single ready queue.
    123127Finally, \newterm{tail latency} is service delay and relates to thread fairness.
    124 Specifically, latency measures how long a thread waits to run once scheduled and is evaluated in the worst case.
     128Specifically, latency measures how long a thread waits to run once scheduled and is evaluated by the worst case.
    125129The \CFA scheduler should offer good performance for all three metrics.
    126130
     
    128132\newterm{Eventual progress} guarantees every scheduled thread is eventually run, \ie prevent starvation.
    129133As a hard requirement, the \CFA scheduler must guarantee eventual progress, otherwise the above-mentioned illusion of simultaneous execution is broken and the scheduler becomes much more complex to reason about.
    130 \newterm{Predictability} and \newterm{reliability} mean similar workloads achieve similar performance and programmer execution intuition is respected.
     134\newterm{Predictability} and \newterm{reliability} mean similar workloads achieve similar performance so programmer execution intuition is respected.
    131135For example, a thread that yields aggressively should not run more often than other threads.
    132136While this is intuitive, it does not hold true for many work-stealing or feedback based schedulers.
    133 The \CFA scheduler must guarantee eventual progress and should be predictable and offer reliable performance.
     137The \CFA scheduler must guarantee eventual progress, should be predictable, and offer reliable performance.
    134138
    135139\paragraph{Efficiency} Finally, efficient usage of CPU resources is also an important requirement and is discussed in depth towards the end of the proposal.
    136 \newterm{Efficiency} means avoiding using CPU cycles when there are no threads to run, and conversely, use all CPUs available when the workload can benefit from it.
     140\newterm{Efficiency} means avoiding using CPU cycles when there are no threads to run (conserve energy/heat), and conversely, using as many CPU cycles as are available when the workload can benefit from it.
    137141Balancing these two states is where the complexity lies.
    138142The \CFA scheduler should be efficient with respect to the underlying (shared) computer.
     
    146150\begin{enumerate}
    147151        \item Threads live long enough for useful feedback information to be gathered.
    148         \item Threads belong to multiple users so fairness across threads is insufficient.
     152        \item Threads belong to multiple users so fairness across users is largely invisible.
    149153\end{enumerate}
    150154
     
    158162Security concerns mean more precise and robust fairness metrics must be used to guarantee fairness across processes created by users as well as threads created within a process.
    159163In the case of the \CFA scheduler, every thread runs in the same user space and is controlled by the same user.
    160 Fairness across users is therefore a given and it is then possible to safely ignore the possibility that threads are malevolent.
    161 This approach allows for a much simpler fairness metric and in this proposal \emph{fairness} is defined as: when multiple threads are cycling through the system, the total ordering of threads being scheduled, \ie pushed onto the ready queue, should not differ much from the total ordering of threads being executed, \ie popped from the ready queue.
     164Fairness across threads is therefore a given and it is then possible to safely ignore the possibility that threads are malevolent.
     165This approach allows for a much simpler fairness metric, and in this proposal, \emph{fairness} is defined as:
     166\begin{quote}
     167When multiple threads are cycling through the system, the total ordering of threads being scheduled, \ie pushed onto the ready queue, should not differ much from the total ordering of threads being executed, \ie popped from the ready queue.
     168\end{quote}
    162169
    163170Since feedback is not necessarily feasible within the lifetime of all threads and a simple fairness metric can be used, the scheduling strategy proposed for the \CFA runtime does not use per-threads feedback.
     
    169176Threads with equal priority are scheduled using a secondary strategy, often something simple like round robin or FIFO.
    170177A consequence of priority is that, as long as there is a thread with a higher priority that desires to run, a thread with a lower priority does not run.
    171 This possible starving of threads can dramatically increase programming complexity since starving threads and priority inversion (prioritizing a lower priority thread) can both lead to serious problems.
     178The potential for thread starvation dramatically increases programming complexity since starving threads and priority inversion (prioritizing a lower priority thread) can both lead to serious problems.
    172179
    173180An important observation is that threads do not need to have explicit priorities for problems to occur.
    174 Indeed, any system with multiple ready queues that attempts to exhaust one queue before accessing the other queues, essentially provide implicit priority, which can encounter starvation problems.
     181Indeed, any system with multiple ready queues that attempts to exhaust one queue before accessing the other queues, essentially provides implicit priority, which can encounter starvation problems.
    175182For example, a popular scheduling strategy that suffers from implicit priorities is work stealing.
    176183\newterm{Work stealing} is generally presented as follows:
     
    180187        \item If a processor's ready queue is empty, attempt to run threads from some other processor's ready queue.
    181188\end{enumerate}
    182 
    183189In a loaded system\footnote{A \newterm{loaded system} is a system where threads are being run at the same rate they are scheduled.}, if a thread does not yield, block, or preempt for an extended period of time, threads on the same processor's list starve if no other processors exhaust their list.
    184190
    185 Since priorities can be complex for programmers to incorporate into their execution intuition, the scheduling strategy proposed for the \CFA runtime does not use a strategy with either implicit or explicit thread priorities.
     191Since priorities can be complex for programmers to incorporate into their execution intuition, the \CFA scheduling strategy does not provide explicit priorities and attempts to eliminate implicit priorities.
    186192
    187193\subsection{Schedulers without feedback or priorities}
    188194This proposal conjectures that it is possible to construct a default scheduler for the \CFA runtime that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about.
    189 The simplest fairness guarantee is FIFO ordering, \ie threads scheduled first run first.
     195The simplest fairness guarantee is FIFO ordering, \ie threads scheduled first come first.
    190196However, enforcing FIFO ordering generally conflicts with scalability across multiple processors because of the additional synchronization.
    191197Thankfully, strict FIFO is not needed for sufficient fairness.
    192198Since concurrency is inherently non-deterministic, fairness concerns in scheduling are only a problem if a thread repeatedly runs before another thread can run.
    193 Some relaxation is possible because non-determinism means programmers already handle ordering problems to produce correct code and hence rely on weak guarantees, \eg that a specific thread will \emph{eventually} run.
     199Some relaxation is possible because non-determinism means programmers already handle ordering problems to produce correct code and hence rely on weak guarantees, \eg that a thread \emph{eventually} runs.
    194200Since some reordering does not break correctness, the FIFO fairness guarantee can be significantly relaxed without causing problems.
    195201For this proposal, the target guarantee is that the \CFA scheduler provides \emph{probable} FIFO ordering, which allows reordering but makes it improbable that threads are reordered far from their position in total ordering.
    196202
    197203The \CFA scheduler fairness is defined as follows:
    198 \begin{itemize}
    199         \item Given two threads $X$ and $Y$, the odds that thread $X$ runs $N$ times \emph{after} thread $Y$ is scheduled but \emph{before} it is run, decreases exponentially with regard to $N$.
    200 \end{itemize}
     204\begin{quote}
     205Given two threads $X$ and $Y$, the odds that thread $X$ runs $N$ times \emph{after} thread $Y$ is scheduled but \emph{before} it is run, decreases exponentially with regard to $N$.
     206\end{quote}
    201207While this is not a bounded guarantee, the probability that unfairness persists for long periods of time decreases exponentially, making persisting unfairness virtually impossible.
    202208
     
    210216The described queue uses an array of underlying strictly FIFO queues as shown in Figure~\ref{fig:base}\footnote{For this section, the number of underlying queues is assumed to be constant.
    211217Section~\ref{sec:resize} discusses resizing the array.}.
    212 Pushing new data is done by selecting one of these underlying queues at random, recording a timestamp for the operation and pushing to the selected queue.
     218Pushing new data is done by selecting one of the underlying queues at random, recording a timestamp for the operation, and pushing to the selected queue.
    213219Popping is done by selecting two queues at random and popping from the queue with the oldest timestamp.
    214 A higher number of underlying queues lead to less contention on each queue and therefore better performance.
     220A higher number of underlying queues leads to less contention on each queue and therefore better performance.
    215221In a loaded system, it is highly likely the queues are non-empty, \ie several threads are on each of the underlying queues.
    216 This means that selecting a queue at random to pop from is highly likely to yield a queue with available items.
     222For this case, selecting a queue at random to pop from is highly likely to yield a queue with available items.
    217223In Figure~\ref{fig:base}, ignoring the ellipsis, the chances of getting an empty queue is 2/7 per pick, meaning two random picks yield an item approximately 9 times out of 10.
    218224
     
    221227                \input{base.pstex_t}
    222228        \end{center}
    223         \caption{Relaxed FIFO list at the base of the scheduler: an array of strictly FIFO lists.
    224         The timestamp is in all nodes and cell arrays.}
     229        \caption{Loaded relaxed FIFO list base on an array of strictly FIFO lists.
     230        A timestamp appears in each node and array cell.}
    225231        \label{fig:base}
    226232\end{figure}
     
    230236                \input{empty.pstex_t}
    231237        \end{center}
    232         \caption{``More empty'' state of the queue: the array contains many empty cells.}
     238        \caption{Unloaded relaxed FIFO list where the array contains many empty cells.}
    233239        \label{fig:empty}
    234240\end{figure}
    235241
    236 When the ready queue is \emph{more empty}, \ie several of the queues are empty, selecting a random queue for popping is less likely to yield a successful selection and more attempts are needed, resulting in a performance degradation.
     242In an unloaded system, several of the queues are empty, so selecting a random queue for popping is less likely to yield a successful selection and more attempts are needed, resulting in a performance degradation.
    237243Figure~\ref{fig:empty} shows an example with fewer elements, where the chances of getting an empty queue is 5/7 per pick, meaning two random picks yield an item only half the time.
    238244Since the ready queue is not empty, the pop operation \emph{must} find an element before returning and therefore must retry.
     
    262268\end{table}
    263269
    264 Performance can be improved in case~D (Table~\ref{tab:perfcases}) by adding information to help processors find which inner queues are used.
     270Performance can be improved in Table~\ref{tab:perfcases} case~D by adding information to help processors find which inner queues are used.
    265271This addition aims to avoid the cost of retrying the pop operation but does not affect contention on the underlying queues and can incur some management cost for both push and pop operations.
    266272The approach used to encode this information can vary in density and be either global or local.
     
    273279With a multi-word bitmask, this maximum limit can be increased arbitrarily, but it is not possible to check if the queue is empty by reading the bitmask atomically.
    274280
    275 Finally, a dense bitmap, either single or multi-word, causes additional problems in case C (Table 1), because many processors are continuously scanning the bitmask to find the few available threads.
     281Finally, a dense bitmap, either single or multi-word, causes additional problems in Table~\ref{tab:perfcases} case C, because many processors are continuously scanning the bitmask to find the few available threads.
    276282This increased contention on the bitmask(s) reduces performance because of cache misses after updates and the bitmask is updated more frequently by the scanning processors racing to read and/or update that information.
    277283This increased update frequency means the information in the bitmask is more often stale before a processor can use it to find an item, \ie mask read says there are available user threads but none on queue.
     
    279285\begin{figure}
    280286        \begin{center}
    281                 {\resizebox{0.8\textwidth}{!}{\input{emptybit}}}
    282         \end{center}
    283         \caption{``More empty'' queue with added bitmask to indicate which array cells have items.}
     287                {\resizebox{0.73\textwidth}{!}{\input{emptybit}}}
     288        \end{center}
     289        \vspace*{-5pt}
     290        \caption{Unloaded queue with added bitmask to indicate which array cells have items.}
    284291        \label{fig:emptybit}
     292        \begin{center}
     293                {\resizebox{0.73\textwidth}{!}{\input{emptytree}}}
     294        \end{center}
     295        \vspace*{-5pt}
     296        \caption{Unloaded queue with added binary search tree to indicate which array cells have items.}
     297        \label{fig:emptytree}
     298        \begin{center}
     299                {\resizebox{0.9\textwidth}{!}{\input{emptytls}}}
     300        \end{center}
     301        \vspace*{-5pt}
     302        \caption{Unloaded queue with added per processor bitmask to indicate which array cells have items.}
     303        \label{fig:emptytls}
    285304\end{figure}
    286305
    287 Figure~\ref{fig:emptytree} shows another approach using a hierarchical tree data-structure to reduce contention and has been shown to work in similar cases~\cite{ellen2007snzi}\footnote{This particular paper seems to be patented in the US.
     306Figure~\ref{fig:emptytree} shows an approach using a hierarchical tree data-structure to reduce contention and has been shown to work in similar cases~\cite{ellen2007snzi}\footnote{This particular paper seems to be patented in the US.
    288307How does that affect \CFA? Can I use it in my work?}.
    289 However, this approach may lead to poorer performance in case~B (Table~\ref{tab:perfcases}) due to the inherent pointer chasing cost and already low contention cost in that case.
    290 
    291 \begin{figure}
    292         \begin{center}
    293                 {\resizebox{0.8\textwidth}{!}{\input{emptytree}}}
    294         \end{center}
    295         \caption{``More empty'' queue with added binary search tree indicate which array cells have items.}
    296         \label{fig:emptytree}
    297 \end{figure}
    298 
    299 Finally, a third approach is to use dense information, similar to the bitmap, but have each thread keep its own independent copy of it.
     308However, this approach may lead to poorer performance in Table~\ref{tab:perfcases} case~B due to the inherent pointer chasing cost and already low contention cost in that case.
     309
     310Figure~\ref{fig:emptytls} shows an approach using dense information, similar to the bitmap, but has each thread keep its own independent copy of it.
    300311While this approach can offer good scalability \emph{and} low latency, the liveliness of the information can become a problem.
    301 In the simple cases, local copies of which underlying queues are empty can become stale and end-up not being useful for the pop operation.
     312In the simple cases, local copies with empty underlying queues can become stale and end up not being useful for the pop operation.
    302313A more serious problem is that reliable information is necessary for some parts of this algorithm to be correct.
    303314As mentioned in this section, processors must know \emph{reliably} whether the list is empty or not to decide if they can return \texttt{NULL} or if they must keep looking during a pop operation.
    304315Section~\ref{sec:sleep} discusses another case where reliable information is required for the algorithm to be correct.
    305316
    306 \begin{figure}
    307         \begin{center}
    308                 \input{emptytls}
    309         \end{center}
    310         \caption{``More empty'' queue with added per processor bitmask to indicate which array cells have items.}
    311         \label{fig:emptytls}
    312 \end{figure}
    313 
    314317There is a fundamental tradeoff among these approaches.
    315 Dense global information about empty underlying queues helps zero-contention cases at the cost of high-contention case.
    316 Sparse global information helps high-contention cases but increases latency in zero-contention-cases, to read and ``aggregate'' the information\footnote{Hierarchical structures, \eg binary search tree, effectively aggregate information but follow pointer chains, learning information at each node.
     318Dense global information about empty underlying queues helps zero-contention cases at the cost of the high-contention case.
     319Sparse global information helps high-contention cases but increases latency in zero-contention cases to read and ``aggregate'' the information\footnote{Hierarchical structures, \eg binary search tree, effectively aggregate information but follow pointer chains, learning information at each node.
    317320Similarly, other sparse schemes need to read multiple cachelines to acquire all the information needed.}.
    318 Finally, dense local information has both the advantages of low latency in zero-contention cases and scalability in high-contention cases. However the information can become stale making it difficult to use to ensure correctness.
     321Finally, dense local information has both the advantages of low latency in zero-contention cases and scalability in high-contention cases.
     322However, the information can become stale making it difficult to use to ensure correctness.
    319323The fact that these solutions have these fundamental limits suggests to me a better solution that attempts to combine these properties in an interesting way.
    320324Also, the lock discussed in Section~\ref{sec:resize} allows for solutions that adapt to the number of processors, which could also prove useful.
     
    323327
    324328How much scalability is actually needed is highly debatable.
    325 \emph{libfibre}\cite{libfibre} has compared favourably to other schedulers in webserver tests\cite{Karsten20} and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask.
     329\emph{libfibre}~\cite{libfibre} has compared favourably to other schedulers in webserver tests~\cite{Karsten20} and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask.
    326330As such, the single atomic instruction on a shared cacheline may be sufficiently performant.
    327331
    328 I have built a prototype of this ready queue in the shape of a data queue, \ie nodes on the queue are structures with a single int representing a thread and intrusive data fields.
    329 Using this prototype, I ran preliminary performance experiments that confirm the expected performance in Table~\ref{tab:perfcases}.
    330 However, these experiments only offer a hint at the actual performance of the scheduler since threads form more complex operations than simple integer nodes, \eg threads are not independent of each other, when a thread blocks some other thread must intervene to wake it.
     332I have built a prototype of this ready queue in the shape of a data queue, \ie nodes on the queue are structures with a single $int$ representing a thread and intrusive data fields.
     333Using this prototype, preliminary performance experiments confirm the expected performance in Table~\ref{tab:perfcases}.
     334However, these experiments only offer a hint at the actual performance of the scheduler since threads are involved in more complex operations, \eg threads are not independent of each other: when a thread blocks some other thread must intervene to wake it.
    331335
    332336I have also integrated this prototype into the \CFA runtime, but have not yet created performance experiments to compare results, as creating one-to-one comparisons between the prototype and the \CFA runtime will be complex.
     
    345349Threads on a cluster are always scheduled on one of the processors of the cluster.
    346350Currently, the runtime handles dynamically adding and removing processors from clusters at any time.
    347 Since this is part of the existing design, the proposed scheduler must also support this behaviour.
     351Since this feature is part of the existing design, the proposed scheduler must also support this behaviour.
    348352However, dynamically resizing a cluster is considered a rare event associated with setup, tear down and major configuration changes.
    349353This assumption is made both in the design of the proposed scheduler as well as in the original design of the \CFA runtime system.
    350354As such, the proposed scheduler must honour the correctness of this behaviour but does not have any performance objectives with regard to resizing a cluster.
    351 How long adding or removing processors take and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long periods of times.
     355That is, the time to add or remove processors and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long periods of time.
    352356However, as mentioned in Section~\ref{sec:queue}, contention on the underlying queues can have a direct impact on performance.
    353357The number of underlying queues must therefore be adjusted as the number of processors grows or shrinks.
     
    371375
    372376There are possible alternatives to the reader-writer lock solution.
    373 This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject\cite{michael2004hazard, brown2015reclaiming}.
     377This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject~\cite{brown2015reclaiming, michael2004hazard}.
    374378However, the reader-writer lock solution is simple and can be leveraged to solve other problems (\eg processor ordering and memory reclamation of threads), which makes it an attractive solution.
    375379
     
    401405Individual processors always finish scheduling user threads before looking for new work, which means that the last processor to go to sleep cannot miss threads scheduled from inside the cluster (if they do, that demonstrates the ready queue is not linearizable).
    402406However, this guarantee does not hold if threads are scheduled from outside the cluster, either due to an external event like timers and I/O, or due to a user (or kernel) thread migrating from a different cluster.
    403 In this case, missed signals can lead to the cluster deadlocking\footnote{Clusters should only deadlock in cases where a \CFA programmer \emph{actually} write \CFA code that leads to a deadlock.}.
     407In this case, missed signals can lead to the cluster deadlocking\footnote{Clusters should only deadlock in cases where a \CFA programmer \emph{actually} writes \CFA code that leads to a deadlock.}.
    404408Therefore, it is important that the scheduling of threads include a mechanism where signals \emph{cannot} be missed.
    405409For performance reasons, it can be advantageous to have a secondary mechanism that allows signals to be missed in cases where it cannot lead to a deadlock.
    406 To be safe, this process must include a ``handshake'' where it is guaranteed that either~: the sleeping processor notices that a user thread is scheduled after the sleeping processor signalled its intent to block or code scheduling threads sees the intent to sleep before scheduling and be able to wake-up the processor.
     410To be safe, this process must include a ``handshake'' where it is guaranteed that either:
     411\begin{enumerate}
     412\item
     413the sleeping processor notices that a user thread is scheduled after the sleeping processor signalled its intent to block or
     414\item
     415code scheduling threads sees the intent to sleep before scheduling and is able to wake up the processor.
     416\end{enumerate}
    407417This matter is complicated by the fact that pthreads and Linux offer few tools to implement this solution and no guarantee of ordering of threads waking up for most of these tools.
    408418
    409419Another important issue is avoiding kernel threads sleeping and waking frequently because there is a significant operating-system cost.
    410 This scenario happens when a program oscillates between high and low activity, needing most and then fewer processors.
     420This scenario happens when a program oscillates between high and low activity, needing most and then few processors.
    411421A possible partial solution is to order the processors so that the one which most recently went to sleep is woken up.
    412422This allows other sleeping processors to reach deeper sleep state (when these are available) while keeping ``hot'' processors warmer.
     
    417427Processors that are unnecessarily unblocked lead to unnecessary contention, CPU usage, and power consumption, while too many sleeping processors can lead to suboptimal throughput.
    418428Furthermore, transitions from sleeping to awake and vice versa also add unnecessary latency.
    419 There is already a wealth of research on the subject\cite{schillings1996engineering, wiki:thunderherd} and I may use an existing approach for the idle-sleep heuristic in this project, \eg\cite{Karsten20}.
     429There is already a wealth of research on the subject~\cite{schillings1996engineering, wiki:thunderherd} and I may use an existing approach for the idle-sleep heuristic in this project, \eg~\cite{Karsten20}.
    420430
    421431\subsection{Asynchronous I/O}
     
    432442an event-engine to (de)multiplex the operations,
    433443\item
    434 and a synchronous interface for users to use.
     444and a synchronous interface for users.
    435445\end{enumerate}
    436446None of these components currently exist in \CFA and I will need to build all three for this project.
    437447
    438 \paragraph{OS Abstraction}
    439 One fundamental part for converting blocking I/O operations into non-blocking ones is having an underlying asynchronous I/O interface to direct the I/O operations.
     448\paragraph{OS Asynchronous Abstraction}
     449One fundamental part for converting blocking I/O operations into non-blocking ones is having an underlying asynchronous I/O interface to direct the I/O operations.
    440450While there exists many different APIs for asynchronous I/O, it is not part of this proposal to create a novel API.
    441451It is sufficient to make one work in the complex context of the \CFA runtime.
    442 \uC uses the $select$\cite{select} as its interface, which handles ttys, pipes and sockets, but not disk.
     452\uC uses $select$~\cite{select} as its interface, which handles ttys, pipes and sockets, but not disk.
    443453$select$ entails significant complexity and is being replaced in UNIX operating systems, which make it a less interesting alternative.
    444 Another popular interface is $epoll$\cite{epoll}, which is supposed to be cheaper than $select$.
    445 However, $epoll$ also does not handle the file system and anecdotal evidence suggest it has problems with Linux pipes and $TTY$s.
    446 A popular cross-platform alternative is $libuv$\cite{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features).
     454Another popular interface is $epoll$~\cite{epoll}, which is supposed to be cheaper than $select$.
     455However, $epoll$ also does not handle the file system and anecdotal evidence suggests it has problems with Linux pipes and ttys.
     456A popular cross-platform alternative is $libuv$~\cite{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features).
    447457However, as a full-featured library it includes much more than I need and could conflict with other features of \CFA unless significant effort is made to merge them together.
    448 A very recent alternative that I am investigating is $io_uring$\cite{io_uring}.
     458A very recent alternative that I am investigating is $io_uring$~\cite{io_uring}.
    449459It claims to address some of the issues with $epoll$ and my early investigation suggests that the claim is accurate.
    450 $io_uring$ uses a much more general approach where system calls are registered to a queue and later executed by the kernel, rather than relying on system calls to return an error instead of blocking and subsequently waiting for changes on file descriptors.
    451 I believe this approach allows for fewer problems, \eg the manpage for $open$\cite{open} states:
     460$io_uring$ uses a much more general approach where system calls are registered to a queue and later executed by the kernel, rather than relying on system calls to subsequently wait for changes on file descriptors or return an error.
     461I believe this approach allows for fewer problems, \eg the manpage for $open$~\cite{open} states:
    452462\begin{quote}
    453463Note that [the $O_NONBLOCK$ flag] has no effect for regular files and block devices;
     
    455465Since $O_NONBLOCK$ semantics might eventually be implemented, applications should not depend upon blocking behaviour when specifying this flag for regular files and block devices.
    456466\end{quote}
    457 This makes approach based on $epoll$/$select$ less reliable since they may not work for every file descriptors.
    458 For this reason, I plan to use $io_uring$ as the OS abstraction for the \CFA runtime unless further work shows problems I haven't encountered yet.
    459 However, only a small subset of the features are available in Ubuntu as of April 2020\cite{wiki:ubuntu-linux}, which will limit performance comparisons.
     467This makes approaches based on $select$/$epoll$ less reliable since they may not work for every file descriptor.
     468For this reason, I plan to use $io_uring$ as the OS abstraction for the \CFA runtime unless further work encounters a fatal problem.
     469However, only a small subset of the features are available in Ubuntu as of April 2020~\cite{wiki:ubuntu-linux}, which will limit performance comparisons.
    460470I do not believe this will affect the comparison result.
    461471
    462472\paragraph{Event Engine}
    463 Laying on top of the asynchronous interface layer is the event engine.
     473Above the OS asynchronous abstraction is the event engine.
    464474This engine is responsible for multiplexing (batching) the synchronous I/O requests into asynchronous I/O requests and demultiplexing the results to appropriate blocked user threads.
    465475This step can be straightforward for simple cases, but becomes quite complex when there are thousands of user threads performing both reads and writes, possibly on overlapping file descriptors.
     
    478488The interface can be novel but it is preferable to match the existing POSIX interface when possible to be compatible with existing code.
    479489Matching allows C programs written using this interface to be transparently converted to \CFA with minimal effort.
    480 Where new functionality is needed, I will create a novel interface to fill gaps and provide advanced features.
     490Where new functionality is needed, I will add novel interface extensions to fill gaps and provide advanced features.
    481491
    482492
     
    485495\section{Discussion}
    486496I believe that runtime system and scheduling are still open topics.
    487 Many ``state of the art'' production frameworks still use single-threaded event loops because of performance considerations, \eg \cite{nginx-design}, and, to my knowledge, no widely available system language offers modern threading facilities.
     497Many ``state of the art'' production frameworks still use single-threaded event loops because of performance considerations, \eg~\cite{nginx-design}, and, to my knowledge, no widely available system language offers modern threading facilities.
    488498I believe the proposed work offers a novel runtime and scheduling package, where existing work only offers fragments that users must assemble themselves when possible.
    489499
  • libcfa/configure.ac

    r1c507eb r7a80113  
    166166AH_TEMPLATE([CFA_HAVE_IORING_OP_PROVIDE_BUFFERS],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_PROVIDE_BUFFERS.])
    167167AH_TEMPLATE([CFA_HAVE_IORING_OP_REMOVE_BUFFER],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_REMOVE_BUFFER.])
     168AH_TEMPLATE([CFA_HAVE_IORING_OP_TEE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_TEE.])
    168169AH_TEMPLATE([CFA_HAVE_IOSQE_FIXED_FILE],[Defined if io_uring support is present when compiling libcfathread and supports the flag FIXED_FILE.])
    169170AH_TEMPLATE([CFA_HAVE_IOSQE_IO_DRAIN],[Defined if io_uring support is present when compiling libcfathread and supports the flag IO_DRAIN.])
     
    173174AH_TEMPLATE([CFA_HAVE_SPLICE_F_FD_IN_FIXED],[Defined if io_uring support is present when compiling libcfathread and supports the flag SPLICE_F_FD_IN_FIXED.])
    174175AH_TEMPLATE([CFA_HAVE_IORING_SETUP_ATTACH_WQ],[Defined if io_uring support is present when compiling libcfathread and supports the flag IORING_SETUP_ATTACH_WQ.])
    175 AH_TEMPLATE([HAVE_PREADV2],[Defined if preadv2 support is present when compiling libcfathread.])
    176 AH_TEMPLATE([HAVE_PWRITEV2],[Defined if pwritev2 support is present when compiling libcfathread.])
     176AH_TEMPLATE([CFA_HAVE_PREADV2],[Defined if preadv2 support is present when compiling libcfathread.])
     177AH_TEMPLATE([CFA_HAVE_PWRITEV2],[Defined if pwritev2 support is present when compiling libcfathread.])
     178AH_TEMPLATE([CFA_HAVE_PWRITEV2],[Defined if pwritev2 support is present when compiling libcfathread.])
     179AH_TEMPLATE([CFA_HAVE_STATX],[Defined if statx support is present when compiling libcfathread.])
     180AH_TEMPLATE([CFA_HAVE_OPENAT2],[Defined if openat2 support is present when compiling libcfathread.])
    177181AH_TEMPLATE([__CFA_NO_STATISTICS__],[Defined if libcfathread was compiled without support for statistics.])
    178182
    179 define(ioring_ops, [IORING_OP_NOP,IORING_OP_READV,IORING_OP_WRITEV,IORING_OP_FSYNC,IORING_OP_READ_FIXED,IORING_OP_WRITE_FIXED,IORING_OP_POLL_ADD,IORING_OP_POLL_REMOVE,IORING_OP_SYNC_FILE_RANGE,IORING_OP_SENDMSG,IORING_OP_RECVMSG,IORING_OP_TIMEOUT,IORING_OP_TIMEOUT_REMOVE,IORING_OP_ACCEPT,IORING_OP_ASYNC_CANCEL,IORING_OP_LINK_TIMEOUT,IORING_OP_CONNECT,IORING_OP_FALLOCATE,IORING_OP_OPENAT,IORING_OP_CLOSE,IORING_OP_FILES_UPDATE,IORING_OP_STATX,IORING_OP_READ,IORING_OP_WRITE,IORING_OP_FADVISE,IORING_OP_MADVISE,IORING_OP_SEND,IORING_OP_RECV,IORING_OP_OPENAT2,IORING_OP_EPOLL_CTL,IORING_OP_SPLICE,IORING_OP_PROVIDE_BUFFERS,IORING_OP_REMOVE_BUFFER])
     183define(ioring_ops, [IORING_OP_NOP,IORING_OP_READV,IORING_OP_WRITEV,IORING_OP_FSYNC,IORING_OP_READ_FIXED,IORING_OP_WRITE_FIXED,IORING_OP_POLL_ADD,IORING_OP_POLL_REMOVE,IORING_OP_SYNC_FILE_RANGE,IORING_OP_SENDMSG,IORING_OP_RECVMSG,IORING_OP_TIMEOUT,IORING_OP_TIMEOUT_REMOVE,IORING_OP_ACCEPT,IORING_OP_ASYNC_CANCEL,IORING_OP_LINK_TIMEOUT,IORING_OP_CONNECT,IORING_OP_FALLOCATE,IORING_OP_OPENAT,IORING_OP_CLOSE,IORING_OP_FILES_UPDATE,IORING_OP_STATX,IORING_OP_READ,IORING_OP_WRITE,IORING_OP_FADVISE,IORING_OP_MADVISE,IORING_OP_SEND,IORING_OP_RECV,IORING_OP_OPENAT2,IORING_OP_EPOLL_CTL,IORING_OP_SPLICE,IORING_OP_PROVIDE_BUFFERS,IORING_OP_REMOVE_BUFFER,IORING_OP_TEE])
    180184define(ioring_flags, [IOSQE_FIXED_FILE,IOSQE_IO_DRAIN,IOSQE_ASYNC,IOSQE_IO_LINK,IOSQE_IO_HARDLINK,SPLICE_F_FD_IN_FIXED,IORING_SETUP_ATTACH_WQ])
    181185
     
    222226        ])
    223227])
    224 AC_CHECK_FUNCS([preadv2 pwritev2])
     228AC_CHECK_FUNC([preadv2], [AC_DEFINE([CFA_HAVE_PREADV2])])
     229AC_CHECK_FUNC([pwritev2], [AC_DEFINE([CFA_HAVE_PWRITEV2])])
    225230
    226231AC_CONFIG_FILES([
     
    229234        prelude/Makefile
    230235        ])
     236AC_CONFIG_FILES([src/concurrency/io/call.cfa], [python3 ${srcdir}/src/concurrency/io/call.cfa.in > src/concurrency/io/call.cfa])
    231237
    232238AC_CONFIG_HEADERS(prelude/defines.hfa)
  • libcfa/prelude/defines.hfa.in

    r1c507eb r7a80113  
    117117
    118118/* Defined if io_uring support is present when compiling libcfathread and
     119   supports the operation IORING_OP_TEE. */
     120#undef CFA_HAVE_IORING_OP_TEE
     121
     122/* Defined if io_uring support is present when compiling libcfathread and
    119123   supports the operation IORING_OP_TIMEOUT. */
    120124#undef CFA_HAVE_IORING_OP_TIMEOUT
     
    163167#undef CFA_HAVE_LINUX_IO_URING_H
    164168
     169/* Defined if openat2 support is present when compiling libcfathread. */
     170#undef CFA_HAVE_OPENAT2
     171
     172/* Defined if preadv2 support is present when compiling libcfathread. */
     173#undef CFA_HAVE_PREADV2
     174
     175/* Defined if pwritev2 support is present when compiling libcfathread. */
     176#undef CFA_HAVE_PWRITEV2
     177
    165178/* Defined if io_uring support is present when compiling libcfathread and
    166179   supports the flag SPLICE_F_FD_IN_FIXED. */
    167180#undef CFA_HAVE_SPLICE_F_FD_IN_FIXED
    168181
     182/* Defined if statx support is present when compiling libcfathread. */
     183#undef CFA_HAVE_STATX
     184
    169185/* Location of include files. */
    170186#undef CFA_INCDIR
     
    188204#undef HAVE_MEMORY_H
    189205
    190 /* Define to 1 if you have the `preadv2' function. */
    191 #undef HAVE_PREADV2
    192 
    193 /* Define to 1 if you have the `pwritev2' function. */
    194 #undef HAVE_PWRITEV2
    195 
    196206/* Define to 1 if you have the <stdint.h> header file. */
    197207#undef HAVE_STDINT_H
  • libcfa/src/Makefile.am

    r1c507eb r7a80113  
    6262        iterator.hfa \
    6363        limits.hfa \
     64        memory.hfa \
    6465        parseargs.hfa \
    6566        rational.hfa \
     
    107108        concurrency/io/setup.cfa \
    108109        concurrency/io/types.hfa \
    109         concurrency/iocall.cfa \
     110        concurrency/io/call.cfa \
    110111        concurrency/iofwd.hfa \
    111112        concurrency/kernel_private.hfa \
  • libcfa/src/bits/locks.hfa

    r1c507eb r7a80113  
    357357                                struct oneshot * expected = this.ptr;
    358358                                // was this abandoned?
    359                                 if( expected == 3p ) { free( &this ); return false; }
     359                                #if defined(__GNUC__) && __GNUC__ >= 7
     360                                        #pragma GCC diagnostic push
     361                                        #pragma GCC diagnostic ignored "-Wfree-nonheap-object"
     362                                #endif
     363                                        if( expected == 3p ) { free( &this ); return false; }
     364                                #if defined(__GNUC__) && __GNUC__ >= 7
     365                                        #pragma GCC diagnostic pop
     366                                #endif
    360367
    361368                                /* paranoid */ verify( expected != 1p ); // Future is already fulfilled, should not happen
  • libcfa/src/concurrency/coroutine.cfa

    r1c507eb r7a80113  
    4747
    4848//-----------------------------------------------------------------------------
     49FORALL_DATA_INSTANCE(CoroutineCancelled,
     50                (dtype coroutine_t | sized(coroutine_t)), (coroutine_t))
     51
     52struct __cfaehm_node {
     53        struct _Unwind_Exception unwind_exception;
     54        struct __cfaehm_node * next;
     55        int handler_index;
     56};
     57
     58forall(dtype T)
     59void mark_exception(CoroutineCancelled(T) *) {}
     60
     61forall(dtype T | sized(T))
     62void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) {
     63        dst->the_coroutine = src->the_coroutine;
     64        dst->the_exception = src->the_exception;
     65}
     66
     67forall(dtype T)
     68const char * msg(CoroutineCancelled(T) *) {
     69        return "CoroutineCancelled(...)";
     70}
     71
     72// This code should not be inlined. It is the error path on resume.
     73forall(dtype T | is_coroutine(T))
     74void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc ) {
     75        verify( desc->cancellation );
     76        desc->state = Cancelled;
     77        exception_t * except = (exception_t *)(1 + (__cfaehm_node *)desc->cancellation);
     78
     79        CoroutineCancelled(T) except;
     80        except.the_coroutine = &cor;
     81        except.the_exception = except;
     82        throwResume except;
     83
     84        except->virtual_table->free( except );
     85        free( desc->cancellation );
     86        desc->cancellation = 0p;
     87}
     88
     89//-----------------------------------------------------------------------------
    4990// Global state variables
    5091
     
    180221        this->storage->limit = storage;
    181222        this->storage->base  = (void*)((intptr_t)storage + size);
     223        this->storage->exception_context.top_resume = 0p;
     224        this->storage->exception_context.current_exception = 0p;
    182225        __attribute__((may_alias)) intptr_t * istorage = (intptr_t*)&this->storage;
    183226        *istorage |= userStack ? 0x1 : 0x0;
  • libcfa/src/concurrency/coroutine.hfa

    r1c507eb r7a80113  
    1818#include <assert.h>
    1919#include "invoke.h"
     20#include "../exception.hfa"
     21
     22//-----------------------------------------------------------------------------
     23// Exception thrown from resume when a coroutine stack is cancelled.
     24// Should not have to be be sized (see trac #196).
     25FORALL_DATA_EXCEPTION(CoroutineCancelled,
     26                (dtype coroutine_t | sized(coroutine_t)), (coroutine_t)) (
     27        coroutine_t * the_coroutine;
     28        exception_t * the_exception;
     29);
     30
     31forall(dtype T)
     32void mark_exception(CoroutineCancelled(T) *);
     33
     34forall(dtype T | sized(T))
     35void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src);
     36
     37forall(dtype T)
     38const char * msg(CoroutineCancelled(T) *);
    2039
    2140//-----------------------------------------------------------------------------
     
    2342// Anything that implements this trait can be resumed.
    2443// Anything that is resumed is a coroutine.
    25 trait is_coroutine(dtype T) {
    26       void main(T & this);
    27       $coroutine * get_coroutine(T & this);
     44trait is_coroutine(dtype T | sized(T)
     45                | is_resumption_exception(CoroutineCancelled(T))
     46                | VTABLE_ASSERTION(CoroutineCancelled, (T))) {
     47        void main(T & this);
     48        $coroutine * get_coroutine(T & this);
    2849};
    2950
     
    112133        }
    113134}
     135
     136forall(dtype T | is_coroutine(T))
     137void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc );
    114138
    115139// Resume implementation inlined for performance
     
    145169        // always done for performance testing
    146170        $ctx_switch( src, dst );
     171        if ( unlikely(dst->cancellation) ) {
     172                __cfaehm_cancelled_coroutine( cor, dst );
     173        }
    147174
    148175        return cor;
  • libcfa/src/concurrency/exception.cfa

    r1c507eb r7a80113  
    5757
    5858STOP_AT_END_FUNCTION(coroutine_cancelstop,
    59         // TODO: Instead pass information to the last resumer.
     59        struct $coroutine * src = ($coroutine *)stop_param;
     60        struct $coroutine * dst = src->last;
     61
     62        $ctx_switch( src, dst );
    6063        abort();
    6164)
  • libcfa/src/concurrency/exception.hfa

    r1c507eb r7a80113  
    1818#include "bits/defs.hfa"
    1919#include "invoke.h"
    20 struct _Unwind_Exception;
    21 
    22 // It must also be usable as a C header file.
    2320
    2421#ifdef __cforall
    2522extern "C" {
     23
     24#define HIDE_EXPORTS
    2625#endif
     26#include "unwind.h"
    2727
    2828struct exception_context_t * this_exception_context(void) OPTIONAL_THREAD;
     
    3232
    3333#ifdef __cforall
     34#undef HIDE_EXPORTS
    3435}
    3536#endif
  • libcfa/src/concurrency/invoke.h

    r1c507eb r7a80113  
    6868        };
    6969
    70         enum __Coroutine_State { Halted, Start, Primed, Blocked, Ready, Active };
     70        enum __Coroutine_State { Halted, Start, Primed, Blocked, Ready, Active, Cancelled };
    7171
    7272        struct $coroutine {
  • libcfa/src/concurrency/io.cfa

    r1c507eb r7a80113  
    159159
    160160        static inline void process(struct io_uring_cqe & cqe ) {
    161                 struct __io_user_data_t * data = (struct __io_user_data_t *)(uintptr_t)cqe.user_data;
    162                 __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", data, cqe.res, data->thrd );
    163 
    164                 data->result = cqe.res;
    165                 post( data->sem );
     161                struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data;
     162                __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", future, cqe.res, data->thrd );
     163
     164                fulfil( *future, cqe.res );
    166165        }
    167166
  • libcfa/src/concurrency/io/types.hfa

    r1c507eb r7a80113  
    1616#pragma once
    1717
     18extern "C" {
     19        #include <linux/types.h>
     20}
     21
     22#include "bits/locks.hfa"
     23
    1824#if defined(CFA_HAVE_LINUX_IO_URING_H)
    19         extern "C" {
    20                 #include <linux/types.h>
    21         }
    22 
    23       #include "bits/locks.hfa"
    24 
    2525        #define LEADER_LOCK
    2626        struct __leaderlock_t {
     
    101101        };
    102102
    103 
    104         //-----------------------------------------------------------------------
    105         // IO user data
    106         struct __io_user_data_t {
    107                 __s32 result;
    108                 oneshot sem;
    109         };
    110 
    111103        //-----------------------------------------------------------------------
    112104        // Misc
     
    143135        void __ioctx_prepare_block($io_ctx_thread & ctx, struct epoll_event & ev);
    144136#endif
     137
     138//-----------------------------------------------------------------------
     139// IO user data
     140struct io_future_t {
     141        future_t self;
     142        __s32 result;
     143};
     144
     145static inline {
     146        bool fulfil( io_future_t & this, __s32 result ) {
     147                this.result = result;
     148                return fulfil(this.self);
     149        }
     150
     151        // Wait for the future to be fulfilled
     152        bool wait( io_future_t & this ) {
     153                return wait(this.self);
     154        }
     155}
  • libcfa/src/concurrency/iofwd.hfa

    r1c507eb r7a80113  
    4040
    4141struct cluster;
     42struct io_future_t;
    4243struct io_context;
    4344struct io_cancellation;
     
    4849struct statx;
    4950
    50 extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    51 extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    52 extern int cfa_fsync(int fd, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    53 extern int cfa_sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    54 extern ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    55 extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    56 extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    57 extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    58 extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    59 extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    60 extern int cfa_fallocate(int fd, int mode, uint64_t offset, uint64_t len, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    61 extern int cfa_fadvise(int fd, uint64_t offset, uint64_t len, int advice, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    62 extern int cfa_madvise(void *addr, size_t length, int advice, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    63 extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    64 extern int cfa_close(int fd, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    65 extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    66 extern ssize_t cfa_read(int fd, void *buf, size_t count, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    67 extern ssize_t cfa_write(int fd, void *buf, size_t count, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    68 extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    69 extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     51//----------
     52// synchronous calls
     53#if defined(CFA_HAVE_PREADV2)
     54        extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     55#endif
     56#if defined(CFA_HAVE_PWRITEV2)
     57        extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     58#endif
     59extern int cfa_fsync(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     60extern int cfa_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     61extern int cfa_sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     62extern  ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     63extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     64extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     65extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     66extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     67extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     68extern int cfa_fallocate(int fd, int mode, off_t offset, off_t len, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     69extern int cfa_posix_fadvise(int fd, off_t offset, off_t len, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     70extern int cfa_madvise(void *addr, size_t length, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     71extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     72#if defined(CFA_HAVE_OPENAT2)
     73        extern int cfa_openat2(int dirfd, const char *pathname, struct open_how * how, size_t size, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     74#endif
     75extern int cfa_close(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     76#if defined(CFA_HAVE_STATX)
     77        extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     78#endif
     79extern ssize_t cfa_read(int fd, void * buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     80extern ssize_t cfa_write(int fd, void * buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     81extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     82extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
     83
     84//----------
     85// asynchronous calls
     86#if defined(CFA_HAVE_PREADV2)
     87        extern void async_preadv2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
     88#endif
     89#if defined(CFA_HAVE_PWRITEV2)
     90        extern void async_pwritev2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
     91#endif
     92extern void async_fsync(io_future_t & future, int fd, int submit_flags, io_cancellation * cancellation, io_context * context);
     93extern void async_epoll_ctl(io_future_t & future, int epfd, int op, int fd, struct epoll_event *event, int submit_flags, io_cancellation * cancellation, io_context * context);
     94extern void async_sync_file_range(io_future_t & future, int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
     95extern void async_sendmsg(io_future_t & future, int sockfd, const struct msghdr *msg, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
     96extern void async_recvmsg(io_future_t & future, int sockfd, struct msghdr *msg, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
     97extern void async_send(io_future_t & future, int sockfd, const void *buf, size_t len, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
     98extern void async_recv(io_future_t & future, int sockfd, void *buf, size_t len, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
     99extern void async_accept4(io_future_t & future, int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
     100extern void async_connect(io_future_t & future, int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, io_cancellation * cancellation, io_context * context);
     101extern void async_fallocate(io_future_t & future, int fd, int mode, off_t offset, off_t len, int submit_flags, io_cancellation * cancellation, io_context * context);
     102extern void async_posix_fadvise(io_future_t & future, int fd, off_t offset, off_t len, int advice, int submit_flags, io_cancellation * cancellation, io_context * context);
     103extern void async_madvise(io_future_t & future, void *addr, size_t length, int advice, int submit_flags, io_cancellation * cancellation, io_context * context);
     104extern void async_openat(io_future_t & future, int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, io_cancellation * cancellation, io_context * context);
     105#if defined(CFA_HAVE_OPENAT2)
     106        extern void async_openat2(io_future_t & future, int dirfd, const char *pathname, struct open_how * how, size_t size, int submit_flags, io_cancellation * cancellation, io_context * context);
     107#endif
     108extern void async_close(io_future_t & future, int fd, int submit_flags, io_cancellation * cancellation, io_context * context);
     109#if defined(CFA_HAVE_STATX)
     110        extern void async_statx(io_future_t & future, int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags, io_cancellation * cancellation, io_context * context);
     111#endif
     112void async_read(io_future_t & future, int fd, void * buf, size_t count, int submit_flags, io_cancellation * cancellation, io_context * context);
     113extern void async_write(io_future_t & future, int fd, void * buf, size_t count, int submit_flags, io_cancellation * cancellation, io_context * context);
     114extern void async_splice(io_future_t & future, int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
     115extern void async_tee(io_future_t & future, int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
     116
    70117
    71118//-----------------------------------------------------------------------------
  • libcfa/src/concurrency/kernel.hfa

    r1c507eb r7a80113  
    2323
    2424extern "C" {
    25 #include <bits/pthreadtypes.h>
     25        #include <bits/pthreadtypes.h>
     26        #include <linux/types.h>
    2627}
    2728
     
    157158
    158159struct io_cancellation {
    159         uint32_t target;
     160        __u64 target;
    160161};
    161162
  • libcfa/src/concurrency/monitor.cfa

    r1c507eb r7a80113  
    8989        __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
    9090
    91         if( !this->owner ) {
     91        if( unlikely(0 != (0x1 & (uintptr_t)this->owner)) ) {
     92                abort( "Attempt by thread \"%.256s\" (%p) to access joined monitor %p.", thrd->self_cor.name, thrd, this );
     93        }
     94        else if( !this->owner ) {
    9295                // No one has the monitor, just take it
    9396                __set_owner( this, thrd );
     
    137140}
    138141
    139 static void __dtor_enter( $monitor * this, fptr_t func ) {
     142static void __dtor_enter( $monitor * this, fptr_t func, bool join ) {
    140143        // Lock the monitor spinlock
    141144        lock( this->lock __cfaabi_dbg_ctx2 );
     
    157160                return;
    158161        }
    159         else if( this->owner == thrd) {
     162        else if( this->owner == thrd && !join) {
    160163                // We already have the monitor... but where about to destroy it so the nesting will fail
    161164                // Abort!
    162165                abort( "Attempt to destroy monitor %p by thread \"%.256s\" (%p) in nested mutex.", this, thrd->self_cor.name, thrd );
     166        }
     167        // SKULLDUGGERY: join will act as a dtor so it would normally trigger to above check
     168        // to avoid that it sets the owner to the special value thrd | 1p before exiting
     169        else if( this->owner == ($thread*)(1 | (uintptr_t)thrd) ) {
     170                // restore the owner and just return
     171                __cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this);
     172
     173                // No one has the monitor, just take it
     174                this->owner = thrd;
     175
     176                verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     177
     178                unlock( this->lock );
     179                return;
    163180        }
    164181
     
    251268
    252269// Leave single monitor for the last time
    253 void __dtor_leave( $monitor * this ) {
     270void __dtor_leave( $monitor * this, bool join ) {
    254271        __cfaabi_dbg_debug_do(
    255272                if( TL_GET( this_thread ) != this->owner ) {
    256273                        abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
    257274                }
    258                 if( this->recursion != 1 ) {
     275                if( this->recursion != 1  && !join ) {
    259276                        abort( "Destroyed monitor %p has %d outstanding nested calls.\n", this, this->recursion - 1);
    260277                }
    261278        )
     279
     280        this->owner = ($thread*)(1 | (uintptr_t)this->owner);
    262281}
    263282
     
    307326}
    308327
     328// Join a thread
     329forall( dtype T | is_thread(T) )
     330T & join( T & this ) {
     331        $monitor *    m = get_monitor(this);
     332        void (*dtor)(T& mutex this) = ^?{};
     333        monitor_dtor_guard_t __guard = { &m, (fptr_t)dtor, true };
     334        {
     335                return this;
     336        }
     337}
     338
    309339// Enter multiple monitor
    310340// relies on the monitor array being sorted
     
    366396// Ctor for monitor guard
    367397// Sorts monitors before entering
    368 void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func ) {
     398void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func, bool join ) {
    369399        // optimization
    370400        $thread * thrd = TL_GET( this_thread );
     
    376406        this.prev = thrd->monitors;
    377407
     408        // Save whether we are in a join or not
     409        this.join = join;
     410
    378411        // Update thread context (needed for conditions)
    379412        (thrd->monitors){m, 1, func};
    380413
    381         __dtor_enter( this.m, func );
     414        __dtor_enter( this.m, func, join );
    382415}
    383416
     
    385418void ^?{}( monitor_dtor_guard_t & this ) {
    386419        // Leave the monitors in order
    387         __dtor_leave( this.m );
     420        __dtor_leave( this.m, this.join );
    388421
    389422        // Restore thread context
  • libcfa/src/concurrency/monitor.hfa

    r1c507eb r7a80113  
    5353        $monitor *    m;
    5454        __monitor_group_t prev;
     55        bool join;
    5556};
    5657
    57 void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)() );
     58void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)(), bool join );
    5859void ^?{}( monitor_dtor_guard_t & this );
    5960
  • libcfa/src/concurrency/thread.hfa

    r1c507eb r7a80113  
    106106void sleep( Duration duration );
    107107
     108//----------
     109// join
     110forall( dtype T | is_thread(T) )
     111T & join( T & this );
     112
    108113// Local Variables: //
    109114// mode: c //
  • libcfa/src/exception.h

    r1c507eb r7a80113  
    7676// implemented in the .c file either so they all have to be inline.
    7777
    78 trait is_exception(dtype T) {
     78trait is_exception(dtype exceptT) {
    7979        /* The first field must be a pointer to a virtual table.
    8080         * That virtual table must be a decendent of the base exception virtual tab$
    8181         */
    82         void mark_exception(T *);
     82        void mark_exception(exceptT *);
    8383        // This is never used and should be a no-op.
    8484};
    8585
    86 trait is_termination_exception(dtype T | is_exception(T)) {
    87         void defaultTerminationHandler(T &);
     86trait is_termination_exception(dtype exceptT | is_exception(exceptT)) {
     87        void defaultTerminationHandler(exceptT &);
    8888};
    8989
    90 trait is_resumption_exception(dtype T | is_exception(T)) {
    91         void defaultResumptionHandler(T &);
     90trait is_resumption_exception(dtype exceptT | is_exception(exceptT)) {
     91        void defaultResumptionHandler(exceptT &);
    9292};
    9393
    94 forall(dtype T | is_termination_exception(T))
    95 static inline void $throw(T & except) {
     94forall(dtype exceptT | is_termination_exception(exceptT))
     95static inline void $throw(exceptT & except) {
    9696        __cfaehm_throw_terminate(
    9797                (exception_t *)&except,
     
    100100}
    101101
    102 forall(dtype T | is_resumption_exception(T))
    103 static inline void $throwResume(T & except) {
     102forall(dtype exceptT | is_resumption_exception(exceptT))
     103static inline void $throwResume(exceptT & except) {
    104104        __cfaehm_throw_resume(
    105105                (exception_t *)&except,
     
    108108}
    109109
    110 forall(dtype T | is_exception(T))
    111 static inline void cancel_stack(T & except) __attribute__((noreturn)) {
     110forall(dtype exceptT | is_exception(exceptT))
     111static inline void cancel_stack(exceptT & except) __attribute__((noreturn)) {
    112112        __cfaehm_cancel_stack( (exception_t *)&except );
    113113}
    114114
    115 forall(dtype T | is_exception(T))
    116 static inline void defaultTerminationHandler(T & except) {
     115forall(dtype exceptT | is_exception(exceptT))
     116static inline void defaultTerminationHandler(exceptT & except) {
    117117        return cancel_stack( except );
    118118}
    119119
    120 forall(dtype T | is_exception(T))
    121 static inline void defaultResumptionHandler(T & except) {
     120forall(dtype exceptT | is_exception(exceptT))
     121static inline void defaultResumptionHandler(exceptT & except) {
    122122        throw except;
    123123}
  • libcfa/src/exception.hfa

    r1c507eb r7a80113  
    192192                size_t size; \
    193193                void (*copy)(exception_name * this, exception_name * other); \
    194                 void (*free)(exception_name & this); \
     194                void (*^?{})(exception_name & this); \
    195195                const char * (*msg)(exception_name * this); \
    196196                _CLOSE
     
    213213                size_t size; \
    214214                void (*copy)(exception_name parameters * this, exception_name parameters * other); \
    215                 void (*free)(exception_name parameters & this); \
     215                void (*^?{})(exception_name parameters & this); \
    216216                const char * (*msg)(exception_name parameters * this); \
    217217                _CLOSE
  • libcfa/src/heap.cfa

    r1c507eb r7a80113  
    1010// Created On       : Tue Dec 19 21:58:35 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Sep  3 16:22:54 2020
    13 // Update Count     : 943
     12// Last Modified On : Mon Sep  7 22:17:46 2020
     13// Update Count     : 957
    1414//
    1515
     
    889889                size_t bsize, oalign;
    890890                headers( "resize", oaddr, header, freeElem, bsize, oalign );
    891 
    892891                size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
     892
    893893                // same size, DO NOT preserve STICKY PROPERTIES.
    894                 if ( oalign <= libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
     894                if ( oalign == libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
    895895                        header->kind.real.blockSize &= -2;                      // no alignment and turn off 0 fill
    896896                        header->kind.real.size = size;                          // reset allocation size
     
    931931                size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
    932932                size_t osize = header->kind.real.size;                  // old allocation size
    933                 bool ozfill = (header->kind.real.blockSize & 2) != 0; // old allocation zero filled
    934           if ( unlikely( size <= odsize ) && size > odsize / 2 ) { // allow up to 50% wasted storage
     933                bool ozfill = (header->kind.real.blockSize & 2); // old allocation zero filled
     934          if ( unlikely( size <= odsize ) && odsize <= size * 2 ) { // allow up to 50% wasted storage
    935935                        header->kind.real.size = size;                          // reset allocation size
    936936                        if ( unlikely( ozfill ) && size > osize ) {     // previous request zero fill and larger ?
     
    947947
    948948                void * naddr;
    949                 if ( likely( oalign <= libAlign() ) ) {                 // previous request not aligned ?
     949                if ( likely( oalign == libAlign() ) ) {                 // previous request not aligned ?
    950950                        naddr = mallocNoStats( size );                          // create new area
    951951                } else {
     
    12311231        } // if
    12321232
    1233         // Attempt to reuse existing storage.
     1233        // Attempt to reuse existing alignment.
    12341234        HeapManager.Storage.Header * header = headerAddr( oaddr );
    1235         if ( unlikely ( ( header->kind.fake.alignment & 1 == 1 &&       // old fake header ?
    1236                                  (uintptr_t)oaddr % nalign == 0 &&                              // lucky match ?
    1237                                  header->kind.fake.alignment <= nalign &&               // ok to leave LSB at 1
    1238                                  nalign <= 128 )                                                                // not too much alignment storage wasted ?
    1239                         ||   ( header->kind.fake.alignment & 1 != 1 &&          // old real header ( aligned on libAlign ) ?
    1240                                  nalign == libAlign() ) ) ) {                                   // new alignment also on libAlign
    1241 
    1242                 HeapManager.FreeHeader * freeElem;
    1243                 size_t bsize, oalign;
    1244                 headers( "resize", oaddr, header, freeElem, bsize, oalign );
    1245                 size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
    1246 
    1247                 if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage
     1235        bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
     1236        size_t oalign;
     1237        if ( isFakeHeader ) {
     1238                oalign = header->kind.fake.alignment & -2;              // old alignment
     1239                if ( (uintptr_t)oaddr % nalign == 0                             // lucky match ?
     1240                         && ( oalign <= nalign                                          // going down
     1241                                  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
     1242                        ) {
    12481243                        headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
    1249 
    1250                         header->kind.real.blockSize &= -2;              // turn off 0 fill
    1251                         header->kind.real.size = size;                  // reset allocation size
    1252                         return oaddr;
    1253                 } // if
     1244                        HeapManager.FreeHeader * freeElem;
     1245                        size_t bsize, oalign;
     1246                        headers( "resize", oaddr, header, freeElem, bsize, oalign );
     1247                        size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
     1248
     1249                        if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage
     1250                                headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
     1251
     1252                                header->kind.real.blockSize &= -2;              // turn off 0 fill
     1253                                header->kind.real.size = size;                  // reset allocation size
     1254                                return oaddr;
     1255                        } // if
     1256                } // if
     1257        } else if ( ! isFakeHeader                                                      // old real header (aligned on libAlign) ?
     1258                                && nalign == libAlign() ) {                             // new alignment also on libAlign => no fake header needed
     1259                return resize( oaddr, size );                                   // duplicate special case checks
    12541260        } // if
    12551261
     
    12811287        } // if
    12821288
    1283         HeapManager.Storage.Header * header;
    1284         HeapManager.FreeHeader * freeElem;
    1285         size_t bsize, oalign;
    1286         headers( "realloc", oaddr, header, freeElem, bsize, oalign );
    1287 
    1288         // Attempt to reuse existing storage.
    1289         if ( unlikely ( ( header->kind.fake.alignment & 1 == 1 &&       // old fake header ?
    1290                                  (uintptr_t)oaddr % nalign == 0 &&                              // lucky match ?
    1291                                  header->kind.fake.alignment <= nalign &&               // ok to leave LSB at 1
    1292                                  nalign <= 128 )                                                                // not too much alignment storage wasted ?
    1293                         ||   ( header->kind.fake.alignment & 1 != 1 &&          // old real header ( aligned on libAlign ) ?
    1294                                  nalign == libAlign() ) ) ) {                                   // new alignment also on libAlign
    1295 
    1296                 headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
    1297                 return realloc( oaddr, size );
    1298 
    1299         } // if
    1300 
    1301         // change size and copy old content to new storage
     1289        // Attempt to reuse existing alignment.
     1290        HeapManager.Storage.Header * header = headerAddr( oaddr );
     1291        bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
     1292        size_t oalign;
     1293        if ( isFakeHeader ) {
     1294                oalign = header->kind.fake.alignment & -2;              // old alignment
     1295                if ( (uintptr_t)oaddr % nalign == 0                             // lucky match ?
     1296                         && ( oalign <= nalign                                          // going down
     1297                                  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
     1298                        ) {
     1299                        headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
     1300                        return realloc( oaddr, size );                          // duplicate alignment and special case checks
     1301                } // if
     1302        } else if ( ! isFakeHeader                                                      // old real header (aligned on libAlign) ?
     1303                                && nalign == libAlign() )                               // new alignment also on libAlign => no fake header needed
     1304                return realloc( oaddr, size );                                  // duplicate alignment and special case checks
    13021305
    13031306        #ifdef __STATISTICS__
     
    13061309        #endif // __STATISTICS__
    13071310
     1311        HeapManager.FreeHeader * freeElem;
     1312        size_t bsize;
     1313        headers( "realloc", oaddr, header, freeElem, bsize, oalign );
     1314
     1315        // change size and copy old content to new storage
     1316
    13081317        size_t osize = header->kind.real.size;                          // old allocation size
    1309         bool ozfill = (header->kind.real.blockSize & 2) != 0; // old allocation zero filled
     1318        bool ozfill = (header->kind.real.blockSize & 2);        // old allocation zero filled
    13101319
    13111320        void * naddr = memalignNoStats( nalign, size );         // create new aligned area
  • src/AST/Convert.cpp

    r1c507eb r7a80113  
    11621162        }
    11631163
    1164         const ast::Type * postvisit( const ast::ReferenceToType * old, ReferenceToType * ty ) {
     1164        const ast::Type * postvisit( const ast::BaseInstType * old, ReferenceToType * ty ) {
    11651165                ty->forall = get<TypeDecl>().acceptL( old->forall );
    11661166                ty->parameters = get<Expression>().acceptL( old->params );
     
    25212521        }
    25222522
    2523         void postvisit( const ReferenceToType * old, ast::ReferenceToType * ty ) {
     2523        void postvisit( const ReferenceToType * old, ast::BaseInstType * ty ) {
    25242524                ty->forall = GET_ACCEPT_V( forall, TypeDecl );
    25252525                ty->params = GET_ACCEPT_V( parameters, Expr );
  • src/AST/Fwd.hpp

    r1c507eb r7a80113  
    107107class QualifiedType;
    108108class FunctionType;
    109 class ReferenceToType;
     109class BaseInstType;
    110110template<typename decl_t> class SueInstType;
    111111using StructInstType = SueInstType<StructDecl>;
  • src/AST/GenericSubstitution.cpp

    r1c507eb r7a80113  
    4242        private:
    4343                // make substitution for generic type
    44                 void makeSub( const ReferenceToType * ty ) {
     44                void makeSub( const BaseInstType * ty ) {
    4545                        visit_children = false;
    4646                        const AggregateDecl * aggr = ty->aggr();
  • src/AST/Node.cpp

    r1c507eb r7a80113  
    266266template class ast::ptr_base< ast::FunctionType, ast::Node::ref_type::weak >;
    267267template class ast::ptr_base< ast::FunctionType, ast::Node::ref_type::strong >;
    268 template class ast::ptr_base< ast::ReferenceToType, ast::Node::ref_type::weak >;
    269 template class ast::ptr_base< ast::ReferenceToType, ast::Node::ref_type::strong >;
     268template class ast::ptr_base< ast::BaseInstType, ast::Node::ref_type::weak >;
     269template class ast::ptr_base< ast::BaseInstType, ast::Node::ref_type::strong >;
    270270template class ast::ptr_base< ast::StructInstType, ast::Node::ref_type::weak >;
    271271template class ast::ptr_base< ast::StructInstType, ast::Node::ref_type::strong >;
  • src/AST/Pass.hpp

    r1c507eb r7a80113  
    5050// | PureVisitor           - makes the visitor pure, it never modifies nodes in place and always
    5151//                           clones nodes it needs to make changes to
    52 // | WithTypeSubstitution  - provides polymorphic const TypeSubstitution * env for the
     52// | WithConstTypeSubstitution - provides polymorphic const TypeSubstitution * typeSubs for the
    5353//                           current expression
    5454// | WithStmtsToAdd        - provides the ability to insert statements before or after the current
     
    6767// | WithSymbolTable       - provides symbol table functionality
    6868// | WithForallSubstitutor - maintains links between TypeInstType and TypeDecl under mutation
     69//
     70// Other Special Members:
     71// | result                - Either a method that takes no parameters or a field. If a method (or
     72//                           callable field) get_result calls it, otherwise the value is returned.
    6973//-------------------------------------------------------------------------------------------------
    7074template< typename core_t >
     
    8993        virtual ~Pass() = default;
    9094
     95        /// Storage for the actual pass.
     96        core_t core;
     97
     98        /// If the core defines a result, call it if possible, otherwise return it.
     99        inline auto get_result() -> decltype( __pass::get_result( core, '0' ) ) {
     100                return __pass::get_result( core, '0' );
     101        }
     102
    91103        /// Construct and run a pass on a translation unit.
    92104        template< typename... Args >
     
    96108        }
    97109
     110        /// Contruct and run a pass on a pointer to extract a value.
     111        template< typename node_type, typename... Args >
     112        static auto read( node_type const * node, Args&&... args ) {
     113                Pass<core_t> visitor( std::forward<Args>( args )... );
     114                node_type const * temp = node->accept( visitor );
     115                assert( temp == node );
     116                return visitor.get_result();
     117        }
     118
     119        // Versions of the above for older compilers.
    98120        template< typename... Args >
    99121        static void run( std::list< ptr<Decl> > & decls ) {
     
    102124        }
    103125
    104         /// Storage for the actual pass
    105         core_t core;
     126        template< typename node_type, typename... Args >
     127        static auto read( node_type const * node ) {
     128                Pass<core_t> visitor;
     129                node_type const * temp = node->accept( visitor );
     130                assert( temp == node );
     131                return visitor.get_result();
     132        }
    106133
    107134        /// Visit function declarations
     
    267294//-------------------------------------------------------------------------------------------------
    268295
    269 /// Keep track of the polymorphic const TypeSubstitution * env for the current expression
    270 
    271296/// If used the visitor will always clone nodes.
    272297struct PureVisitor {};
    273298
     299/// Keep track of the polymorphic const TypeSubstitution * typeSubs for the current expression.
    274300struct WithConstTypeSubstitution {
    275         const TypeSubstitution * env = nullptr;
     301        const TypeSubstitution * typeSubs = nullptr;
    276302};
    277303
  • src/AST/Pass.impl.hpp

    r1c507eb r7a80113  
    154154                __pedantic_pass_assert( expr );
    155155
    156                 const ast::TypeSubstitution ** env_ptr = __pass::env( core, 0);
    157                 if ( env_ptr && expr->env ) {
    158                         *env_ptr = expr->env;
     156                const ast::TypeSubstitution ** typeSubs_ptr = __pass::typeSubs( core, 0 );
     157                if ( typeSubs_ptr && expr->env ) {
     158                        *typeSubs_ptr = expr->env;
    159159                }
    160160
     
    177177
    178178                // These may be modified by subnode but most be restored once we exit this statemnet.
    179                 ValueGuardPtr< const ast::TypeSubstitution * > __old_env         ( __pass::env( core, 0) );
     179                ValueGuardPtr< const ast::TypeSubstitution * > __old_env         ( __pass::typeSubs( core, 0 ) );
    180180                ValueGuardPtr< typename std::remove_pointer< decltype(stmts_before) >::type > __old_decls_before( stmts_before );
    181181                ValueGuardPtr< typename std::remove_pointer< decltype(stmts_after ) >::type > __old_decls_after ( stmts_after  );
     
    14881488
    14891489                // These may be modified by subnode but most be restored once we exit this statemnet.
    1490                 ValueGuardPtr< const ast::TypeSubstitution * > __old_env( __pass::env( core, 0) );
     1490                ValueGuardPtr< const ast::TypeSubstitution * > __old_env( __pass::typeSubs( core, 0 ) );
    14911491                ValueGuardPtr< typename std::remove_pointer< decltype(stmts_before) >::type > __old_decls_before( stmts_before );
    14921492                ValueGuardPtr< typename std::remove_pointer< decltype(stmts_after ) >::type > __old_decls_after ( stmts_after  );
  • src/AST/Pass.proto.hpp

    r1c507eb r7a80113  
    236236
    237237        // List of fields and their expected types
    238         FIELD_PTR( env, const ast::TypeSubstitution * )
     238        FIELD_PTR( typeSubs, const ast::TypeSubstitution * )
    239239        FIELD_PTR( stmtsToAddBefore, std::list< ast::ptr< ast::Stmt > > )
    240240        FIELD_PTR( stmtsToAddAfter , std::list< ast::ptr< ast::Stmt > > )
     
    421421
    422422        } // namespace forall
     423
     424        template<typename core_t>
     425        static inline auto get_result( core_t & core, char ) -> decltype( core.result() ) {
     426                return core.result();
     427        }
     428
     429        template<typename core_t>
     430        static inline auto get_result( core_t & core, int ) -> decltype( core.result ) {
     431                return core.result;
     432        }
     433
     434        template<typename core_t>
     435        static inline void get_result( core_t &, long ) {}
    423436} // namespace __pass
    424437} // namespace ast
  • src/AST/Print.cpp

    r1c507eb r7a80113  
    270270        }
    271271
    272         void preprint( const ast::ReferenceToType * node ) {
     272        void preprint( const ast::BaseInstType * node ) {
    273273                print( node->forall );
    274274                print( node->attributes );
  • src/AST/SymbolTable.cpp

    r1c507eb r7a80113  
    313313                if ( ! expr->result ) continue;
    314314                const Type * resTy = expr->result->stripReferences();
    315                 auto aggrType = dynamic_cast< const ReferenceToType * >( resTy );
     315                auto aggrType = dynamic_cast< const BaseInstType * >( resTy );
    316316                assertf( aggrType, "WithStmt expr has non-aggregate type: %s",
    317317                        toString( expr->result ).c_str() );
     
    654654                        if ( dwt->name == "" ) {
    655655                                const Type * t = dwt->get_type()->stripReferences();
    656                                 if ( auto rty = dynamic_cast<const ReferenceToType *>( t ) ) {
     656                                if ( auto rty = dynamic_cast<const BaseInstType *>( t ) ) {
    657657                                        if ( ! dynamic_cast<const StructInstType *>(rty)
    658658                                                && ! dynamic_cast<const UnionInstType *>(rty) ) continue;
  • src/AST/Type.cpp

    r1c507eb r7a80113  
    124124}
    125125
    126 // --- ReferenceToType
    127 
    128 void ReferenceToType::initWithSub( const ReferenceToType & o, Pass< ForallSubstitutor > & sub ) {
     126// --- BaseInstType
     127
     128void BaseInstType::initWithSub( const BaseInstType & o, Pass< ForallSubstitutor > & sub ) {
    129129        ParameterizedType::initWithSub( o, sub ); // initialize substitution
    130130        params = sub.core( o.params );            // apply to parameters
    131131}
    132132
    133 ReferenceToType::ReferenceToType( const ReferenceToType & o )
     133BaseInstType::BaseInstType( const BaseInstType & o )
    134134: ParameterizedType( o.qualifiers, copy( o.attributes ) ), params(), name( o.name ),
    135135  hoistType( o.hoistType ) {
     
    138138}
    139139
    140 std::vector<readonly<Decl>> ReferenceToType::lookup( const std::string& name ) const {
     140std::vector<readonly<Decl>> BaseInstType::lookup( const std::string& name ) const {
    141141        assertf( aggr(), "Must have aggregate to perform lookup" );
    142142
     
    153153SueInstType<decl_t>::SueInstType(
    154154        const decl_t * b, CV::Qualifiers q, std::vector<ptr<Attribute>>&& as )
    155 : ReferenceToType( b->name, q, move(as) ), base( b ) {}
     155: BaseInstType( b->name, q, move(as) ), base( b ) {}
    156156
    157157template<typename decl_t>
     
    168168TraitInstType::TraitInstType(
    169169        const TraitDecl * b, CV::Qualifiers q, std::vector<ptr<Attribute>>&& as )
    170 : ReferenceToType( b->name, q, move(as) ), base( b ) {}
     170: BaseInstType( b->name, q, move(as) ), base( b ) {}
    171171
    172172// --- TypeInstType
    173173
    174174TypeInstType::TypeInstType( const TypeInstType & o )
    175 : ReferenceToType( o.name, o.qualifiers, copy( o.attributes ) ), base(), kind( o.kind ) {
     175: BaseInstType( o.name, o.qualifiers, copy( o.attributes ) ), base(), kind( o.kind ) {
    176176        Pass< ForallSubstitutor > sub;
    177177        initWithSub( o, sub );      // initialize substitution
  • src/AST/Type.hpp

    r1c507eb r7a80113  
    329329
    330330/// base class for types that refer to types declared elsewhere (aggregates and typedefs)
    331 class ReferenceToType : public ParameterizedType {
     331class BaseInstType : public ParameterizedType {
    332332protected:
    333333        /// Initializes forall and parameters based on substitutor
    334         void initWithSub( const ReferenceToType & o, Pass< ForallSubstitutor > & sub );
     334        void initWithSub( const BaseInstType & o, Pass< ForallSubstitutor > & sub );
    335335public:
    336336        std::vector<ptr<Expr>> params;
     
    338338        bool hoistType = false;
    339339
    340         ReferenceToType(
     340        BaseInstType(
    341341                const std::string& n, CV::Qualifiers q = {}, std::vector<ptr<Attribute>> && as = {} )
    342342        : ParameterizedType(q, std::move(as)), params(), name(n) {}
    343343
    344         ReferenceToType( const ReferenceToType & o );
     344        BaseInstType( const BaseInstType & o );
    345345
    346346        /// Gets aggregate declaration this type refers to
     
    350350
    351351private:
    352         virtual ReferenceToType * clone() const override = 0;
     352        virtual BaseInstType * clone() const override = 0;
    353353        MUTATE_FRIEND
    354354};
     
    356356// Common implementation for the SUE instance types. Not to be used directly.
    357357template<typename decl_t>
    358 class SueInstType final : public ReferenceToType {
     358class SueInstType final : public BaseInstType {
    359359public:
    360360        using base_type = decl_t;
     
    363363        SueInstType(
    364364                const std::string& n, CV::Qualifiers q = {}, std::vector<ptr<Attribute>> && as = {} )
    365         : ReferenceToType( n, q, std::move(as) ), base() {}
     365        : BaseInstType( n, q, std::move(as) ), base() {}
    366366
    367367        SueInstType(
     
    388388
    389389/// An instance of a trait type.
    390 class TraitInstType final : public ReferenceToType {
     390class TraitInstType final : public BaseInstType {
    391391public:
    392392        readonly<TraitDecl> base;
     
    394394        TraitInstType(
    395395                const std::string& n, CV::Qualifiers q = {}, std::vector<ptr<Attribute>> && as = {} )
    396         : ReferenceToType( n, q, std::move(as) ), base() {}
     396        : BaseInstType( n, q, std::move(as) ), base() {}
    397397
    398398        TraitInstType(
     
    411411
    412412/// instance of named type alias (typedef or variable)
    413 class TypeInstType final : public ReferenceToType {
     413class TypeInstType final : public BaseInstType {
    414414public:
    415415        readonly<TypeDecl> base;
     
    419419                const std::string& n, const TypeDecl * b, CV::Qualifiers q = {},
    420420                std::vector<ptr<Attribute>> && as = {} )
    421         : ReferenceToType( n, q, std::move(as) ), base( b ), kind( b->kind ) {}
     421        : BaseInstType( n, q, std::move(as) ), base( b ), kind( b->kind ) {}
    422422        TypeInstType( const std::string& n, TypeDecl::Kind k, CV::Qualifiers q = {},
    423423                std::vector<ptr<Attribute>> && as = {} )
    424         : ReferenceToType( n, q, std::move(as) ), base(), kind( k ) {}
     424        : BaseInstType( n, q, std::move(as) ), base(), kind( k ) {}
    425425
    426426        TypeInstType( const TypeInstType & o );
  • src/AST/TypeSubstitution.cpp

    r1c507eb r7a80113  
    176176}
    177177
    178 void TypeSubstitution::Substituter::handleAggregateType( const ReferenceToType * type ) {
     178void TypeSubstitution::Substituter::handleAggregateType( const BaseInstType * type ) {
    179179        GuardValue( boundVars );
    180180        // bind type variables from forall-qualifiers
  • src/AST/TypeSubstitution.hpp

    r1c507eb r7a80113  
    169169                void previsit( const ParameterizedType * type );
    170170                /// Records type variable bindings from forall-statements and instantiations of generic types
    171                 void handleAggregateType( const ReferenceToType * type );
     171                void handleAggregateType( const BaseInstType * type );
    172172
    173173                void previsit( const StructInstType * aggregateUseType );
  • src/Common/Stats/Stats.cc

    r1c507eb r7a80113  
    3535        }
    3636
     37        namespace ResolveTime {
     38                bool enabled = false;
     39        }
     40
    3741        struct {
    3842                const char * const opt;
     
    4347                { "heap"    , Heap::enabled },
    4448                { "time"    , Time::enabled },
     49                { "resolve" , ResolveTime::enabled },
    4550        };
    4651
  • src/Common/module.mk

    r1c507eb r7a80113  
    2222      Common/ErrorObjects.h \
    2323      Common/Eval.cc \
     24      Common/Examine.cc \
     25      Common/Examine.h \
    2426      Common/FilterCombos.h \
    2527      Common/Indenter.h \
     
    3840      Common/Stats/Heap.cc \
    3941      Common/Stats/Heap.h \
     42      Common/Stats/ResolveTime.cc \
     43      Common/Stats/ResolveTime.h \
    4044      Common/Stats/Stats.cc \
    4145      Common/Stats/Time.cc \
  • src/Concurrency/Keywords.cc

    r1c507eb r7a80113  
    1919#include <string>                         // for string, operator==
    2020
     21#include <iostream>
     22
     23#include "Common/Examine.h"               // for isMainFor
    2124#include "Common/PassVisitor.h"           // for PassVisitor
    2225#include "Common/SemanticError.h"         // for SemanticError
     
    3437#include "SynTree/Type.h"                 // for StructInstType, Type, PointerType
    3538#include "SynTree/Visitor.h"              // for Visitor, acceptAll
     39#include "Virtual/Tables.h"
    3640
    3741class Attribute;
    3842
    3943namespace Concurrency {
     44        inline static std::string getVTableName( std::string const & exception_name ) {
     45                return exception_name.empty() ? std::string() : Virtual::vtableTypeName(exception_name);
     46        }
     47
    4048        //=============================================================================================
    4149        // Pass declarations
     
    5462          public:
    5563
    56                 ConcurrentSueKeyword( std::string&& type_name, std::string&& field_name, std::string&& getter_name, std::string&& context_error, bool needs_main, AggregateDecl::Aggregate cast_target ) :
    57                   type_name( type_name ), field_name( field_name ), getter_name( getter_name ), context_error( context_error ), needs_main( needs_main ), cast_target( cast_target ) {}
     64                ConcurrentSueKeyword( std::string&& type_name, std::string&& field_name,
     65                        std::string&& getter_name, std::string&& context_error, std::string&& exception_name,
     66                        bool needs_main, AggregateDecl::Aggregate cast_target ) :
     67                  type_name( type_name ), field_name( field_name ), getter_name( getter_name ),
     68                  context_error( context_error ), vtable_name( getVTableName( exception_name ) ),
     69                  needs_main( needs_main ), cast_target( cast_target ) {}
    5870
    5971                virtual ~ConcurrentSueKeyword() {}
     
    6375
    6476                void handle( StructDecl * );
     77                void addVtableForward( StructDecl * );
    6578                FunctionDecl * forwardDeclare( StructDecl * );
    6679                ObjectDecl * addField( StructDecl * );
     
    7689                const std::string getter_name;
    7790                const std::string context_error;
     91                const std::string vtable_name;
    7892                bool needs_main;
    7993                AggregateDecl::Aggregate cast_target;
     
    8195                StructDecl   * type_decl = nullptr;
    8296                FunctionDecl * dtor_decl = nullptr;
     97                StructDecl * vtable_decl = nullptr;
    8398        };
    8499
     
    101116                        "get_thread",
    102117                        "thread keyword requires threads to be in scope, add #include <thread.hfa>\n",
     118                        "",
    103119                        true,
    104120                        AggregateDecl::Thread
     
    133149                        "get_coroutine",
    134150                        "coroutine keyword requires coroutines to be in scope, add #include <coroutine.hfa>\n",
     151                        "CoroutineCancelled",
    135152                        true,
    136153                        AggregateDecl::Coroutine
     
    167184                        "get_monitor",
    168185                        "monitor keyword requires monitors to be in scope, add #include <monitor.hfa>\n",
     186                        "",
    169187                        false,
    170188                        AggregateDecl::Monitor
     
    198216                        "get_generator",
    199217                        "Unable to find builtin type $generator\n",
     218                        "",
    200219                        true,
    201220                        AggregateDecl::Generator
     
    231250
    232251        private:
    233                 DeclarationWithType * is_main( FunctionDecl * );
    234252                bool is_real_suspend( FunctionDecl * );
    235253
     
    359377                        handle( decl );
    360378                }
     379                else if ( !vtable_decl && vtable_name == decl->name && decl->body ) {
     380                        vtable_decl = decl;
     381                }
     382                // Might be able to get ride of is target.
     383                assert( is_target(decl) == (cast_target == decl->kind) );
    361384                return decl;
    362385        }
    363386
    364387        DeclarationWithType * ConcurrentSueKeyword::postmutate( FunctionDecl * decl ) {
    365                 if( !type_decl ) return decl;
    366                 if( !CodeGen::isDestructor( decl->name ) ) return decl;
    367 
    368                 auto params = decl->type->parameters;
    369                 if( params.size() != 1 ) return decl;
    370 
    371                 auto type = dynamic_cast<ReferenceType*>( params.front()->get_type() );
    372                 if( !type ) return decl;
    373 
    374                 auto stype = dynamic_cast<StructInstType*>( type->base );
    375                 if( !stype ) return decl;
    376                 if( stype->baseStruct != type_decl ) return decl;
    377 
    378                 if( !dtor_decl ) dtor_decl = decl;
     388                if ( type_decl && isDestructorFor( decl, type_decl ) )
     389                        dtor_decl = decl;
     390                else if ( vtable_name.empty() )
     391                        ;
     392                else if ( auto param = isMainFor( decl, cast_target ) ) {
     393                        // This should never trigger.
     394                        assert( vtable_decl );
     395                        // Should be safe because of isMainFor.
     396                        StructInstType * struct_type = static_cast<StructInstType *>(
     397                                static_cast<ReferenceType *>( param->get_type() )->base );
     398                        assert( struct_type );
     399
     400                        declsToAddAfter.push_back( Virtual::makeVtableInstance( vtable_decl, {
     401                                new TypeExpr( struct_type->clone() ),
     402                        }, struct_type, nullptr ) );
     403                }
     404
    379405                return decl;
    380406        }
     
    400426                if( !dtor_decl ) SemanticError( decl, context_error );
    401427
     428                addVtableForward( decl );
    402429                FunctionDecl * func = forwardDeclare( decl );
    403430                ObjectDecl * field = addField( decl );
    404431                addRoutines( field, func );
     432        }
     433
     434        void ConcurrentSueKeyword::addVtableForward( StructDecl * decl ) {
     435                if ( vtable_decl ) {
     436                        declsToAddBefore.push_back( Virtual::makeVtableForward( vtable_decl, {
     437                                new TypeExpr( new StructInstType( noQualifiers, decl ) ),
     438                        } ) );
     439                // Its only an error if we want a vtable and don't have one.
     440                } else if ( ! vtable_name.empty() ) {
     441                        SemanticError( decl, context_error );
     442                }
    405443        }
    406444
     
    528566        // Suspend keyword implementation
    529567        //=============================================================================================
    530         DeclarationWithType * SuspendKeyword::is_main( FunctionDecl * func) {
    531                 if(func->name != "main") return nullptr;
    532                 if(func->type->parameters.size() != 1) return nullptr;
    533 
    534                 auto param = func->type->parameters.front();
    535 
    536                 auto type  = dynamic_cast<ReferenceType * >(param->get_type());
    537                 if(!type) return nullptr;
    538 
    539                 auto obj   = dynamic_cast<StructInstType *>(type->base);
    540                 if(!obj) return nullptr;
    541 
    542                 if(!obj->baseStruct->is_generator()) return nullptr;
    543 
    544                 return param;
    545         }
    546 
    547568        bool SuspendKeyword::is_real_suspend( FunctionDecl * func ) {
    548569                if(isMangled(func->linkage)) return false; // the real suspend isn't mangled
     
    565586
    566587                // Is this the main of a generator?
    567                 auto param = is_main( func );
     588                auto param = isMainFor( func, AggregateDecl::Aggregate::Generator );
    568589                if(!param) return;
    569590
     
    910931                                        {
    911932                                                new SingleInit( new AddressExpr( new VariableExpr( monitors ) ) ),
    912                                                 new SingleInit( new CastExpr( new VariableExpr( func ), generic_func->clone(), false ) )
     933                                                new SingleInit( new CastExpr( new VariableExpr( func ), generic_func->clone(), false ) ),
     934                                                new SingleInit( new ConstantExpr( Constant::from_bool( false ) ) )
    913935                                        },
    914936                                        noDesignators,
     
    10331055// tab-width: 4 //
    10341056// End: //
     1057
  • src/ResolvExpr/CandidateFinder.cpp

    r1c507eb r7a80113  
    816816                /// Adds aggregate member interpretations
    817817                void addAggMembers(
    818                         const ast::ReferenceToType * aggrInst, const ast::Expr * expr,
     818                        const ast::BaseInstType * aggrInst, const ast::Expr * expr,
    819819                        const Candidate & cand, const Cost & addedCost, const std::string & name
    820820                ) {
     
    12631263
    12641264                void postvisit( const ast::UntypedOffsetofExpr * offsetofExpr ) {
    1265                         const ast::ReferenceToType * aggInst;
     1265                        const ast::BaseInstType * aggInst;
    12661266                        if (( aggInst = offsetofExpr->type.as< ast::StructInstType >() )) ;
    12671267                        else if (( aggInst = offsetofExpr->type.as< ast::UnionInstType >() )) ;
  • src/ResolvExpr/ConversionCost.cc

    r1c507eb r7a80113  
    520520                return convertToReferenceCost( src, refType, srcIsLvalue, symtab, env, localPtrsAssignable );
    521521        } else {
    522                 ast::Pass<ConversionCost_new> converter( dst, srcIsLvalue, symtab, env, localConversionCost );
    523                 src->accept( converter );
    524                 return converter.core.cost;
     522                return ast::Pass<ConversionCost_new>::read( src, dst, srcIsLvalue, symtab, env, localConversionCost );
    525523        }
    526524}
     
    563561                        }
    564562                } else {
    565                         ast::Pass<ConversionCost_new> converter( dst, srcIsLvalue, symtab, env, localConversionCost );
    566                         src->accept( converter );
    567                         return converter.core.cost;
     563                        return ast::Pass<ConversionCost_new>::read( src, dst, srcIsLvalue, symtab, env, localConversionCost );
    568564                }
    569565        } else {
  • src/ResolvExpr/ConversionCost.h

    r1c507eb r7a80113  
    8888        static size_t traceId;
    8989        Cost cost;
     90        Cost result() { return cost; }
    9091
    9192        ConversionCost_new( const ast::Type * dst, bool srcIsLvalue, const ast::SymbolTable & symtab,
  • src/ResolvExpr/CurrentObject.cc

    r1c507eb r7a80113  
    923923
    924924        MemberIterator * createMemberIterator( const CodeLocation & loc, const Type * type ) {
    925                 if ( auto aggr = dynamic_cast< const ReferenceToType * >( type ) ) {
     925                if ( auto aggr = dynamic_cast< const BaseInstType * >( type ) ) {
    926926                        if ( auto sit = dynamic_cast< const StructInstType * >( aggr ) ) {
    927927                                return new StructIterator{ loc, sit };
     
    932932                                        dynamic_cast< const EnumInstType * >( type )
    933933                                                || dynamic_cast< const TypeInstType * >( type ),
    934                                         "Encountered unhandled ReferenceToType in createMemberIterator: %s",
     934                                        "Encountered unhandled BaseInstType in createMemberIterator: %s",
    935935                                                toString( type ).c_str() );
    936936                                return new SimpleIterator{ loc, type };
     
    965965                                        DesignatorChain & d = *dit;
    966966                                        PRINT( std::cerr << "____actual: " << t << std::endl; )
    967                                         if ( auto refType = dynamic_cast< const ReferenceToType * >( t ) ) {
     967                                        if ( auto refType = dynamic_cast< const BaseInstType * >( t ) ) {
    968968                                                // concatenate identical field names
    969969                                                for ( const Decl * mem : refType->lookup( nexpr->name ) ) {
  • src/ResolvExpr/Resolver.cc

    r1c507eb r7a80113  
    3838#include "Common/PassVisitor.h"          // for PassVisitor
    3939#include "Common/SemanticError.h"        // for SemanticError
     40#include "Common/Stats/ResolveTime.h"    // for ResolveTime::start(), ResolveTime::stop()
    4041#include "Common/utility.h"              // for ValueGuard, group_iterate
    4142#include "InitTweak/GenInit.h"
     
    965966                /// Finds deleted expressions in an expression tree
    966967                struct DeleteFinder_new final : public ast::WithShortCircuiting {
    967                         const ast::DeletedExpr * delExpr = nullptr;
     968                        const ast::DeletedExpr * result = nullptr;
    968969
    969970                        void previsit( const ast::DeletedExpr * expr ) {
    970                                 if ( delExpr ) { visit_children = false; }
    971                                 else { delExpr = expr; }
     971                                if ( result ) { visit_children = false; }
     972                                else { result = expr; }
    972973                        }
    973974
    974975                        void previsit( const ast::Expr * ) {
    975                                 if ( delExpr ) { visit_children = false; }
     976                                if ( result ) { visit_children = false; }
    976977                        }
    977978                };
     
    980981        /// Check if this expression is or includes a deleted expression
    981982        const ast::DeletedExpr * findDeletedExpr( const ast::Expr * expr ) {
    982                 ast::Pass<DeleteFinder_new> finder;
    983                 expr->accept( finder );
    984                 return finder.core.delExpr;
     983                return ast::Pass<DeleteFinder_new>::read( expr );
    985984        }
    986985
     
    11711170                        const ast::Expr * untyped, const ast::SymbolTable & symtab
    11721171                ) {
    1173                         return findKindExpression( untyped, symtab );
     1172                        Stats::ResolveTime::start( untyped );
     1173                        auto res = findKindExpression( untyped, symtab );
     1174                        Stats::ResolveTime::stop();
     1175                        return res;
    11741176                }
    11751177        } // anonymous namespace
     
    12611263                const ast::ThrowStmt *       previsit( const ast::ThrowStmt * );
    12621264                const ast::CatchStmt *       previsit( const ast::CatchStmt * );
     1265                const ast::CatchStmt *       postvisit( const ast::CatchStmt * );
    12631266                const ast::WaitForStmt *     previsit( const ast::WaitForStmt * );
    12641267
     
    14931496
    14941497        const ast::CatchStmt * Resolver_new::previsit( const ast::CatchStmt * catchStmt ) {
    1495                 // TODO: This will need a fix for the decl/cond scoping problem.
     1498                // Until we are very sure this invarent (ifs that move between passes have thenPart)
     1499                // holds, check it. This allows a check for when to decode the mangling.
     1500                if ( auto ifStmt = catchStmt->body.as<ast::IfStmt>() ) {
     1501                        assert( ifStmt->thenPart );
     1502                }
     1503                // Encode the catchStmt so the condition can see the declaration.
    14961504                if ( catchStmt->cond ) {
    1497                         ast::ptr< ast::Type > boolType = new ast::BasicType{ ast::BasicType::Bool };
    1498                         catchStmt = ast::mutate_field(
    1499                                 catchStmt, &ast::CatchStmt::cond,
    1500                                 findSingleExpression( catchStmt->cond, boolType, symtab ) );
     1505                        ast::CatchStmt * stmt = mutate( catchStmt );
     1506                        stmt->body = new ast::IfStmt( stmt->location, stmt->cond, nullptr, stmt->body );
     1507                        stmt->cond = nullptr;
     1508                        return stmt;
     1509                }
     1510                return catchStmt;
     1511        }
     1512
     1513        const ast::CatchStmt * Resolver_new::postvisit( const ast::CatchStmt * catchStmt ) {
     1514                // Decode the catchStmt so everything is stored properly.
     1515                const ast::IfStmt * ifStmt = catchStmt->body.as<ast::IfStmt>();
     1516                if ( nullptr != ifStmt && nullptr == ifStmt->thenPart ) {
     1517                        assert( ifStmt->cond );
     1518                        assert( ifStmt->elsePart );
     1519                        ast::CatchStmt * stmt = ast::mutate( catchStmt );
     1520                        stmt->cond = ifStmt->cond;
     1521                        stmt->body = ifStmt->elsePart;
     1522                        // ifStmt should be implicately deleted here.
     1523                        return stmt;
    15011524                }
    15021525                return catchStmt;
  • src/SymTab/Mangler.cc

    r1c507eb r7a80113  
    437437                  private:
    438438                        void mangleDecl( const ast::DeclWithType *declaration );
    439                         void mangleRef( const ast::ReferenceToType *refType, std::string prefix );
     439                        void mangleRef( const ast::BaseInstType *refType, std::string prefix );
    440440
    441441                        void printQualifiers( const ast::Type *type );
     
    560560                }
    561561
    562                 void Mangler_new::mangleRef( const ast::ReferenceToType * refType, std::string prefix ) {
     562                void Mangler_new::mangleRef( const ast::BaseInstType * refType, std::string prefix ) {
    563563                        printQualifiers( refType );
    564564
  • src/SymTab/Validate.cc

    r1c507eb r7a80113  
    960960        }
    961961
     962        static bool isNonParameterAttribute( Attribute * attr ) {
     963                static const std::vector<std::string> bad_names = {
     964                        "aligned", "__aligned__",
     965                };
     966                for ( auto name : bad_names ) {
     967                        if ( name == attr->name ) {
     968                                return true;
     969                        }
     970                }
     971                return false;
     972        }
     973
    962974        Type * ReplaceTypedef::postmutate( TypeInstType * typeInst ) {
    963975                // instances of typedef types will come here. If it is an instance
     
    968980                        ret->location = typeInst->location;
    969981                        ret->get_qualifiers() |= typeInst->get_qualifiers();
    970                         // attributes are not carried over from typedef to function parameters/return values
    971                         if ( ! inFunctionType ) {
    972                                 ret->attributes.splice( ret->attributes.end(), typeInst->attributes );
    973                         } else {
    974                                 deleteAll( ret->attributes );
    975                                 ret->attributes.clear();
    976                         }
     982                        // GCC ignores certain attributes if they arrive by typedef, this mimics that.
     983                        if ( inFunctionType ) {
     984                                ret->attributes.remove_if( isNonParameterAttribute );
     985                        }
     986                        ret->attributes.splice( ret->attributes.end(), typeInst->attributes );
    977987                        // place instance parameters on the typedef'd type
    978988                        if ( ! typeInst->parameters.empty() ) {
     
    15081518                }
    15091519
    1510                 void checkGenericParameters( const ast::ReferenceToType * inst ) {
     1520                void checkGenericParameters( const ast::BaseInstType * inst ) {
    15111521                        for ( const ast::Expr * param : inst->params ) {
    15121522                                if ( ! dynamic_cast< const ast::TypeExpr * >( param ) ) {
  • src/Virtual/module.mk

    r1c507eb r7a80113  
    1515###############################################################################
    1616
    17 SRC += Virtual/ExpandCasts.cc Virtual/ExpandCasts.h
     17SRC += Virtual/ExpandCasts.cc Virtual/ExpandCasts.h \
     18        Virtual/Tables.cc Virtual/Tables.h
     19
     20SRCDEMANGLE += Virtual/Tables.cc
  • tests/Makefile.am

    r1c507eb r7a80113  
    3838# since automake doesn't have support for CFA we have to
    3939AM_CFLAGS = $(if $(test), 2> $(test), ) \
     40        -fdebug-prefix-map=$(abspath ${abs_srcdir})= \
     41        -fdebug-prefix-map=/tmp= \
    4042        -g \
    4143        -Wall \
     
    5860# adjusted CC but without the actual distcc call
    5961CFACCLOCAL = $(if $(DISTCC_CFA_PATH),$(DISTCC_CFA_PATH) ${ARCH_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
     62CFACCLINK = $(CFACCLOCAL) $(if $(test), 2> $(test), ) $($(shell echo "${@}_FLAGSLD" | sed 's/-\|\//_/g'))
    6063
    6164PRETTY_PATH=mkdir -p $(dir $(abspath ${@})) && cd ${srcdir} &&
     
    110113% : %.cfa $(CFACCBIN)
    111114        $(CFACOMPILETEST) -c -o $(abspath ${@}).o
    112         $(CFACCLOCAL) $($(shell echo "${@}_FLAGSLD" | sed 's/-\|\//_/g')) $(abspath ${@}).o -o $(abspath ${@})
     115        $(CFACCLINK) ${@}.o -o $(abspath ${@})
     116        rm $(abspath ${@}).o
    113117
    114118# implicit rule for c++ test
     
    137141# CUSTOM TARGET
    138142#------------------------------------------------------------------------------
     143# tests that just validate syntax
     144expression : expression.cfa $(CFACCBIN)
     145        $(CFACOMPILETEST) -c -fsyntax-only 2> $(abspath ${@})
     146
    139147# expected failures
    140148# use custom target since they require a custom define and custom dependencies
     
    170178        $(CFACCLOCAL) $($(shell echo "${@}_FLAGSLD" | sed 's/-\|\//_/g')) $(abspath ${@}).o -o $(abspath ${@})
    171179
     180# Linking tests
     181# Meta tests to make sure we see linking errors (can't compile with -O2 since it may multiply number of calls)
     182linking/linkerror : linking/linkerror.cfa $(CFACCBIN)
     183        $(CFACOMPILETEST) -O0 -c -o $(abspath ${@}).o
     184        $(CFACCLINK)  -O0 ${@}.o -o $(abspath ${@})
     185        rm $(abspath ${@}).o
     186
    172187#------------------------------------------------------------------------------
    173188# Other targets
  • tests/alloc2.cfa

    r1c507eb r7a80113  
    1313void test_base( void * ip, size_t size, size_t align) {
    1414        tests_total += 1;
     15//      printf("DEBUG: starting test %d\n", tests_total);
    1516        bool passed = (malloc_size(ip) == size) && (malloc_usable_size(ip) >= size) && (malloc_alignment(ip) == align) && ((uintptr_t)ip % align  == 0);
    1617        if (!passed) {
     
    1819                tests_failed += 1;
    1920        }
     21//      printf("DEBUG: done test %d\n", tests_total);
    2022}
    2123
    2224void test_fill( void * ip_, size_t start, size_t end, char fill) {
    2325        tests_total += 1;
     26//      printf("DEBUG: starting test %d\n", tests_total);
    2427        bool passed = true;
    2528        char * ip = (char *) ip_;
     
    2932                tests_failed += 1;
    3033        }
     34//      printf("DEBUG: done test %d\n", tests_total);
    3135}
    3236
    3337void test_fill( void * ip_, size_t start, size_t end, int fill) {
    3438        tests_total += 1;
     39//      printf("DEBUG: starting test %d\n", tests_total);
    3540        bool passed = true;
    3641        int * ip = (int *) ip_;
     
    4045                tests_failed += 1;
    4146        }
     47//      printf("DEBUG: done test %d\n", tests_total);
    4248}
    4349
    4450void test_fill( void * ip_, size_t start, size_t end, int * fill) {
    4551        tests_total += 1;
     52//      printf("DEBUG: starting test %d\n", tests_total);
    4653        bool passed = (memcmp((void*)((uintptr_t)ip_ + start), (void*)fill, end) == 0);
    4754        if (!passed) {
     
    4956                tests_failed += 1;
    5057        }
     58//      printf("DEBUG: done test %d\n", tests_total);
    5159}
    5260
    5361void test_fill( void * ip_, size_t start, size_t end, T1 fill) {
    5462        tests_total += 1;
     63//      printf("DEBUG: starting test %d\n", tests_total);
    5564        bool passed = true;
    5665        T1 * ip = (T1 *) ip_;
     
    6069                tests_failed += 1;
    6170        }
     71//      printf("DEBUG: done test %d\n", tests_total);
    6272}
    6373
    6474void test_fill( void * ip_, size_t start, size_t end, T1 * fill) {
    6575        tests_total += 1;
     76//      printf("DEBUG: starting test %d\n", tests_total);
    6677        bool passed = (memcmp((void*)((uintptr_t)ip_ + start), (void*)fill, end) == 0);
    6778        if (!passed) {
     
    6980                tests_failed += 1;
    7081        }
     82//      printf("DEBUG: done test %d\n", tests_total);
    7183}
    7284
    7385void test_use( int * ip, size_t dim) {
    7486        tests_total += 1;
     87//      printf("DEBUG: starting test %d\n", tests_total);
    7588        bool passed = true;
    7689        for (i; 0 ~ dim) ip[i] = 0xdeadbeef;
     
    8093                tests_failed += 1;
    8194        }
     95//      printf("DEBUG: done test %d\n", tests_total);
    8296}
    8397
    8498void test_use( T1 * ip, size_t dim) {
    8599        tests_total += 1;
     100//      printf("DEBUG: starting test %d\n", tests_total);
    86101        bool passed = true;
    87102        for (i; 0 ~ dim) ip[i].data = 0xdeadbeef;
     
    91106                tests_failed += 1;
    92107        }
     108//      printf("DEBUG: done test %d\n", tests_total);
    93109}
    94110
  • tests/heap.cfa

    r1c507eb r7a80113  
    1010// Created On       : Tue Nov  6 17:54:56 2018
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sun Aug  9 08:05:16 2020
    13 // Update Count     : 57
     12// Last Modified On : Mon Sep  7 18:37:41 2020
     13// Update Count     : 72
    1414//
    1515
     
    205205                        free( area );
    206206                } // for
     207        } // for
     208
     209        // check malloc/resize/free (sbrk)
     210
     211        for ( i; 2 ~ NoOfAllocs ~ 12 ) {
     212                // initial N byte allocation
     213                char * area = (char *)malloc( i );
     214                area[0] = '\345'; area[i - 1] = '\345';                 // fill first/penultimate byte
     215
     216                // Do not start this loop index at 0 because resize of 0 bytes frees the storage.
     217                int prev = i;
     218                for ( s; i ~ 256 * 1024 ~ 26 ) {                                // start at initial memory request
     219                        if ( area[0] != '\345' || area[prev - 1] != '\345' ) abort( "malloc/resize/free corrupt storage" );
     220                        area = (char *)resize( area, s );                       // attempt to reuse storage
     221                        area[0] = area[s - 1] = '\345';                         // fill last byte
     222                        prev = s;
     223                } // for
     224                free( area );
     225        } // for
     226
     227        // check malloc/resize/free (mmap)
     228
     229        for ( i; 2 ~ NoOfAllocs ~ 12 ) {
     230                // initial N byte allocation
     231                size_t s = i + default_mmap_start();                    // cross over point
     232                char * area = (char *)malloc( s );
     233                area[0] = '\345'; area[s - 1] = '\345';                 // fill first/penultimate byte
     234
     235                // Do not start this loop index at 0 because resize of 0 bytes frees the storage.
     236                int prev = s;
     237                for ( r; s ~ 256 * 1024 ~ 26 ) {                                // start at initial memory request
     238                        if ( area[0] != '\345' || area[prev - 1] != '\345' ) abort( "malloc/resize/free corrupt storage" );
     239                        area = (char *)resize( area, s );                       // attempt to reuse storage
     240                        area[0] = area[r - 1] = '\345';                         // fill last byte
     241                        prev = r;
     242                } // for
     243                free( area );
     244        } // for
     245
     246        // check malloc/realloc/free (sbrk)
     247
     248        for ( i; 2 ~ NoOfAllocs ~ 12 ) {
     249                // initial N byte allocation
     250                char * area = (char *)malloc( i );
     251                area[0] = '\345'; area[i - 1] = '\345';                 // fill first/penultimate byte
     252
     253                // Do not start this loop index at 0 because realloc of 0 bytes frees the storage.
     254                int prev = i;
     255                for ( s; i ~ 256 * 1024 ~ 26 ) {                                // start at initial memory request
     256                        if ( area[0] != '\345' || area[prev - 1] != '\345' ) abort( "malloc/realloc/free corrupt storage" );
     257                        area = (char *)realloc( area, s );                      // attempt to reuse storage
     258                        area[s - 1] = '\345';                                           // fill last byte
     259                        prev = s;
     260                } // for
     261                free( area );
     262        } // for
     263
     264        // check malloc/realloc/free (mmap)
     265
     266        for ( i; 2 ~ NoOfAllocs ~ 12 ) {
     267                // initial N byte allocation
     268                size_t s = i + default_mmap_start();                    // cross over point
     269                char * area = (char *)malloc( s );
     270                area[0] = '\345'; area[s - 1] = '\345';                 // fill first/penultimate byte
     271
     272                // Do not start this loop index at 0 because realloc of 0 bytes frees the storage.
     273                int prev = s;
     274                for ( r; s ~ 256 * 1024 ~ 26 ) {                                // start at initial memory request
     275                        if ( area[0] != '\345' || area[prev - 1] != '\345' ) abort( "malloc/realloc/free corrupt storage" );
     276                        area = (char *)realloc( area, s );                      // attempt to reuse storage
     277                        area[r - 1] = '\345';                                           // fill last byte
     278                        prev = r;
     279                } // for
     280                free( area );
    207281        } // for
    208282
     
    320394        } // for
    321395
     396        // check memalign/resize with align/free
     397
     398        amount = 2;
     399        for ( a; libAlign() ~= limit ~ a ) {                            // generate powers of 2
     400                // initial N byte allocation
     401                char * area = (char *)memalign( a, amount );    // aligned N-byte allocation
     402                //sout | alignments[a] | area | endl;
     403                if ( (size_t)area % a != 0 || malloc_alignment( area ) != a ) { // check for initial alignment
     404                        abort( "memalign/resize with align/free bad alignment : memalign(%d,%d) = %p", (int)a, (int)amount, area );
     405                } // if
     406                area[0] = '\345'; area[amount - 2] = '\345';    // fill first/penultimate byte
     407
     408                // Do not start this loop index at 0 because resize of 0 bytes frees the storage.
     409                for ( s; amount ~ 256 * 1024 ) {                                // start at initial memory request
     410                        area = (char *)resize( area, a * 2, s );        // attempt to reuse storage
     411                        //sout | i | area | endl;
     412                        if ( (size_t)area % a * 2 != 0 ) {                      // check for initial alignment
     413                                abort( "memalign/resize with align/free bad alignment %p", area );
     414                        } // if
     415                        area[s - 1] = '\345';                                           // fill last byte
     416                } // for
     417                free( area );
     418        } // for
     419
    322420        // check memalign/realloc with align/free
    323421
  • tests/pybin/tools.py

    r1c507eb r7a80113  
    120120                return None
    121121
    122         file = open(file, mode)
     122        file = open(file, mode, encoding="latin-1") # use latin-1 so all chars mean something.
    123123        exitstack.push(file)
    124124        return file
  • tests/test.py

    r1c507eb r7a80113  
    207207                else:
    208208                        if os.stat(out_file).st_size < 1048576:
    209                                 with open (out_file, "r") as myfile:
     209                                with open (out_file, "r", encoding='latin-1') as myfile:  # use latin-1 so all chars mean something.
    210210                                        error = myfile.read()
    211211                        else:
Note: See TracChangeset for help on using the changeset viewer.