Diff [266ecf1eace6e01cf462c6e47ac0538f28f02825:182256b4aa148c77a64e9731dedbd8a503973f2f] for / – Cforall

Jenkins/FullBuild

-              r266ecf1
+              r182256b
                                         gcc_7_x86_old: { trigger_build( 'gcc-7',   'x86', false ) },
                                         gcc_6_x86_old: { trigger_build( 'gcc-6',   'x86', false ) },
                                         gcc_9_x64_new: { trigger_build( 'gcc-9',   'x64', true  ) },
                                         gcc_8_x64_new: { trigger_build( 'gcc-8',   'x64', true  ) },
                                         gcc_7_x64_new: { trigger_build( 'gcc-7',   'x64', true  ) },
                                         gcc_6_x64_new: { trigger_build( 'gcc-6',   'x64', true  ) },
                                         gcc_5_x64_new: { trigger_build( 'gcc-5',   'x64', true  ) },
                                         clang_x64_new: { trigger_build( 'clang',   'x64', true  ) },
                                         clang_x64_old: { trigger_build( 'clang',   'x64', false ) },
+                                        gcc_9_x64_old: { trigger_build( 'gcc-9',   'x64', true  ) },
+                                        gcc_8_x64_old: { trigger_build( 'gcc-8',   'x64', true  ) },
+                                        gcc_7_x64_old: { trigger_build( 'gcc-7',   'x64', true  ) },
+                                        gcc_6_x64_old: { trigger_build( 'gcc-6',   'x64', true  ) },
+                                        gcc_5_x64_old: { trigger_build( 'gcc-5',   'x64', true  ) },
+                                        clang_x64_old: { trigger_build( 'clang',   'x64', true  ) },
+                                        clang_x64_new: { trigger_build( 'clang',   'x64', false ) },
+                                )
+                        }
 …
 def trigger_build(String cc, String arch, boolean new_ast) {
-        // Randomly delay the builds by a random amount to avoid hitting the SC server to hard
-        sleep(time: 5 * Math.random(), unit:"MINUTES")
-        // Run the build
-        // Don't propagate, it doesn't play nice with our email setup
         def result = build job: 'Cforall/master',               \
                 parameters: [                                           \

benchmark/io/http/http_ring.cpp

-              r266ecf1
+              r182256b
                 socklen_t *addrlen;
                 int flags;
-                unsigned cnt;
         } acpt;
 …
 thread_local stats_block_t stats;
 stats_block_t global_stats;
-thread_local struct __attribute__((aligned(128))) {
-        size_t to_submit = 0;
-} local;
 // Get an array of current connections
 …
         static void submit(struct io_uring * ring, struct io_uring_sqe * sqe, connection * conn) {
                 (void)ring;
-                local.to_submit++;
                 #ifdef USE_ASYNC
                         io_uring_sqe_set_flags(sqe, IOSQE_ASYNC);
 …
                 switch(state) {
                 case ACCEPTING:
                         // connection::accept(ring, opt);
+                        connection::accept(ring, opt);
                         newconn(ring, res);
                         break;
 …
 //=========================================================
-extern "C" {
-        #include <sys/eventfd.h>  // use for termination
+}
 // Main loop of the WebServer
 // Effectively uses one thread_local copy of everything per kernel thread
 …
         struct io_uring * ring = opt.ring;
-        int blockfd = eventfd(0, 0);
-        if (blockfd < 0) {
-                fprintf( stderr, "eventfd create error: (%d) %s\n", (int)errno, strerror(errno) );
-                exit(EXIT_FAILURE);
+        }
-        int ret = io_uring_register_eventfd(ring, blockfd);
-        if (ret < 0) {
-                fprintf( stderr, "io_uring S&W error: (%d) %s\n", (int)-ret, strerror(-ret) );
-                exit(EXIT_FAILURE);
+        }
         // Track the shutdown using a event_fd
         char endfd_buf[8];
 …
         // Accept our first connection
         // May not take effect until io_uring_submit_and_wait
+        for(unsigned i = 0; i < opt.acpt.cnt; i++) {
+                connection::accept(ring, opt);
+        }
+        connection::accept(ring, opt);
         int reset = 1;       // Counter to print stats once in a while
 …
         while(!done) {
                 // Submit all the answers we have and wait for responses
+                int ret = io_uring_submit(ring);
+                local.to_submit = 0;
+                int ret = io_uring_submit_and_wait(ring, 1);
                 // check errors
 …
                 sqes += ret;
                 call++;
-                eventfd_t val;
-                ret = eventfd_read(blockfd, &val);
-                // check errors
-                if (ret < 0) {
-                        fprintf( stderr, "eventfd read error: (%d) %s\n", (int)errno, strerror(errno) );
-                        exit(EXIT_FAILURE);
+                }
                 struct io_uring_cqe *cqe;
 …
                                 break;
+                        }
-                        if(local.to_submit > 30) break;
                         auto req = (class connection *)cqe->user_data;
 …
         #include <pthread.h>      // for pthreads
         #include <signal.h>       // for signal(SIGPIPE, SIG_IGN);
+        #include <sys/eventfd.h>  // use for termination
         #include <sys/socket.h>   // for sockets in general
         #include <netinet/in.h>   // for sockaddr_in, AF_INET
 …
         unsigned entries = 256;     // number of entries per ring/kernel thread
         unsigned backlog = 262144;  // backlog argument to listen
-        unsigned preaccept = 1;     // start by accepting X per threads
         bool attach = false;        // Whether or not to attach all the rings
         bool sqpoll = false;        // Whether or not to use SQ Polling
 …
         // Arguments Parsing
         int c;
         while ((c = getopt (argc, argv, "t:p:e:b:c:aS")) != -1) {
+        while ((c = getopt (argc, argv, "t:p:e:b:aS")) != -1) {
                 switch (c)
+                {
 …
                 case 'b':
                         backlog = atoi(optarg);
-                        break;
-                case 'c':
-                        preaccept = atoi(optarg);
                         break;
                 case 'a':
 …
                 thrd_opts[i].acpt.addrlen = (socklen_t*)&addrlen;
                 thrd_opts[i].acpt.flags   = 0;
-                thrd_opts[i].acpt.cnt     = preaccept;
                 thrd_opts[i].endfd        = efd;
                 thrd_opts[i].ring         = &thrd_rings[i].storage;

benchmark/io/http/main.cfa

-              r266ecf1
+              r182256b
 //=============================================================================================
+// Globals
+//=============================================================================================
+struct ServerProc {
+        processor self;
+};
+void ?{}( ServerProc & this ) {
+        /* paranoid */ assert( options.clopts.instance != 0p );
+        (this.self){ "Benchmark Processor", *options.clopts.instance };
+        #if !defined(__CFA_NO_STATISTICS__)
+                if( options.clopts.procstats ) {
+                        print_stats_at_exit( this.self, options.clopts.instance->print_stats );
+                }
+                if( options.clopts.viewhalts ) {
+                        print_halts( this.self );
+                }
+        #endif
+}
+extern void init_protocol(void);
+extern void deinit_protocol(void);
+//=============================================================================================
 // Stats Printer
 //============================================================================================='
 …
 thread StatsPrinter {};
+void ?{}( StatsPrinter & this, cluster & cl ) {
+        ((thread&)this){ "Stats Printer Thread", cl };
+}
+void ^?{}( StatsPrinter & mutex this ) {}
+void ?{}( StatsPrinter & this ) {
+        ((thread&)this){ "Stats Printer Thread" };
+}
 void main(StatsPrinter & this) {
 …
                 sleep(10`s);
+                print_stats_now( *active_cluster(), CFA_STATS_READY_Q | CFA_STATS_IO );
+        }
+}
+//=============================================================================================
+// Globals
+//=============================================================================================
+struct ServerCluster {
+        cluster self;
+        processor    * procs;
+        // io_context   * ctxs;
+        StatsPrinter * prnt;
+};
+void ?{}( ServerCluster & this ) {
+        (this.self){ "Server Cluster", options.clopts.params };
+        this.procs = alloc(options.clopts.nprocs);
+        for(i; options.clopts.nprocs) {
+                (this.procs[i]){ "Benchmark Processor", this.self };
+                #if !defined(__CFA_NO_STATISTICS__)
+                        if( options.clopts.procstats ) {
+                                print_stats_at_exit( *this.procs, this.self.print_stats );
+                        }
+                        if( options.clopts.viewhalts ) {
+                                print_halts( *this.procs );
+                        }
+                #endif
+        }
+        if(options.stats) {
+                this.prnt = alloc();
+                (*this.prnt){ this.self };
+        } else {
+                this.prnt = 0p;
+        }
+        #if !defined(__CFA_NO_STATISTICS__)
+                print_stats_at_exit( this.self, CFA_STATS_READY_Q | CFA_STATS_IO );
+        #endif
+        options.clopts.instance[options.clopts.cltr_cnt] = &this.self;
+        options.clopts.cltr_cnt++;
+}
+void ^?{}( ServerCluster & this ) {
+        delete(this.prnt);
+        for(i; options.clopts.nprocs) {
+                ^(this.procs[i]){};
+        }
+        free(this.procs);
+        ^(this.self){};
+}
+extern void init_protocol(void);
+extern void deinit_protocol(void);
+                print_stats_now( *options.clopts.instance, CFA_STATS_READY_Q | CFA_STATS_IO );
+        }
+}
 //=============================================================================================
 …
         // Run Server Cluster
+        {
+                cluster cl = { "Server Cluster", options.clopts.params };
+                #if !defined(__CFA_NO_STATISTICS__)
+                        print_stats_at_exit( cl, CFA_STATS_READY_Q | CFA_STATS_IO );
+                #endif
+                options.clopts.instance = &cl;
                 int pipe_cnt = options.clopts.nworkers * 2;
                 int pipe_off;
 …
+                }
                 // if(options.file_cache.path && options.file_cache.fixed_fds) {
                 //      register_fixed_files(cl, fds, pipe_off);
                 // }
+                if(options.file_cache.path && options.file_cache.fixed_fds) {
+                        register_fixed_files(cl, fds, pipe_off);
+                }
+                {
+                        ServerCluster cl[options.clopts.nclusters];
+                        ServerProc procs[options.clopts.nprocs];
+                        StatsPrinter printer;
                         init_protocol();
 …
                                         unpark( workers[i] );
+                                }
+                                sout | options.clopts.nworkers | "workers started on" | options.clopts.nprocs | "processors /" | options.clopts.nclusters | "clusters";
+                                for(i; options.clopts.nclusters) {
+                                        sout | options.clopts.thrd_cnt[i] | nonl;
+                                }
+                                sout | nl;
+                                sout | options.clopts.nworkers | "workers started on" | options.clopts.nprocs | "processors";
+                                {
                                         char buffer[128];
+                                        for() {
+                                                int ret = cfa_read(0, buffer, 128, 0);
+                                                if(ret == 0) break;
+                                        while(int ret = cfa_read(0, buffer, 128, 0, -1`s, 0p, 0p); ret != 0) {
                                                 if(ret < 0) abort( "main read error: (%d) %s\n", (int)errno, strerror(errno) );
-                                                sout | "User wrote '" | "" | nonl;
-                                                write(sout, buffer, ret - 1);
-                                                sout | "'";
+                                        }
 …
                                 for(i; options.clopts.nworkers) {
                                         workers[i].done = true;
+                                        cancel(workers[i].cancel);
+                                }
                                 sout | "done";
 …
                         sout | "done";
                         sout | "Stopping processors/clusters..." | nonl; flush( sout );
+                        sout | "Stopping processors..." | nonl; flush( sout );
+                }
                 sout | "done";

benchmark/io/http/options.cfa

-              r266ecf1
+              r182256b
 #include <kernel.hfa>
 #include <parseargs.hfa>
-#include <stdlib.hfa>
 #include <stdlib.h>
 …
 Options options @= {
         false, // log
-        false, // stats
         { // file_cache
 …
         { // cluster
-,     // nclusters;
 ,     // nprocs;
 ,     // nworkers;
 …
 void parse_options( int argc, char * argv[] ) {
+        // bool fixedfd = false;
+        // bool sqkpoll = false;
+        // bool iokpoll = false;
+        bool subthrd = false;
+        bool eagrsub = false;
+        bool fixedfd = false;
+        bool sqkpoll = false;
+        bool iokpoll = false;
+        unsigned sublen = 16;
         unsigned nentries = 16;
-        bool isolate = false;
 …
                 { 'c', "cpus",           "Number of processors to use", options.clopts.nprocs},
                 { 't', "threads",        "Number of worker threads to use", options.clopts.nworkers},
-                {'\0', "isolate",        "Create one cluster per processor", isolate, parse_settrue},
                 {'\0', "log",            "Enable logs", options.log, parse_settrue},
-                {'\0', "stats",          "Enable statistics", options.stats, parse_settrue},
                 {'\0', "accept-backlog", "Maximum number of pending accepts", options.socket.backlog},
                 {'\0', "request_len",    "Maximum number of bytes in the http request, requests with more data will be answered with Http Code 414", options.socket.buflen},
 …
                 {'\0', "cache-size",     "Size of the cache to use, if set to small, will uses closes power of 2", options.file_cache.size },
                 {'\0', "list-files",     "List the files in the specified path and exit", options.file_cache.list, parse_settrue },
+                // { 'f', "fixed-fds",      "If set, files are open eagerly and pre-registered with the cluster", fixedfd, parse_settrue},
+                // { 'k', "kpollsubmit",    "If set, cluster uses IORING_SETUP_SQPOLL, implies -f", sqkpoll, parse_settrue },
+                // { 'i', "kpollcomplete",  "If set, cluster uses IORING_SETUP_IOPOLL", iokpoll, parse_settrue },
+                {'e', "numentries",     "Number of I/O entries", nentries },
+                { 's', "submitthread",   "If set, cluster uses polling thread to submit I/O", subthrd, parse_settrue },
+                { 'e', "eagersubmit",    "If set, cluster submits I/O eagerly but still aggregates submits", eagrsub, parse_settrue},
+                { 'f', "fixed-fds",      "If set, files are open eagerly and pre-registered with the cluster", fixedfd, parse_settrue},
+                { 'k', "kpollsubmit",    "If set, cluster uses IORING_SETUP_SQPOLL, implies -f", sqkpoll, parse_settrue },
+                { 'i', "kpollcomplete",  "If set, cluster uses IORING_SETUP_IOPOLL", iokpoll, parse_settrue },
+                {'\0', "submitlength",   "Max number of submitions that can be submitted together", sublen },
+                {'\0', "numentries",     "Number of I/O entries", nentries },
         };
 …
                 nentries = v;
+        }
-        if(isolate) {
-                options.clopts.nclusters = options.clopts.nprocs;
-                options.clopts.nprocs = 1;
+        }
         options.clopts.params.num_entries = nentries;
+        options.clopts.instance = alloc(options.clopts.nclusters);
+        options.clopts.thrd_cnt = alloc(options.clopts.nclusters);
+        options.clopts.cltr_cnt = 0;
+        for(i; options.clopts.nclusters) {
+                options.clopts.thrd_cnt[i] = 0;
+        options.clopts.params.poller_submits = subthrd;
+        options.clopts.params.eager_submits  = eagrsub;
+        if( fixedfd ) {
+                options.file_cache.fixed_fds = true;
+        }
+        if( sqkpoll ) {
+                options.clopts.params.poll_submit = true;
+                options.file_cache.fixed_fds = true;
+        }
+        // if( fixedfd ) {
+        //      options.file_cache.fixed_fds = true;
+        // }
+        if( iokpoll ) {
+                options.clopts.params.poll_complete = true;
+                options.file_cache.open_flags |= O_DIRECT;
+        }
+        // if( sqkpoll ) {
+        //      options.file_cache.fixed_fds = true;
+        // }
+        // if( iokpoll ) {
+        //      options.file_cache.open_flags |= O_DIRECT;
+        // }
+        options.clopts.params.num_ready = sublen;
         if( left[0] == 0p ) { return; }

benchmark/io/http/options.hfa

-              r266ecf1
+              r182256b
 struct Options {
         bool log;
-        bool stats;
         struct {
 …
         struct {
-                int nclusters;
                 int nprocs;
                 int nworkers;
 …
                 bool procstats;
                 bool viewhalts;
+                cluster ** instance;
+                size_t   * thrd_cnt;
+                size_t     cltr_cnt;
+                cluster * instance;
         } clopts;
 };

benchmark/io/http/protocol.cfa

-              r266ecf1
+              r182256b
 #include "options.hfa"
+#define PLAINTEXT_1WRITE
+#define PLAINTEXT_NOCOPY
+struct https_msg_str {
+        char msg[512];
+        size_t len;
+};
+const https_msg_str * volatile http_msgs[KNOWN_CODES] = { 0 };
+const char * volatile date = 0p;
+const char * http_msgs[] = {
+        "HTTP/1.1 200 OK\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: %zu \n\n",
+        "HTTP/1.1 400 Bad Request\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+        "HTTP/1.1 404 Not Found\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+        "HTTP/1.1 405 Method Not Allowed\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+        "HTTP/1.1 408 Request Timeout\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+        "HTTP/1.1 413 Payload Too Large\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+        "HTTP/1.1 414 URI Too Long\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+};
 _Static_assert( KNOWN_CODES == (sizeof(http_msgs ) / sizeof(http_msgs [0])));
+const int http_codes[KNOWN_CODES] = {
+,
+const int http_codes[] = {
 ,
 ,
 …
         while(len > 0) {
                 // Call write
+                int ret = cfa_send(fd, it, len, 0, CFA_IO_LAZY);
+                int ret = cfa_write(fd, it, len, 0, -1`s, 0p, 0p);
+                // int ret = write(fd, it, len);
                 if( ret < 0 ) {
                         if( errno == ECONNRESET || errno == EPIPE ) return -ECONNRESET;
 …
         /* paranoid */ assert( code < KNOWN_CODES && code != OK200 );
         int idx = (int)code;
         return answer( fd, http_msgs[idx]->msg, http_msgs[idx]->len );
+        return answer( fd, http_msgs[idx], strlen( http_msgs[idx] ) );
+}
 int answer_header( int fd, size_t size ) {
+        char buffer[512];
+        char * it = buffer;
+        memcpy(it, http_msgs[OK200]->msg, http_msgs[OK200]->len);
+        it += http_msgs[OK200]->len;
+        int len = http_msgs[OK200]->len;
+        len += snprintf(it, 512 - len, "%d \n\n", size);
+        const char * fmt = http_msgs[OK200];
+        int len = 200;
+        char buffer[len];
+        len = snprintf(buffer, len, fmt, date, size);
         return answer( fd, buffer, len );
+}
+#if defined(PLAINTEXT_NOCOPY)
+int answer_plaintext( int fd ) {
+        return answer(fd, http_msgs[OK200_PlainText]->msg, http_msgs[OK200_PlainText]->len + 1); // +1 cause snprintf doesn't count nullterminator
+}
+#elif defined(PLAINTEXT_1WRITE)
+int answer_plaintext( int fd ) {
+        char text[] = "Hello, World!\n";
+        char buffer[512 + sizeof(text)];
+        char * it = buffer;
+        memcpy(it, http_msgs[OK200]->msg, http_msgs[OK200]->len);
+        it += http_msgs[OK200]->len;
+        int len = http_msgs[OK200]->len;
+        int r = snprintf(it, 512 - len, "%d \n\n", sizeof(text));
+        it += r;
+        len += r;
+        memcpy(it, text, sizeof(text));
+        return answer(fd, buffer, len + sizeof(text));
+}
+#else
+int answer_plaintext( int fd ) {
+        char text[] = "Hello, World!\n";
+        int ret = answer_header(fd, sizeof(text));
+int answer_plain( int fd, char buffer[], size_t size ) {
+        int ret = answer_header(fd, size);
         if( ret < 0 ) return ret;
+        return answer(fd, text, sizeof(text));
+}
+#endif
+        return answer(fd, buffer, size);
+}
 int answer_empty( int fd ) {
 …
 [HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len) {
+[HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len, io_cancellation * cancel) {
         char * it = buffer;
         size_t count = len - 1;
 …
         READ:
         for() {
                 int ret = cfa_recv(fd, (void*)it, count, 0, CFA_IO_LAZY);
+                int ret = cfa_read(fd, (void*)it, count, 0, -1`s, cancel, 0p);
                 // int ret = read(fd, (void*)it, count);
                 if(ret == 0 ) return [OK200, true, 0, 0];
 …
         ssize_t ret;
         SPLICE1: while(count > 0) {
+                ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, sflags, CFA_IO_LAZY);
+                ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, sflags, 0, -1`s, 0p, 0p);
+                // ret = splice(ans_fd, &offset, pipe[1], 0p, count, sflags);
                 if( ret < 0 ) {
                         if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE1;
 …
                 size_t in_pipe = ret;
                 SPLICE2: while(in_pipe > 0) {
+                        ret = cfa_splice(pipe[0], 0p, fd, 0p, in_pipe, sflags, CFA_IO_LAZY);
+                        ret = cfa_splice(pipe[0], 0p, fd, 0p, in_pipe, sflags, 0, -1`s, 0p, 0p);
+                        // ret = splice(pipe[0], 0p, fd, 0p, in_pipe, sflags);
                         if( ret < 0 ) {
                                 if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE2;
 …
 #include <thread.hfa>
-const char * original_http_msgs[] = {
-        "HTTP/1.1 200 OK\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: ",
-        "HTTP/1.1 200 OK\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 15\n\nHello, World!\n",
-        "HTTP/1.1 400 Bad Request\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-        "HTTP/1.1 404 Not Found\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-        "HTTP/1.1 405 Method Not Allowed\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-        "HTTP/1.1 408 Request Timeout\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-        "HTTP/1.1 413 Payload Too Large\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-        "HTTP/1.1 414 URI Too Long\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-};
 struct date_buffer {
         https_msg_str strs[KNOWN_CODES];
+        char buff[100];
 };
 …
 void ?{}( DateFormater & this ) {
         ((thread&)this){ "Server Date Thread", *options.clopts.instance[0] };
+        ((thread&)this){ "Server Date Thread", *options.clopts.instance };
         this.idx = 0;
         memset( &this.buffers[0], 0, sizeof(this.buffers[0]) );
         memset( &this.buffers[1], 0, sizeof(this.buffers[1]) );
+        memset( this.buffers[0].buff, 0, sizeof(this.buffers[0]) );
+        memset( this.buffers[1].buff, 0, sizeof(this.buffers[1]) );
+}
 …
                 or else {}
-                char buff[100];
                 Time now = getTimeNsec();
+                strftime( buff, 100, "%a, %d %b %Y %H:%M:%S %Z", now );
+                sout | "Updated date to '" | buff | "'";
+                for(i; KNOWN_CODES) {
+                        size_t len = snprintf( this.buffers[this.idx].strs[i].msg, 512, original_http_msgs[i], buff );
+                        this.buffers[this.idx].strs[i].len = len;
+                }
+                for(i; KNOWN_CODES) {
+                        https_msg_str * next = &this.buffers[this.idx].strs[i];
+                        __atomic_exchange_n((https_msg_str * volatile *)&http_msgs[i], next, __ATOMIC_SEQ_CST);
+                }
+                strftime( this.buffers[this.idx].buff, 100, "%a, %d %b %Y %H:%M:%S %Z", now );
+                char * next = this.buffers[this.idx].buff;
+                __atomic_exchange_n((char * volatile *)&date, next, __ATOMIC_SEQ_CST);
                 this.idx = (this.idx + 1) % 2;
-                sout | "Date thread sleeping";
                 sleep(1`s);

benchmark/io/http/protocol.hfa

-              r266ecf1
+              r182256b
 #pragma once
+struct io_cancellation;
 enum HttpCode {
         OK200 = 0,
-        OK200_PlainText,
         E400,
         E404,
 …
 int answer_error( int fd, HttpCode code );
 int answer_header( int fd, size_t size );
 int answer_plaintext( int fd );
+int answer_plain( int fd, char buffer [], size_t size );
 int answer_empty( int fd );
 [HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len);
+[HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len, io_cancellation *);
 int sendfile( int pipe[2], int fd, int ans_fd, size_t count );

benchmark/io/http/worker.cfa

-              r266ecf1
+              r182256b
 //=============================================================================================
 void ?{}( Worker & this ) {
+        size_t cli = rand() % options.clopts.cltr_cnt;
+        ((thread&)this){ "Server Worker Thread", *options.clopts.instance[cli] };
+        options.clopts.thrd_cnt[cli]++;
+        ((thread&)this){ "Server Worker Thread", *options.clopts.instance };
         this.pipe[0] = -1;
         this.pipe[1] = -1;
 …
         for() {
                 if( options.log ) sout | "=== Accepting connection ===";
+                int fd = cfa_accept4( this.[sockfd, addr, addrlen, flags], CFA_IO_LAZY );
+                int fd = cfa_accept4( this.[sockfd, addr, addrlen, flags], 0, -1`s, &this.cancel, 0p );
+                // int fd = accept4( this.[sockfd, addr, addrlen, flags] );
                 if(fd < 0) {
                         if( errno == ECONNABORTED ) break;
 …
                         abort( "accept error: (%d) %s\n", (int)errno, strerror(errno) );
+                }
-                if(this.done) break;
                 if( options.log ) sout | "=== New connection" | fd | "" | ", waiting for requests ===";
 …
                         char buffer[len];
                         if( options.log ) sout | "=== Reading request ===";
                         [code, closed, file, name_size] = http_read(fd, buffer, len);
+                        [code, closed, file, name_size] = http_read(fd, buffer, len, &this.cancel);
                         // if we are done, break out of the loop
 …
                                 if( options.log ) sout | "=== Request for /plaintext ===";
+                                int ret = answer_plaintext(fd);
+                                char text[] = "Hello, World!\n";
+                                // Send the header
+                                int ret = answer_plain(fd, text, sizeof(text));
                                 if( ret == -ECONNRESET ) break REQUEST;

benchmark/io/http/worker.hfa

r266ecf1	r182256b
17	17	socklen_t * addrlen;
18	18	int flags;
	19	io_cancellation cancel;
19	20	volatile bool done;
20	21	};

libcfa/configure.ac

-              r266ecf1
+              r182256b
 AH_TEMPLATE([CFA_HAVE_IOSQE_FIXED_FILE],[Defined if io_uring support is present when compiling libcfathread and supports the flag FIXED_FILE.])
 AH_TEMPLATE([CFA_HAVE_IOSQE_IO_DRAIN],[Defined if io_uring support is present when compiling libcfathread and supports the flag IO_DRAIN.])
+AH_TEMPLATE([CFA_HAVE_IOSQE_ASYNC],[Defined if io_uring support is present when compiling libcfathread and supports the flag ASYNC.])
 AH_TEMPLATE([CFA_HAVE_IOSQE_IO_LINK],[Defined if io_uring support is present when compiling libcfathread and supports the flag IO_LINK.])
 AH_TEMPLATE([CFA_HAVE_IOSQE_IO_HARDLINK],[Defined if io_uring support is present when compiling libcfathread and supports the flag IO_HARDLINK.])
-AH_TEMPLATE([CFA_HAVE_IOSQE_ASYNC],[Defined if io_uring support is present when compiling libcfathread and supports the flag ASYNC.])
-AH_TEMPLATE([CFA_HAVE_IOSQE_BUFFER_SELECT],[Defined if io_uring support is present when compiling libcfathread and supports the flag BUFFER_SELEC.])
 AH_TEMPLATE([CFA_HAVE_SPLICE_F_FD_IN_FIXED],[Defined if io_uring support is present when compiling libcfathread and supports the flag SPLICE_F_FD_IN_FIXED.])
 AH_TEMPLATE([CFA_HAVE_IORING_SETUP_ATTACH_WQ],[Defined if io_uring support is present when compiling libcfathread and supports the flag IORING_SETUP_ATTACH_WQ.])
 …
 define(ioring_ops, [IORING_OP_NOP,IORING_OP_READV,IORING_OP_WRITEV,IORING_OP_FSYNC,IORING_OP_READ_FIXED,IORING_OP_WRITE_FIXED,IORING_OP_POLL_ADD,IORING_OP_POLL_REMOVE,IORING_OP_SYNC_FILE_RANGE,IORING_OP_SENDMSG,IORING_OP_RECVMSG,IORING_OP_TIMEOUT,IORING_OP_TIMEOUT_REMOVE,IORING_OP_ACCEPT,IORING_OP_ASYNC_CANCEL,IORING_OP_LINK_TIMEOUT,IORING_OP_CONNECT,IORING_OP_FALLOCATE,IORING_OP_OPENAT,IORING_OP_CLOSE,IORING_OP_FILES_UPDATE,IORING_OP_STATX,IORING_OP_READ,IORING_OP_WRITE,IORING_OP_FADVISE,IORING_OP_MADVISE,IORING_OP_SEND,IORING_OP_RECV,IORING_OP_OPENAT2,IORING_OP_EPOLL_CTL,IORING_OP_SPLICE,IORING_OP_PROVIDE_BUFFERS,IORING_OP_REMOVE_BUFFER,IORING_OP_TEE])
 define(ioring_flags, [IOSQE_FIXED_FILE,IOSQE_IO_DRAIN,IOSQE_IO_LINK,IOSQE_IO_HARDLINK,IOSQE_ASYNC,IOSQE_BUFFER_SELECT,SPLICE_F_FD_IN_FIXED,IORING_SETUP_ATTACH_WQ])
+define(ioring_flags, [IOSQE_FIXED_FILE,IOSQE_IO_DRAIN,IOSQE_ASYNC,IOSQE_IO_LINK,IOSQE_IO_HARDLINK,SPLICE_F_FD_IN_FIXED,IORING_SETUP_ATTACH_WQ])
 define(ioring_from_decls, [

libcfa/prelude/defines.hfa.in

-              r266ecf1
+              r182256b
 /* Defined if io_uring support is present when compiling libcfathread and
-   supports the flag BUFFER_SELEC. */
-#undef CFA_HAVE_IOSQE_BUFFER_SELECT
-/* Defined if io_uring support is present when compiling libcfathread and
    supports the flag FIXED_FILE. */
 #undef CFA_HAVE_IOSQE_FIXED_FILE

libcfa/src/bits/defs.hfa

r266ecf1	r182256b
74	74	#error unsupported architecture
75	75	#endif
76
77		~~#define CFA_IO_LAZY (1_l64u << 32_l64u)~~

libcfa/src/concurrency/io.cfa

-              r266ecf1
+              r182256b
         extern "C" {
                 #include <sys/syscall.h>
-                #include <sys/eventfd.h>
                 #include <linux/io_uring.h>
 …
         };
+        static $io_context * __ioarbiter_allocate( $io_arbiter & mutex this, processor *, __u32 idxs[], __u32 want );
+        static void __ioarbiter_submit( $io_arbiter & mutex this, $io_context * , __u32 idxs[], __u32 have, bool lazy );
+        static void __ioarbiter_flush ( $io_arbiter & mutex this, $io_context * );
+        static inline void __ioarbiter_notify( $io_context & ctx );
+        // Returns true if acquired as leader or as next leader (interrupts are left disabled); returns false, with interrupts re-enabled, if a next leader is already registered
+        static inline bool try_lock( __leaderlock_t & this ) {
+                const uintptr_t thrd = 1z | (uintptr_t)active_thread(); // this thread's pointer tagged with the low "locked" bit
+                bool block; // set on every path that reaches the CAS: true when we registered as next leader and must park
+                disable_interrupts();
+                for() {
+                        struct $thread * expected = this.value;
+                        if( 1p != expected && 0p != expected ) { // neither free (0p) nor locked-without-successor (1p): a next leader is already stored
+                                /* paranoid */ verify( thrd != (uintptr_t)expected ); // We better not already be the next leader
+                                enable_interrupts( __cfaabi_dbg_ctx );
+                                return false;
+                        }
+                        struct $thread * desired;
+                        if( 0p == expected ) {
+                                // If the lock isn't locked acquire it, no need to block
+                                desired = 1p;
+                                block = false;
+                        }
+                        else {
+                                // If the lock is already locked try becoming the next leader
+                                desired = (struct $thread *)thrd;
+                                block = true;
+                        }
+                        if( __atomic_compare_exchange_n(&this.value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break; // on failure, loop and re-read this.value
+                }
+                if( block ) {
+                        enable_interrupts( __cfaabi_dbg_ctx );
+                        park(); // wait until next() on this lock unparks us; the lock value is left at 1p (locked) by next()
+                        disable_interrupts();
+                }
+                return true;
+        }
+        static inline bool next( __leaderlock_t & this ) { // Release leadership: hand the lock to the registered next leader (returns true) or unlock it (returns false); interrupts are re-enabled in both cases
+                /* paranoid */ verify( ! __preemption_enabled() );
+                struct $thread * nextt;
+                for() {
+                        struct $thread * expected = this.value;
+                        /* paranoid */ verify( (1 & (uintptr_t)expected) == 1 ); // The lock better be locked
+                        struct $thread * desired;
+                        if( 1p == expected ) {
+                                // No next leader, just unlock
+                                desired = 0p;
+                                nextt   = 0p;
+                        }
+                        else {
+                                // There is a next leader, remove but keep locked
+                                desired = 1p;
+                                nextt   = (struct $thread *)(~1z & (uintptr_t)expected); // strip the low tag bit to recover the thread pointer
+                        }
+                        if( __atomic_compare_exchange_n(&this.value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break; // on failure, loop and re-read this.value
+                }
+                if(nextt) {
+                        unpark( nextt ); // wake the thread parked in try_lock; the lock value stays 1p (locked)
+                        enable_interrupts( __cfaabi_dbg_ctx );
+                        return true;
+                }
+                enable_interrupts( __cfaabi_dbg_ctx );
+                return false;
+        }
+//=============================================================================================
+// I/O Syscall
+//=============================================================================================
+        static int __io_uring_enter( struct __io_data & ring, unsigned to_submit, bool get ) { // Calls io_uring_enter only when required; returns the syscall result, -1 on EAGAIN/EINTR/EBUSY, or 0 when no syscall was made
+                bool need_sys_to_submit = false;
+                bool need_sys_to_complete = false;
+                unsigned flags = 0;
+                TO_SUBMIT:
+                if( to_submit > 0 ) {
+                        if( !(ring.ring_flags & IORING_SETUP_SQPOLL) ) {
+                                need_sys_to_submit = true;
+                                break TO_SUBMIT; // no SQPOLL: a syscall is always required to submit, skip the wakeup check
+                        }
+                        if( (*ring.submit_q.flags) & IORING_SQ_NEED_WAKEUP ) { // SQPOLL ring whose submit queue flags request a wakeup
+                                need_sys_to_submit = true;
+                                flags |= IORING_ENTER_SQ_WAKEUP;
+                        }
+                }
+                if( get && !(ring.ring_flags & IORING_SETUP_SQPOLL) ) {
+                        flags |= IORING_ENTER_GETEVENTS;
+                        if( (ring.ring_flags & IORING_SETUP_IOPOLL) ) { // only IOPOLL rings force a syscall just to collect completions
+                                need_sys_to_complete = true;
+                        }
+                }
+                int ret = 0;
+                if( need_sys_to_submit || need_sys_to_complete ) {
+                        __cfadbg_print_safe(io_core, "Kernel I/O : IO_URING enter %d %u %u\n", ring.fd, to_submit, flags);
+                        ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, 0, flags, (sigset_t *)0p, _NSIG / 8); // third argument (min_complete) is 0
+                        __cfadbg_print_safe(io_core, "Kernel I/O : IO_URING %d returned %d\n", ring.fd, ret);
+                        if( ret < 0 ) {
+                                switch((int)errno) {
+                                case EAGAIN:
+                                case EINTR:
+                                case EBUSY:
+                                        ret = -1; // these errno values are reported to the caller as -1 instead of aborting
+                                        break;
+                                default:
+                                        abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+                                }
+                        }
+                }
+                // Memory barrier
+                __atomic_thread_fence( __ATOMIC_SEQ_CST );
+                return ret;
+        }
 //=============================================================================================
 // I/O Polling
 //=============================================================================================
+        static inline unsigned __flush( struct $io_context & );
+        static inline __u32 __release_sqes( struct $io_context & );
+        void __cfa_io_drain( processor * proc ) {
+        static unsigned __collect_submitions( struct __io_data & ring );
+        static __u32 __release_consumed_submission( struct __io_data & ring );
+        static inline void __clean( volatile struct io_uring_sqe * sqe );
+        // Process a single completion message from the io_uring: fulfil the future whose address is stored in the cqe's user_data with the cqe's result
+        // This is NOT thread-safe
+        static inline void process( volatile struct io_uring_cqe & cqe ) {
+                struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data; // user_data is interpreted as a pointer to the io_future_t
+                __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future );
+                fulfil( *future, cqe.res );
+        }
+        static [int, bool] __drain_io( & struct __io_data ring ) {
                 /* paranoid */ verify( ! __preemption_enabled() );
+                /* paranoid */ verify( proc );
+                /* paranoid */ verify( proc->io.ctx );
+                unsigned to_submit = 0;
+                if( ring.poller_submits ) {
+                        // If the poller thread also submits, then we need to aggregate the submissions which are ready
+                        to_submit = __collect_submitions( ring );
+                }
+                int ret = __io_uring_enter(ring, to_submit, true);
+                if( ret < 0 ) {
+                        return [0, true];
+                }
+                // update statistics
+                if (to_submit > 0) {
+                        __STATS__( true,
+                                if( to_submit > 0 ) {
+                                        io.submit_q.submit_avg.rdy += to_submit;
+                                        io.submit_q.submit_avg.csm += ret;
+                                        io.submit_q.submit_avg.cnt += 1;
+                                }
+                        )
+                }
+                __atomic_thread_fence( __ATOMIC_SEQ_CST );
+                // Release the consumed SQEs
+                __release_consumed_submission( ring );
                 // Drain the queue
+                $io_context * ctx = proc->io.ctx;
+                unsigned head = *ctx->cq.head;
+                unsigned tail = *ctx->cq.tail;
+                const __u32 mask = *ctx->cq.mask;
+                unsigned head = *ring.completion_q.head;
+                unsigned tail = *ring.completion_q.tail;
+                const __u32 mask = *ring.completion_q.mask;
+                // Nothing was new return 0
+                if (head == tail) {
+                        return [0, to_submit > 0];
+                }
                 __u32 count = tail - head;
+                __STATS__( false, io.calls.drain++; io.calls.completed += count; )
+                /* paranoid */ verify( count != 0 );
                 for(i; count) {
                         unsigned idx = (head + i) & mask;
                         volatile struct io_uring_cqe & cqe = ctx->cq.cqes[idx];
+                        volatile struct io_uring_cqe & cqe = ring.completion_q.cqes[idx];
                         /* paranoid */ verify(&cqe);
+                        struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data;
+                        __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future );
+                        fulfil( *future, cqe.res );
+                }
+                __cfadbg_print_safe(io, "Kernel I/O : %u completed\n", count);
+                        process( cqe );
+                }
                 // Mark to the kernel that the cqe has been seen
                 // Ensure that the kernel only sees the new value of the head index after the CQEs have been read.
+                __atomic_store_n( ctx->cq.head, head + count, __ATOMIC_SEQ_CST );
+                /* paranoid */ verify( ! __preemption_enabled() );
+                return;
+        }
+        void __cfa_io_flush( processor * proc ) {
+                /* paranoid */ verify( ! __preemption_enabled() );
+                /* paranoid */ verify( proc );
+                /* paranoid */ verify( proc->io.ctx );
+                $io_context & ctx = *proc->io.ctx;
+                if(!ctx.ext_sq.empty) {
+                        __ioarbiter_flush( *ctx.arbiter, &ctx );
+                }
+                __STATS__( true, io.calls.flush++; )
+                int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, 0, 0, (sigset_t *)0p, _NSIG / 8);
+                if( ret < 0 ) {
+                        switch((int)errno) {
+                        case EAGAIN:
+                        case EINTR:
+                        case EBUSY:
+                __atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_SEQ_CST );
+                return [count, count > 0 || to_submit > 0];
+        }
+        void main( $io_ctx_thread & this ) {
+                __ioctx_register( this );
+                __cfadbg_print_safe(io_core, "Kernel I/O : IO poller %d (%p) ready\n", this.ring->fd, &this);
+                const int reset_cnt = 5;
+                int reset = reset_cnt;
+                // Then loop until we need to start
+                LOOP:
+                while(!__atomic_load_n(&this.done, __ATOMIC_SEQ_CST)) {
+                        // Drain the io
+                        int count;
+                        bool again;
+                        disable_interrupts();
+                                [count, again] = __drain_io( *this.ring );
+                                if(!again) reset--;
                                 // Update statistics
+                                __STATS__( false, io.calls.errors.busy ++; )
+                                return;
+                        default:
+                                abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+                        }
+                }
+                __cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd);
+                __STATS__( true, io.calls.submitted += ret; )
+                /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+                /* paranoid */ verify( ctx.sq.to_submit >= ret );
+                ctx.sq.to_submit -= ret;
+                /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
+                // Release the consumed SQEs
+                __release_sqes( ctx );
+                /* paranoid */ verify( ! __preemption_enabled() );
+                ctx.proc->io.pending = false;
+                                __STATS__( true,
+                                        io.complete_q.completed_avg.val += count;
+                                        io.complete_q.completed_avg.cnt += 1;
+                                )
+                        enable_interrupts( __cfaabi_dbg_ctx );
+                        // If we got something, just yield and check again
+                        if(reset > 1) {
+                                yield();
+                                continue LOOP;
+                        }
+                        // We already failed to find completed entries a few times.
+                        if(reset == 1) {
+                                // Rearm the context so it can block
+                                // but don't block right away
+                                // we need to retry one last time in case
+                                // something completed *just now*
+                                __ioctx_prepare_block( this );
+                                continue LOOP;
+                        }
+                                __STATS__( false,
+                                        io.complete_q.blocks += 1;
+                                )
+                                __cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %d (%p)\n", this.ring->fd, &this);
+                                // block this thread
+                                wait( this.sem );
+                        // restore counter
+                        reset = reset_cnt;
+                }
+                __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller %d (%p) stopping\n", this.ring->fd, &this);
+                __ioctx_unregister( this );
+        }
 …
 //         head and tail must be fully filled and shouldn't ever be touched again.
 //
-        //=============================================================================================
-        // Allocation
-        // for user's convenience fill the sqes from the indexes
-        static inline void __fill(struct io_uring_sqe * out_sqes[], __u32 want, __u32 idxs[], struct $io_context * ctx)  {
-                struct io_uring_sqe * sqes = ctx->sq.sqes;
-                for(i; want) {
-                        __cfadbg_print_safe(io, "Kernel I/O : filling loop\n");
-                        out_sqes[i] = &sqes[idxs[i]];
+                }
+        }
-        // Try to directly allocate from the a given context
-        // Not thread-safe
-        static inline bool __alloc(struct $io_context * ctx, __u32 idxs[], __u32 want) {
-                __sub_ring_t & sq = ctx->sq;
-                const __u32 mask  = *sq.mask;
-                __u32 fhead = sq.free_ring.head;    // get the current head of the queue
-                __u32 ftail = sq.free_ring.tail;    // get the current tail of the queue
-                // If we don't have enough sqes, fail
-                if((ftail - fhead) < want) { return false; }
-                // copy all the indexes we want from the available list
-                for(i; want) {
-                        __cfadbg_print_safe(io, "Kernel I/O : allocating loop\n");
-                        idxs[i] = sq.free_ring.array[(fhead + i) & mask];
+                }
-                // Advance the head to mark the indexes as consumed
-                __atomic_store_n(&sq.free_ring.head, fhead + want, __ATOMIC_RELEASE);
-                // return success
-                return true;
+        }
         // Allocate a submit queue entry.
 …
         // for convenience, return both the index and the pointer to the sqe
         // sqe == &sqes[idx]
+        // Allocate `want` submit queue entries for the caller.
+        // On return, idxs[0..want) holds the allocated indexes and sqes[i] == &sqes_of_returned_context[idxs[i]].
+        // Returns the context the entries were allocated from: the current processor's context on the
+        // fast path, otherwise whatever context the cluster's arbiter hands back.
+        struct $io_context * cfa_io_allocate(struct io_uring_sqe * sqes[], __u32 idxs[], __u32 want) {
+                __cfadbg_print_safe(io, "Kernel I/O : attempting to allocate %u\n", want);
+                disable_interrupts();
+                processor * proc = __cfaabi_tls.this_processor;
+                // Check the processor pointer before it is dereferenced, otherwise the check can never fire usefully
+                /* paranoid */ verify( proc );
+                $io_context * ctx = proc->io.ctx;
+                /* paranoid */ verify( ctx );
+                __cfadbg_print_safe(io, "Kernel I/O : attempting to fast allocation\n");
+                // We can proceed to the fast path
+                if( __alloc(ctx, idxs, want) ) {
+                        // Allocation was successful
+                        __STATS__( true, io.alloc.fast += 1; )
+                        enable_interrupts( __cfaabi_dbg_ctx );
+                        __cfadbg_print_safe(io, "Kernel I/O : fast allocation successful from ring %d\n", ctx->fd);
+                        // Translate the indexes into sqe pointers for the caller
+                        __fill( sqes, want, idxs, ctx );
+                        return ctx;
+                }
+                // The fast path failed, fallback
+                __STATS__( true, io.alloc.fail += 1; )
+                // Fast path failed, fallback on arbitration
+                __STATS__( true, io.alloc.slow += 1; )
+                enable_interrupts( __cfaabi_dbg_ctx );
+                $io_arbiter * ioarb = proc->cltr->io.arbiter;
+                /* paranoid */ verify( ioarb );
+                __cfadbg_print_safe(io, "Kernel I/O : falling back on arbiter for allocation\n");
+                // NOTE(review): the arbiter call is made with interrupts enabled and presumably may block — confirm
+                struct $io_context * ret = __ioarbiter_allocate(*ioarb, proc, idxs, want);
+                __cfadbg_print_safe(io, "Kernel I/O : slow allocation completed from ring %d\n", ret->fd);
+                __fill( sqes, want, idxs,ret );
+                return ret;
+        }
+        //=============================================================================================
+        // submission
+        // Publish `have` already-filled sqes, identified by `idxs`, on the kernel submission ring of `ctx`.
+        // Marks the owning processor as having pending/dirty I/O and flushes the ring immediately
+        // when `lazy` is false or when more than 30 entries are waiting to be submitted.
+        static inline void __submit( struct $io_context * ctx, __u32 idxs[], __u32 have, bool lazy) {
+                // We can proceed to the fast path
+                // Get the right objects
+                __sub_ring_t & sq = ctx->sq;
+                const __u32 mask  = *sq.mask;
+                __u32 tail = *sq.kring.tail;
+                // Add the sqes to the array
+                for( i; have ) {
+                        __cfadbg_print_safe(io, "Kernel I/O : __submit loop\n");
+                        sq.kring.array[ (tail + i) & mask ] = idxs[i];
+                }
+                // Make the sqes visible to the submitter
+                __atomic_store_n(sq.kring.tail, tail + have, __ATOMIC_RELEASE);
+                // Count every published entry, not one per call: __cfa_io_flush hands to_submit to
+                // io_uring_enter as the number of sqes to consume and subtracts the consumed count,
+                // so under-counting would leave entries stranded on the ring.
+                sq.to_submit += have;
+                ctx->proc->io.pending = true;
+                ctx->proc->io.dirty   = true;
+                if(sq.to_submit > 30 || !lazy) {
+                        __cfa_io_flush( ctx->proc );
+                }
+        }
+        void cfa_io_submit( struct $io_context * inctx, __u32 idxs[], __u32 have, bool lazy ) __attribute__((nonnull (1))) {
+                __cfadbg_print_safe(io, "Kernel I/O : attempting to submit %u (%s)\n", have, lazy ? "lazy" : "eager");
+                disable_interrupts();
+                processor * proc = __cfaabi_tls.this_processor;
+                $io_context * ctx = proc->io.ctx;
+                /* paranoid */ verify( __cfaabi_tls.this_processor );
+                /* paranoid */ verify( ctx );
+                // Can we proceed to the fast path
+                if( ctx == inctx )              // We have the right instance?
+        [* volatile struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ) { // Claim a free sqe by storing `data` in its user_data; returns [sqe, index]; yields and retries until one is found
+                /* paranoid */ verify( data != 0 );
+                // Prepare the data we need
+                __attribute((unused)) int len   = 0;
+                __attribute((unused)) int block = 0;
+                __u32 cnt = *ring.submit_q.num;
+                __u32 mask = *ring.submit_q.mask;
+                __u32 off = thread_rand();
+                // Loop around looking for an available spot
+                for() {
+                        // Look through the list starting at some offset
+                        for(i; cnt) {
+                                __u64 expected = 3; // user_data value this loop treats as "entry is free"
+                                __u32 idx = (i + off) & mask; // Probe index, starting from a random offset
+                                volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];
+                                volatile __u64 * udata = &sqe->user_data;
+                                // Allocate the entry by CASing the user_data field from the free marker (3) to `data`
+                                if( *udata == expected &&
+                                        __atomic_compare_exchange_n( udata, &expected, data, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) )
+                                {
+                                        // update statistics
+                                        __STATS__( false,
+                                                io.submit_q.alloc_avg.val   += len;
+                                                io.submit_q.alloc_avg.block += block;
+                                                io.submit_q.alloc_avg.cnt   += 1;
+                                        )
+                                        // debug log
+                                        __cfadbg_print_safe( io, "Kernel I/O : allocated [%p, %u] for %p (%p)\n", sqe, idx, active_thread(), (void*)data );
+                                        // Success return the data
+                                        return [sqe, idx];
+                                }
+                                verify(expected != data);
+                                // This one was used
+                                len ++;
+                        }
+                        block++; // a full pass found no free entry
+                        yield();
+                }
+        }
+        static inline __u32 __submit_to_ready_array( struct __io_data & ring, __u32 idx, const __u32 mask ) { // Store `idx` in an empty slot of the ready array and return that slot's position; retries until a slot is found
+                /* paranoid */ verify( idx <= mask   );
+                /* paranoid */ verify( idx != -1ul32 );
+                // We need to find a spot in the ready array
+                __attribute((unused)) int len   = 0;
+                __attribute((unused)) int block = 0;
+                __u32 ready_mask = ring.submit_q.ready_cnt - 1; // assumes ready_cnt is a power of two — TODO confirm
+                __u32 off = thread_rand();
+                __u32 picked;
+                LOOKING: for() {
+                        for(i; ring.submit_q.ready_cnt) {
+                                picked = (i + off) & ready_mask;
+                                __u32 expected = -1ul32; // -1 marks an empty slot
+                                if( __atomic_compare_exchange_n( &ring.submit_q.ready[picked], &expected, idx, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) {
+                                        break LOOKING;
+                                }
+                                verify(expected != idx);
+                                len ++;
+                        }
+                        block++; // a full pass found no empty slot
+                        __u32 released = __release_consumed_submission( ring );
+                        if( released == 0 ) { // nothing was released, so yield before scanning again
+                                yield();
+                        }
+                }
+                // update statistics
+                __STATS__( false,
+                        io.submit_q.look_avg.val   += len;
+                        io.submit_q.look_avg.block += block;
+                        io.submit_q.look_avg.cnt   += 1;
+                )
+                return picked;
+        }
+        void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1))) {
+                __io_data & ring = *ctx->thrd.ring;
+                {
+                        __submit(ctx, idxs, have, lazy);
+                        // Mark the instance as no longer in-use, re-enable interrupts and return
+                        __STATS__( true, io.submit.fast += 1; )
+                        enable_interrupts( __cfaabi_dbg_ctx );
+                        __cfadbg_print_safe(io, "Kernel I/O : submitted on fast path\n");
+                        return;
+                }
+                // Fast path failed, fallback on arbitration
+                __STATS__( true, io.submit.slow += 1; )
+                enable_interrupts( __cfaabi_dbg_ctx );
+                __cfadbg_print_safe(io, "Kernel I/O : falling back on arbiter for submission\n");
+                __ioarbiter_submit(*inctx->arbiter, inctx, idxs, have, lazy);
+        }
+        //=============================================================================================
+        // Flushing
+                        __attribute__((unused)) volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];
+                        __cfadbg_print_safe( io,
+                                "Kernel I/O : submitting %u (%p) for %p\n"
+                                "    data: %p\n"
+                                "    opcode: %s\n"
+                                "    fd: %d\n"
+                                "    flags: %d\n"
+                                "    prio: %d\n"
+                                "    off: %p\n"
+                                "    addr: %p\n"
+                                "    len: %d\n"
+                                "    other flags: %d\n"
+                                "    splice fd: %d\n"
+                                "    pad[0]: %llu\n"
+                                "    pad[1]: %llu\n"
+                                "    pad[2]: %llu\n",
+                                idx, sqe,
+                                active_thread(),
+                                (void*)sqe->user_data,
+                                opcodes[sqe->opcode],
+                                sqe->fd,
+                                sqe->flags,
+                                sqe->ioprio,
+                                (void*)sqe->off,
+                                (void*)sqe->addr,
+                                sqe->len,
+                                sqe->accept_flags,
+                                sqe->splice_fd_in,
+                                sqe->__pad2[0],
+                                sqe->__pad2[1],
+                                sqe->__pad2[2]
+                        );
+                }
+                // Now get the data we definitely need
+                volatile __u32 * const tail = ring.submit_q.tail;
+                const __u32 mask  = *ring.submit_q.mask;
+                // There are 2 submission schemes, check which one we are using
+                if( ring.poller_submits ) {
+                        // If the poller thread submits, then we just need to add this to the ready array
+                        __submit_to_ready_array( ring, idx, mask );
+                        post( ctx->thrd.sem );
+                        __cfadbg_print_safe( io, "Kernel I/O : Added %u to ready for %p\n", idx, active_thread() );
+                }
+                else if( ring.eager_submits ) {
+                        __attribute__((unused)) __u32 picked = __submit_to_ready_array( ring, idx, mask );
+                        #if defined(LEADER_LOCK)
+                                if( !try_lock(ring.submit_q.submit_lock) ) {
+                                        __STATS__( false,
+                                                io.submit_q.helped += 1;
+                                        )
+                                        return;
+                                }
+                                /* paranoid */ verify( ! __preemption_enabled() );
+                                __STATS__( true,
+                                        io.submit_q.leader += 1;
+                                )
+                        #else
+                                for() {
+                                        yield();
+                                        if( try_lock(ring.submit_q.submit_lock __cfaabi_dbg_ctx2) ) {
+                                                __STATS__( false,
+                                                        io.submit_q.leader += 1;
+                                                )
+                                                break;
+                                        }
+                                        // If some one else collected our index, we are done
+                                        #warning ABA problem
+                                        if( ring.submit_q.ready[picked] != idx ) {
+                                                __STATS__( false,
+                                                        io.submit_q.helped += 1;
+                                                )
+                                                return;
+                                        }
+                                        __STATS__( false,
+                                                io.submit_q.busy += 1;
+                                        )
+                                }
+                        #endif
+                        // We got the lock
+                        // Collect the submissions
+                        unsigned to_submit = __collect_submitions( ring );
+                        // Actually submit
+                        int ret = __io_uring_enter( ring, to_submit, false );
+                        #if defined(LEADER_LOCK)
+                                /* paranoid */ verify( ! __preemption_enabled() );
+                                next(ring.submit_q.submit_lock);
+                        #else
+                                unlock(ring.submit_q.submit_lock);
+                        #endif
+                        if( ret < 0 ) {
+                                return;
+                        }
+                        // Release the consumed SQEs
+                        __release_consumed_submission( ring );
+                        // update statistics
+                        __STATS__( false,
+                                io.submit_q.submit_avg.rdy += to_submit;
+                                io.submit_q.submit_avg.csm += ret;
+                                io.submit_q.submit_avg.cnt += 1;
+                        )
+                        __cfadbg_print_safe( io, "Kernel I/O : submitted %u (among %u) for %p\n", idx, ret, active_thread() );
+                }
+                else
+                {
+                        // get mutual exclusion
+                        #if defined(LEADER_LOCK)
+                                while(!try_lock(ring.submit_q.submit_lock));
+                        #else
+                                lock(ring.submit_q.submit_lock __cfaabi_dbg_ctx2);
+                        #endif
+                        /* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 3ul64,
+                        /* paranoid */  "index %u already reclaimed\n"
+                        /* paranoid */  "head %u, prev %u, tail %u\n"
+                        /* paranoid */  "[-0: %u,-1: %u,-2: %u,-3: %u]\n",
+                        /* paranoid */  idx,
+                        /* paranoid */  *ring.submit_q.head, ring.submit_q.prev_head, *tail
+                        /* paranoid */  ,ring.submit_q.array[ ((*ring.submit_q.head) - 0) & (*ring.submit_q.mask) ]
+                        /* paranoid */  ,ring.submit_q.array[ ((*ring.submit_q.head) - 1) & (*ring.submit_q.mask) ]
+                        /* paranoid */  ,ring.submit_q.array[ ((*ring.submit_q.head) - 2) & (*ring.submit_q.mask) ]
+                        /* paranoid */  ,ring.submit_q.array[ ((*ring.submit_q.head) - 3) & (*ring.submit_q.mask) ]
+                        /* paranoid */ );
+                        // Append to the list of ready entries
+                        /* paranoid */ verify( idx <= mask );
+                        ring.submit_q.array[ (*tail) & mask ] = idx;
+                        __atomic_fetch_add(tail, 1ul32, __ATOMIC_SEQ_CST);
+                        // Submit however many entries need to be submitted
+                        int ret = __io_uring_enter( ring, 1, false );
+                        if( ret < 0 ) {
+                                switch((int)errno) {
+                                default:
+                                        abort( "KERNEL ERROR: IO_URING SUBMIT - %s\n", strerror(errno) );
+                                }
+                        }
+                        /* paranoid */ verify(ret == 1);
+                        // update statistics
+                        __STATS__( false,
+                                io.submit_q.submit_avg.csm += 1;
+                                io.submit_q.submit_avg.cnt += 1;
+                        )
+                        {
+                                __attribute__((unused)) volatile __u32 * const head = ring.submit_q.head;
+                                __attribute__((unused)) __u32 last_idx = ring.submit_q.array[ ((*head) - 1) & mask ];
+                                __attribute__((unused)) volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[last_idx];
+                                __cfadbg_print_safe( io,
+                                        "Kernel I/O : last submitted is %u (%p)\n"
+                                        "    data: %p\n"
+                                        "    opcode: %s\n"
+                                        "    fd: %d\n"
+                                        "    flags: %d\n"
+                                        "    prio: %d\n"
+                                        "    off: %p\n"
+                                        "    addr: %p\n"
+                                        "    len: %d\n"
+                                        "    other flags: %d\n"
+                                        "    splice fd: %d\n"
+                                        "    pad[0]: %llu\n"
+                                        "    pad[1]: %llu\n"
+                                        "    pad[2]: %llu\n",
+                                        last_idx, sqe,
+                                        (void*)sqe->user_data,
+                                        opcodes[sqe->opcode],
+                                        sqe->fd,
+                                        sqe->flags,
+                                        sqe->ioprio,
+                                        (void*)sqe->off,
+                                        (void*)sqe->addr,
+                                        sqe->len,
+                                        sqe->accept_flags,
+                                        sqe->splice_fd_in,
+                                        sqe->__pad2[0],
+                                        sqe->__pad2[1],
+                                        sqe->__pad2[2]
+                                );
+                        }
+                        __atomic_thread_fence( __ATOMIC_SEQ_CST );
+                        // Release the consumed SQEs
+                        __release_consumed_submission( ring );
+                        // ring.submit_q.sqes[idx].user_data = 3ul64;
+                        #if defined(LEADER_LOCK)
+                                next(ring.submit_q.submit_lock);
+                        #else
+                                unlock(ring.submit_q.submit_lock);
+                        #endif
+                        __cfadbg_print_safe( io, "Kernel I/O : submitted %u for %p\n", idx, active_thread() );
+                }
+        }
+        // #define PARTIAL_SUBMIT 32
+        // Go through the list of submissions in the ready array and move them into
+        // the ring's submit queue.
+        // Returns the number of entries appended to the submit queue; the shared tail
+        // is advanced by that same amount before returning.
+        // With PARTIAL_SUBMIT defined, at most PARTIAL_SUBMIT ready slots are scanned per call,
+        // starting where the previous call stopped (prev_ready); otherwise every slot is scanned.
+        static unsigned __collect_submitions( struct __io_data & ring ) {
+                /* paranoid */ verify( ring.submit_q.ready != 0p );
+                /* paranoid */ verify( ring.submit_q.ready_cnt > 0 );
+                unsigned to_submit = 0;
+                // Snapshot of the tail; new entries are written at tail + to_submit
+                __u32 tail = *ring.submit_q.tail;
+                const __u32 mask = *ring.submit_q.mask;
+                #if defined(PARTIAL_SUBMIT)
+                        #if defined(LEADER_LOCK)
+                                #error PARTIAL_SUBMIT and LEADER_LOCK cannot co-exist
+                        #endif
+                        // Scan a bounded window and remember where the next call should resume
+                        const __u32 cnt = ring.submit_q.ready_cnt > PARTIAL_SUBMIT ? PARTIAL_SUBMIT : ring.submit_q.ready_cnt;
+                        const __u32 offset = ring.submit_q.prev_ready;
+                        ring.submit_q.prev_ready += cnt;
+                #else
+                        // Scan the whole ready array from the start
+                        const __u32 cnt = ring.submit_q.ready_cnt;
+                        const __u32 offset = 0;
+                #endif
+                // Go through the list of ready submissions
+                for( c; cnt ) {
+                        // Wrap around the ready array
+                        __u32 i = (offset + c) % ring.submit_q.ready_cnt;
+                        // replace any submission with the sentinel (-1), to consume it.
+                        __u32 idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED);
+                        // If it was already the sentinel, the slot was empty: skip it
+                        if( idx == -1ul32 ) continue;
+                        // If we got a real submission, append it to the list
+                        ring.submit_q.array[ (tail + to_submit) & mask ] = idx & mask;
+                        to_submit++;
+                }
+                // Increment the tail based on how many we are ready to submit
+                __atomic_fetch_add(ring.submit_q.tail, to_submit, __ATOMIC_SEQ_CST);
+                return to_submit;
+        }
         // Go through the ring's submit queue and release everything that has already been consumed
         // by io_uring
+        // This cannot be done by multiple threads
+        static __u32 __release_sqes( struct $io_context & ctx ) {
+                const __u32 mask = *ctx.sq.mask;
+        static __u32 __release_consumed_submission( struct __io_data & ring ) {
+                const __u32 smask = *ring.submit_q.mask;
+                // We need to get the lock to copy the old head and new head
+                if( !try_lock(ring.submit_q.release_lock __cfaabi_dbg_ctx2) ) return 0;
                 __attribute__((unused))
                 __u32 ctail = *ctx.sq.kring.tail;    // get the current tail of the queue
                 __u32 chead = *ctx.sq.kring.head;        // get the current head of the queue
                 __u32 phead = ctx.sq.kring.released; // get the head the last time we were here
                 __u32 ftail = ctx.sq.free_ring.tail;  // get the current tail of the queue
+                __u32 ctail = *ring.submit_q.tail;        // get the current tail of the queue
+                __u32 chead = *ring.submit_q.head;              // get the current head of the queue
+                __u32 phead = ring.submit_q.prev_head;  // get the head the last time we were here
+                ring.submit_q.prev_head = chead;                // note up to were we processed
+                unlock(ring.submit_q.release_lock);
                 // the 3 fields are organized like this diagram
 …
                 __u32 count = chead - phead;
-                if(count == 0) {
-                        return 0;
+                }
                 // We acquired an previous-head/current-head range
                 // go through the range and release the sqes
                 for( i; count ) {
+                        __cfadbg_print_safe(io, "Kernel I/O : release loop\n");
+                        __u32 idx = ctx.sq.kring.array[ (phead + i) & mask ];
+                        ctx.sq.free_ring.array[ (ftail + i) & mask ] = idx;
+                }
+                ctx.sq.kring.released = chead;          // note up to were we processed
+                __atomic_store_n(&ctx.sq.free_ring.tail, ftail + count, __ATOMIC_SEQ_CST);
+                __ioarbiter_notify(ctx);
+                        __u32 idx = ring.submit_q.array[ (phead + i) & smask ];
+                        /* paranoid */ verify( 0 != ring.submit_q.sqes[ idx ].user_data );
+                        __clean( &ring.submit_q.sqes[ idx ] );
+                }
                 return count;
+        }
+//=============================================================================================
+// I/O Arbiter
+//=============================================================================================
+        // Blocking allocation path through the arbiter.
+        // Raises the arbiter's pending flag, waits on the pending condition (passing `want`
+        // as the wait value), then allocates `want` indexes into `idxs` from the context
+        // stored in this.pending.ctx and returns that context.
+        // `proc` is not used in this body.
+        static $io_context * __ioarbiter_allocate( $io_arbiter & mutex this, processor * proc, __u32 idxs[], __u32 want ) {
+                __cfadbg_print_safe(io, "Kernel I/O : arbiter allocating\n");
+                __STATS__( false, io.alloc.block += 1; )
+                // No one has any resources left, wait for something to finish
+                // Mark as pending
+                __atomic_store_n( &this.pending.flag, true, __ATOMIC_SEQ_CST );
+                // Wait for our turn to submit
+                wait( this.pending.blocked, want );
+                // Once woken, the allocation is expected to succeed (checked only by verify)
+                __attribute((unused)) bool ret =
+                __alloc( this.pending.ctx, idxs, want);
+                /* paranoid */ verify( ret );
+                return this.pending.ctx;
+        }
+        // Offer `ctx` to the allocators blocked on the arbiter's pending condition.
+        // Records `ctx` as the pending context, then wakes waiters one at a time; the
+        // pending flag is cleared only if the loop drains every waiter.
+        static void __ioarbiter_notify( $io_arbiter & mutex this, $io_context * ctx ) {
+                /* paranoid */ verify( !is_empty(this.pending.blocked) );
+                // Context the woken allocators will allocate from
+                this.pending.ctx = ctx;
+                while( !is_empty(this.pending.blocked) ) {
+                        __cfadbg_print_safe(io, "Kernel I/O : notifying\n");
+                        // Entries currently sitting in the context's free ring
+                        __u32 have = ctx->sq.free_ring.tail - ctx->sq.free_ring.head;
+                        // Value attached to the front waiter (presumably the `want` it passed to wait())
+                        __u32 want = front( this.pending.blocked );
+                        // NOTE(review): this stops waking when MORE entries are available than the
+                        // front waiter asked for, and keeps waking otherwise; this looks inverted
+                        // (expected `have < want`) - confirm the intended condition.
+                        if( have > want ) return;
+                        signal_block( this.pending.blocked );
+                }
+                // Nobody is waiting anymore
+                this.pending.flag = false;
+        }
+        // Cheap front-end for the arbiter notification: only enters the arbiter
+        // when its pending flag is currently raised.
+        static void __ioarbiter_notify( $io_context & ctx ) {
+                // Nothing is pending on the arbiter, so there is nobody to notify
+                if( !__atomic_load_n( &ctx.arbiter->pending.flag, __ATOMIC_SEQ_CST) ) return;
+                __ioarbiter_notify( *ctx.arbiter, &ctx );
+        }
+        // Submit `have` indexes from `idxs` to `ctx` by going through the arbiter.
+        // Marks the context's external submission queue as non-empty, blocks on the
+        // context's ext_sq.blocked condition until signalled, then calls __submit
+        // on the context with the caller's `lazy` flag.
+        static void __ioarbiter_submit( $io_arbiter & mutex this, $io_context * ctx, __u32 idxs[], __u32 have, bool lazy ) {
+                __cfadbg_print_safe(io, "Kernel I/O : submitting %u from the arbiter to context %u\n", have, ctx->fd);
+                // The context must belong to this arbiter
+                /* paranoid */ verify( &this == ctx->arbiter );
+                // Mark as pending
+                __atomic_store_n( &ctx->ext_sq.empty, false, __ATOMIC_SEQ_CST );
+                __cfadbg_print_safe(io, "Kernel I/O : waiting to submit %u\n", have);
+                // Wait for our turn to submit
+                wait( ctx->ext_sq.blocked );
+                // Submit our indexes
+                __submit(ctx, idxs, have, lazy);
+                __cfadbg_print_safe(io, "Kernel I/O : %u submitted from arbiter\n", have);
+        }
+        static void __ioarbiter_flush( $io_arbiter & mutex this, $io_context * ctx ) {
+                /* paranoid */ verify( &this == ctx->arbiter );
+                __STATS__( false, io.flush.external += 1; )
+                __cfadbg_print_safe(io, "Kernel I/O : arbiter flushing\n");
+                condition & blcked = ctx->ext_sq.blocked;
+                /* paranoid */ verify( ctx->ext_sq.empty == is_empty( blcked ) );
+                while(!is_empty( blcked )) {
+                        signal_block( blcked );
+                }
+                ctx->ext_sq.empty = true;
+        void __sqe_clean( volatile struct io_uring_sqe * sqe ) {
+                __clean( sqe );
+        }
+        // Reset a submission queue entry so it can be recognised as unused.
+        // The entry's user_data is set to the marker value 3.
+        static inline void __clean( volatile struct io_uring_sqe * sqe ) {
+                // If we are in debug mode, thrash the fields to make sure we catch reclamation errors
+                __cfaabi_dbg_debug_do(
+                        // Fill the whole entry with 0xde bytes
+                        memset(sqe, 0xde, sizeof(*sqe));
+                        // Point the opcode at the last slot of the `opcodes` name table
+                        sqe->opcode = (sizeof(opcodes) / sizeof(const char *)) - 1;
+                );
+                // Mark the entry as unused
+                __atomic_store_n(&sqe->user_data, 3ul64, __ATOMIC_SEQ_CST);
+        }
 #endif

libcfa/src/concurrency/io/call.cfa.in

-              r266ecf1
+              r182256b
                         | IOSQE_IO_DRAIN
                 #endif
+                #if defined(CFA_HAVE_IOSQE_ASYNC)
+                        | IOSQE_ASYNC
+                #endif
+        ;
+        static const __u32 LINK_FLAGS = 0
                 #if defined(CFA_HAVE_IOSQE_IO_LINK)
                         | IOSQE_IO_LINK
 …
                         | IOSQE_IO_HARDLINK
                 #endif
-                #if defined(CFA_HAVE_IOSQE_ASYNC)
-                        | IOSQE_ASYNC
-                #endif
-                #if defined(CFA_HAVE_IOSQE_BUFFER_SELECTED)
-                        | IOSQE_BUFFER_SELECTED
-                #endif
+        ;
 …
+        ;
+        extern struct $io_context * cfa_io_allocate(struct io_uring_sqe * out_sqes[], __u32 out_idxs[], __u32 want)  __attribute__((nonnull (1,2)));
+        extern void cfa_io_submit( struct $io_context * in_ctx, __u32 in_idxs[], __u32 have, bool lazy ) __attribute__((nonnull (1,2)));
+        extern [* volatile struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data );
+        extern void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1)));
+        static inline io_context * __get_io_context( void ) {
+                cluster * cltr = active_cluster();
+                /* paranoid */ verifyf( cltr, "No active cluster for io operation\\n");
+                assertf( cltr->io.cnt > 0, "Cluster %p has no default io contexts and no context was specified\\n", cltr );
+                /* paranoid */ verifyf( cltr->io.ctxs, "default io contexts for cluster %p are missing\\n", cltr);
+                return &cltr->io.ctxs[ thread_rand() % cltr->io.cnt ];
+        }
 #endif
 …
 extern "C" {
         #include <asm/types.h>
+        #include <sys/types.h>
         #include <sys/socket.h>
         #include <sys/syscall.h>
 …
                 return ', '.join(args_a)
 AsyncTemplate = """inline void async_{name}(io_future_t & future, {params}, __u64 submit_flags) {{
+AsyncTemplate = """inline void async_{name}(io_future_t & future, {params}, int submit_flags, io_cancellation * cancellation, io_context * context) {{
         #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_{op})
                 ssize_t res = {name}({args});
 …
                 }}
         #else
+                // we don't support LINK yet
+                if( 0 != (submit_flags & LINK_FLAGS) ) {{
+                        errno = ENOTSUP; return -1;
+                }}
+                if( !context ) {{
+                        context = __get_io_context();
+                }}
+                if(cancellation) {{
+                        cancellation->target = (__u64)(uintptr_t)&future;
+                }}
                 __u8 sflags = REGULAR_FLAGS & submit_flags;
+                struct __io_data & ring = *context->thrd.ring;
                 __u32 idx;
                 struct io_uring_sqe * sqe;
                 struct $io_context * ctx = cfa_io_allocate( &sqe, &idx, 1 );
+                [(volatile struct io_uring_sqe *) sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future );
                 sqe->opcode = IORING_OP_{op};
-                sqe->user_data = (__u64)(uintptr_t)&future;
                 sqe->flags = sflags;
                 sqe->ioprio = 0;
 …
                 verify( sqe->user_data == (__u64)(uintptr_t)&future );
                 cfa_io_submit( ctx, &idx, 1, 0 != (submit_flags & CFA_IO_LAZY) );
+                __submit( context, idx );
         #endif
 }}"""
+SyncTemplate = """{ret} cfa_{name}({params}, __u64 submit_flags) {{
+SyncTemplate = """{ret} cfa_{name}({params}, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {{
+        if( timeout >= 0 ) {{
+                errno = ENOTSUP;
+                return -1;
+        }}
         io_future_t future;
         async_{name}( future, {args}, submit_flags );
+        async_{name}( future, {args}, submit_flags, cancellation, context );
         wait( future );
 …
         if c.define:
                 print("""#if defined({define})
         {ret} cfa_{name}({params}, __u64 submit_flags);
+        {ret} cfa_{name}({params}, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
 #endif""".format(define=c.define,ret=c.ret, name=c.name, params=c.params))
         else:
                 print("{ret} cfa_{name}({params}, __u64 submit_flags);"
+                print("{ret} cfa_{name}({params}, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);"
                 .format(ret=c.ret, name=c.name, params=c.params))
 …
         if c.define:
                 print("""#if defined({define})
         void async_{name}(io_future_t & future, {params}, __u64 submit_flags);
+        void async_{name}(io_future_t & future, {params}, int submit_flags, io_cancellation * cancellation, io_context * context);
 #endif""".format(define=c.define,name=c.name, params=c.params))
         else:
                 print("void async_{name}(io_future_t & future, {params}, __u64 submit_flags);"
+                print("void async_{name}(io_future_t & future, {params}, int submit_flags, io_cancellation * cancellation, io_context * context);"
                 .format(name=c.name, params=c.params))
 print("\n")
 …
 print("""
+//-----------------------------------------------------------------------------
+bool cancel(io_cancellation & this) {
+        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_ASYNC_CANCEL)
+                return false;
+        #else
+                io_future_t future;
+                io_context * context = __get_io_context();
+                __u8 sflags = 0;
+                struct __io_data & ring = *context->thrd.ring;
+                __u32 idx;
+                volatile struct io_uring_sqe * sqe;
+                [sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future );
+                sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
+                sqe->opcode = IORING_OP_ASYNC_CANCEL;
+                sqe->flags = sflags;
+                sqe->addr = this.target;
+                verify( sqe->user_data == (__u64)(uintptr_t)&future );
+                __submit( context, idx );
+                wait(future);
+                if( future.result == 0 ) return true; // Entry found
+                if( future.result == -EALREADY) return true; // Entry found but in progress
+                if( future.result == -ENOENT ) return false; // Entry not found
+                return false;
+        #endif
+}
 //-----------------------------------------------------------------------------
 // Check if a function is has asynchronous

libcfa/src/concurrency/io/setup.cfa

-              r266ecf1
+              r182256b
 #if !defined(CFA_HAVE_LINUX_IO_URING_H)
+        // Empty stand-ins compiled when CFA_HAVE_LINUX_IO_URING_H is not defined:
+        // every entry point below exists but does nothing.
+        void __kernel_io_startup() {
+                // Nothing to do without io_uring
+        }
+        void __kernel_io_shutdown() {
+                // Nothing to do without io_uring
+        }
         void ?{}(io_context_params & this) {}
+        void  ?{}($io_context & this, struct cluster & cl) {}
+        void ^?{}($io_context & this) {}
+        void __cfa_io_start( processor * proc ) {}
+        void __cfa_io_flush( processor * proc ) {}
+        void __cfa_io_stop ( processor * proc ) {}
+        // No arbiter exists without io_uring: creation yields a null pointer
+        $io_arbiter * create(void) { return 0p; }
+        void destroy($io_arbiter *) {}
+        void ?{}(io_context & this, struct cluster & cl) {}
+        void ?{}(io_context & this, struct cluster & cl, const io_context_params & params) {}
+        void ^?{}(io_context & this) {}
+        void ^?{}(io_context & this, bool cluster_context) {}
+        void register_fixed_files( io_context &, int *, unsigned ) {}
+        void register_fixed_files( cluster    &, int *, unsigned ) {}
 #else
 …
         // Default constructor: fill an io_context_params with its default values.
         void ?{}(io_context_params & this) {
                 this.num_entries = 256;
+                this.num_ready = 256;
+                // -1: no submit affinity selected (presumably "unset" - confirm with users of submit_aff)
+                this.submit_aff = -1;
+                // Every optional submission/polling mode starts disabled
+                this.eager_submits = false;
+                this.poller_submits = false;
+                this.poll_submit = false;
+                this.poll_complete = false;
+        }
 …
 //=============================================================================================
+// I/O Startup / Shutdown logic + Master Poller
+//=============================================================================================
+        // IO Master poller loop forward
+        static void * iopoll_loop( __attribute__((unused)) void * args );
+        // State of the single io poller pthread, shared between __kernel_io_startup,
+        // __kernel_io_shutdown and iopoll_loop.
+        static struct {
+                      pthread_t  thrd;    // pthread handle to io poller thread
+                      void *     stack;   // pthread stack for io poller thread
+                      int        epollfd; // file descriptor to the epoll instance
+                volatile     bool run;     // Whether or not to continue
+                volatile     bool stopped; // Whether the poller has finished running
+                volatile uint64_t epoch;   // Epoch used for memory reclamation
+        } iopoll;
+        // Start the kernel-wide io polling machinery: create the epoll instance and
+        // launch the io poller pthread (iopoll_loop).
+        // Aborts if the epoll instance cannot be created.
+        void __kernel_io_startup(void) {
+                __cfadbg_print_safe(io_core, "Kernel : Creating EPOLL instance\n" );
+                iopoll.epollfd = epoll_create1(0);
+                if (iopoll.epollfd == -1) {
+                        abort( "internal error, epoll_create1\n");
+                }
+                // Initialise the shared state BEFORE the thread exists: iopoll_loop reads
+                // iopoll.run as its loop condition, so setting it after thread creation
+                // would let the new thread observe `false` and exit immediately.
+                iopoll.run     = true;
+                iopoll.stopped = false;
+                iopoll.epoch   = 0;
+                __cfadbg_print_safe(io_core, "Kernel : Starting io poller thread\n" );
+                iopoll.stack   = __create_pthread( &iopoll.thrd, iopoll_loop, 0p );
+        }
+        // Stop the io poller pthread and release the epoll instance.
+        // Clears iopoll.run, queues SIGUSR1 to the poller thread (the poller leaves
+        // SIGUSR1 unblocked only while inside epoll_pwait), tears the pthread down,
+        // then closes the epoll file descriptor. Aborts if that close fails.
+        void __kernel_io_shutdown(void) {
+                // Notify the io poller thread of the shutdown
+                iopoll.run = false;
+                sigval val = { 1 };
+                // NOTE(review): the result of pthread_sigqueue is not checked
+                pthread_sigqueue( iopoll.thrd, SIGUSR1, val );
+                // Wait for the io poller thread to finish
+                __destroy_pthread( iopoll.thrd, iopoll.stack, 0p );
+                int ret = close(iopoll.epollfd);
+                if (ret == -1) {
+                        abort( "internal error, close epoll\n");
+                }
+                // Io polling is now fully stopped
+                __cfadbg_print_safe(io_core, "Kernel : IO poller stopped\n" );
+        }
+        // Body of the io poller pthread.
+        // Registers itself as a (non-full) processor id, blocks every signal, then loops
+        // on epoll_pwait with SIGUSR1 unblocked for the duration of the wait. For each
+        // ready event it drains the ring's eventfd and posts the semaphore of the
+        // matching $io_ctx_thread. The loop ends once iopoll.run is false; the thread
+        // then flags iopoll.stopped, unregisters and returns a null pointer.
+        // `args` is unused.
+        static void * iopoll_loop( __attribute__((unused)) void * args ) {
+                __processor_id_t id;
+                id.full_proc = false;
+                id.id = doregister(&id);
+                __cfaabi_tls.this_proc_id = &id;
+                __cfadbg_print_safe(io_core, "Kernel : IO poller thread starting\n" );
+                // Block signals to control when they arrive
+                sigset_t mask;
+                sigfillset(&mask);
+                // pthread_sigmask reports failure by returning a non-zero error number
+                // (it never returns -1), so test against 0
+                if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) != 0 ) {
+                        abort( "internal error, pthread_sigmask" );
+                }
+                // SIGUSR1 is the only signal let through, and only while inside epoll_pwait
+                sigdelset( &mask, SIGUSR1 );
+                // Create sufficient events
+                struct epoll_event events[10];
+                // Main loop
+                while( iopoll.run ) {
+                        __cfadbg_print_safe(io_core, "Kernel I/O - epoll : waiting on io_uring contexts\n");
+                        // increment the epoch to notify any deleters we are starting a new cycle
+                        __atomic_fetch_add(&iopoll.epoch, 1, __ATOMIC_SEQ_CST);
+                        // Wait for events
+                        int nfds = epoll_pwait( iopoll.epollfd, events, 10, -1, &mask );
+                        __cfadbg_print_safe(io_core, "Kernel I/O - epoll : %d io contexts events, waking up\n", nfds);
+                        // Check if an error occurred
+                        if (nfds == -1) {
+                                // Interrupted by a signal (e.g. the shutdown SIGUSR1): re-check iopoll.run
+                                if( errno == EINTR ) continue;
+                                abort( "internal error, epoll_pwait" );
+                        }
+                        for(i; nfds) {
+                                // The event payload carries the address of the io context thread
+                                $io_ctx_thread * io_ctx = ($io_ctx_thread *)(uintptr_t)events[i].data.u64;
+                                /* paranoid */ verify( io_ctx );
+                                __cfadbg_print_safe(io_core, "Kernel I/O - epoll : Unparking io poller %d (%p)\n", io_ctx->ring->fd, io_ctx);
+                                #if !defined( __CFA_NO_STATISTICS__ )
+                                        __cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats;
+                                #endif
+                                // Drain the eventfd, then wake the context's poller
+                                eventfd_t v;
+                                eventfd_read(io_ctx->ring->efd, &v);
+                                post( io_ctx->sem );
+                        }
+                }
+                __atomic_store_n(&iopoll.stopped, true, __ATOMIC_SEQ_CST);
+                __cfadbg_print_safe(io_core, "Kernel : IO poller thread stopping\n" );
+                unregister(&id);
+                return 0p;
+        }
+//=============================================================================================
 // I/O Context Construction/Destruction
 //=============================================================================================
+        static void __io_uring_setup ( $io_context & this, const io_context_params & params_in, int procfd );
+        static void __io_uring_teardown( $io_context & this );
+        static void __epoll_register($io_context & ctx);
+        static void __epoll_unregister($io_context & ctx);
+        void __ioarbiter_register( $io_arbiter & mutex, $io_context & ctx );
+        void __ioarbiter_unregister( $io_arbiter & mutex, $io_context & ctx );
+        // Construct an $io_context for processor `proc` on cluster `cl`.
+        // Binds the context to the processor and to the cluster's arbiter, starts with an
+        // empty external submission queue, then sets up the io_uring instance using the
+        // cluster's io parameters and the processor's `idle` descriptor.
+        void ?{}($io_context & this, processor * proc, struct cluster & cl) {
+                // The cluster must already have an arbiter
+                /* paranoid */ verify( cl.io.arbiter );
+                this.proc = proc;
+                this.arbiter = cl.io.arbiter;
+                this.ext_sq.empty = true;
+                // Default-construct the condition used by external submitters
+                (this.ext_sq.blocked){};
+                __io_uring_setup( this, cl.io.params, proc->idle );
+                __cfadbg_print_safe(io_core, "Kernel I/O : Created ring for io_context %u (%p)\n", this.fd, &this);
+        }
+        void ^?{}($io_context & this) {
+                __cfadbg_print_safe(io_core, "Kernel I/O : tearing down io_context %u\n", this.fd);
+                __io_uring_teardown( this );
+                __cfadbg_print_safe(io_core, "Kernel I/O : Destroyed ring for io_context %u\n", this.fd);
+        void ?{}($io_ctx_thread & this, struct cluster & cl) { (this.self){ "IO Poller", cl }; }
+        void main( $io_ctx_thread & this );
+        static inline $thread * get_thread( $io_ctx_thread & this ) { return &this.self; }
+        void ^?{}( $io_ctx_thread & mutex this ) {}
+        static void __io_create ( __io_data & this, const io_context_params & params_in );
+        static void __io_destroy( __io_data & this );
+        // Construct an io_context on cluster `cl` with explicit parameters.
+        // Builds the poller thread object, allocates and creates its ring from `params`,
+        // then starts the poller thread running `main`.
+        void ?{}(io_context & this, struct cluster & cl, const io_context_params & params) {
+                (this.thrd){ cl };
+                // NOTE(review): the allocation result is used without a null check
+                this.thrd.ring = malloc();
+                __cfadbg_print_safe(io_core, "Kernel I/O : Creating ring for io_context %p\n", &this);
+                __io_create( *this.thrd.ring, params );
+                __cfadbg_print_safe(io_core, "Kernel I/O : Starting poller thread for io_context %p\n", &this);
+                // Must be cleared before the thread starts; the destructor sets it to request shutdown
+                this.thrd.done = false;
+                __thrd_start( this.thrd, main );
+                __cfadbg_print_safe(io_core, "Kernel I/O : io_context %p ready\n", &this);
+        }
+        // Construct an io_context on cluster `cl` using default-constructed parameters.
+        void ?{}(io_context & this, struct cluster & cl) {
+                io_context_params params;
+                // Delegate to the constructor taking explicit parameters
+                (this){ cl, params };
+        }
+        // Destroy an io_context: stop its poller thread, then destroy and free its ring.
+        // `cluster_context` selects how the poller thread is woken for shutdown:
+        //   - true : the thread is pulled off its cluster's ready queue if needed, moved to
+        //            the active cluster and unparked (interrupts are disabled meanwhile);
+        //   - false: the thread's semaphore is simply posted.
+        // Aborts if, in the cluster case, the thread is neither ready, preempted nor blocked.
+        void ^?{}(io_context & this, bool cluster_context) {
+                __cfadbg_print_safe(io_core, "Kernel I/O : tearing down io_context %p\n", &this);
+                // Notify the thread of the shutdown
+                __atomic_store_n(&this.thrd.done, true, __ATOMIC_SEQ_CST);
+                // If this is an io_context within a cluster, things get trickier
+                $thread & thrd = this.thrd.self;
+                if( cluster_context ) {
+                        // We are about to do weird things with the threads
+                        // we don't need interrupts to complicate everything
+                        disable_interrupts();
+                        // Get cluster info
+                        cluster & cltr = *thrd.curr_cluster;
+                        /* paranoid */ verify( cltr.idles.total == 0 || &cltr == mainCluster );
+                        /* paranoid */ verify( !ready_mutate_islocked() );
+                        // We need to adjust the clean-up based on where the thread is
+                        if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) {
+                                // This is the tricky case
+                                // The thread was preempted or ready to run and now it is on the ready queue
+                                // but the cluster is shutting down, so there aren't any processors to run the ready queue
+                                // the solution is to steal the thread from the ready-queue and pretend it was blocked all along
+                                ready_schedule_lock();
+                                        // The thread should be on the list
+                                        /* paranoid */ verify( thrd.link.next != 0p );
+                                        // Remove the thread from the ready queue of this cluster
+                                        // The thread should be the last on the list
+                                        __attribute__((unused)) bool removed = remove_head( &cltr, &thrd );
+                                        /* paranoid */ verify( removed );
+                                        thrd.link.next = 0p;
+                                        thrd.link.prev = 0p;
+                                        // Fixup the thread state
+                                        thrd.state = Blocked;
+                                        thrd.ticket = TICKET_BLOCKED;
+                                        thrd.preempted = __NO_PREEMPTION;
+                                ready_schedule_unlock();
+                                // Pretend like the thread was blocked all along
+                        }
+                        // !!! This is not an else if !!!
+                        // Ok, now the thread is blocked (whether we cheated to get here or not)
+                        if( thrd.state == Blocked ) {
+                                // This is the "easy case"
+                                // The thread is parked and can easily be moved to active cluster
+                                verify( thrd.curr_cluster != active_cluster() || thrd.curr_cluster == mainCluster );
+                                thrd.curr_cluster = active_cluster();
+                                // unpark the fast io_poller
+                                unpark( &thrd );
+                        }
+                        else {
+                                // The thread is in a weird state
+                                // I don't know what to do here
+                                abort("io_context poller thread is in unexpected state, cannot clean-up correctly\n");
+                        }
+                        // The weird thread kidnapping stuff is over, restore interrupts.
+                        enable_interrupts( __cfaabi_dbg_ctx );
+                } else {
+                        // Stand-alone context: wake the poller through its semaphore
+                        post( this.thrd.sem );
+                }
+                // Destroy the poller thread object before touching its ring
+                ^(this.thrd){};
+                __cfadbg_print_safe(io_core, "Kernel I/O : Stopped poller thread for io_context %p\n", &this);
+                __io_destroy( *this.thrd.ring );
+                __cfadbg_print_safe(io_core, "Kernel I/O : Destroyed ring for io_context %p\n", &this);
+                // Release the ring storage allocated by the constructor
+                free(this.thrd.ring);
+        }
+        void ^?{}(io_context & this) { // Destructor for io_context that takes no extra argument.
+                ^(this){ false }; // Delegates to the destructor overload that takes one argument, passing false; the meaning of that flag is defined by that overload.
+        }
 …
         extern void __enable_interrupts_hard();
         static void __io_uring_setup( $io_context & this, const io_context_params & params_in, int procfd ) {
+        static void __io_create( __io_data & this, const io_context_params & params_in ) {
                 // Step 1 : call to setup
                 struct io_uring_params params;
                 memset(&params, 0, sizeof(params));
                 // if( params_in.poll_submit   ) params.flags |= IORING_SETUP_SQPOLL;
                 // if( params_in.poll_complete ) params.flags |= IORING_SETUP_IOPOLL;
+                if( params_in.poll_submit   ) params.flags |= IORING_SETUP_SQPOLL;
+                if( params_in.poll_complete ) params.flags |= IORING_SETUP_IOPOLL;
                 __u32 nentries = params_in.num_entries != 0 ? params_in.num_entries : 256;
 …
                         abort("ERROR: I/O setup 'num_entries' must be a power of 2\n");
+                }
+                if( params_in.poller_submits && params_in.eager_submits ) {
+                        abort("ERROR: I/O setup 'poller_submits' and 'eager_submits' cannot be used together\n");
+                }
                 int fd = syscall(__NR_io_uring_setup, nentries, &params );
 …
                 // Step 2 : mmap result
+                struct __sub_ring_t & sq = this.sq;
+                struct __cmp_ring_t & cq = this.cq;
+                memset( &this, 0, sizeof(struct __io_data) );
+                struct __submition_data  & sq = this.submit_q;
+                struct __completion_data & cq = this.completion_q;
                 // calculate the right ring size
 …
                 // Get the pointers from the kernel to fill the structure
                 // submit queue
+                sq.kring.head  = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.head);
+                sq.kring.tail  = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);
+                sq.kring.array = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
+                sq.mask        = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);
+                sq.num         = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);
+                sq.flags       = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);
+                sq.dropped     = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
+                sq.kring.released = 0;
+                sq.free_ring.head = 0;
+                sq.free_ring.tail = *sq.num;
+                sq.free_ring.array = alloc( *sq.num, 128`align );
+                for(i; (__u32)*sq.num) {
+                        sq.free_ring.array[i] = i;
+                }
+                sq.to_submit = 0;
+                sq.head    = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.head);
+                sq.tail    = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);
+                sq.mask    = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);
+                sq.num     = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);
+                sq.flags   = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);
+                sq.dropped = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
+                sq.array   = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
+                sq.prev_head = *sq.head;
+                {
+                        const __u32 num = *sq.num;
+                        for( i; num ) {
+                                __sqe_clean( &sq.sqes[i] );
+                        }
+                }
+                (sq.submit_lock){};
+                (sq.release_lock){};
+                if( params_in.poller_submits || params_in.eager_submits ) {
+                        /* paranoid */ verify( is_pow2( params_in.num_ready ) || (params_in.num_ready < 8) );
+                        sq.ready_cnt = max( params_in.num_ready, 8 );
+                        sq.ready = alloc( sq.ready_cnt, 64`align );
+                        for(i; sq.ready_cnt) {
+                                sq.ready[i] = -1ul32;
+                        }
+                        sq.prev_ready = 0;
+                }
+                else {
+                        sq.ready_cnt = 0;
+                        sq.ready = 0p;
+                        sq.prev_ready = 0;
+                }
                 // completion queue
 …
                 // io_uring_register is so f*cking slow on some machine that it
                 // will never succeed if preemption isn't hard blocked
-                __cfadbg_print_safe(io_core, "Kernel I/O : registering %d for completion with ring %d\n", procfd, fd);
                 __disable_interrupts_hard();
+                int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &procfd, 1);
+                int efd = eventfd(0, 0);
+                if (efd < 0) {
+                        abort("KERNEL ERROR: IO_URING EVENTFD - %s\n", strerror(errno));
+                }
+                int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &efd, 1);
                 if (ret < 0) {
                         abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno));
 …
                 __enable_interrupts_hard();
-                __cfadbg_print_safe(io_core, "Kernel I/O : registered %d for completion with ring %d\n", procfd, fd);
                 // some paranoid checks
 …
                 /* paranoid */ verifyf( (*sq.mask) == ((*sq.num) - 1ul32), "IO_URING Expected mask to be %u (%u entries), was %u", (*sq.num) - 1ul32, *sq.num, *sq.mask );
                 /* paranoid */ verifyf( (*sq.num) >= nentries, "IO_URING Expected %u entries, got %u", nentries, *sq.num );
                 /* paranoid */ verifyf( (*sq.kring.head) == 0, "IO_URING Expected head to be 0, got %u", *sq.kring.head );
                 /* paranoid */ verifyf( (*sq.kring.tail) == 0, "IO_URING Expected tail to be 0, got %u", *sq.kring.tail );
+                /* paranoid */ verifyf( (*sq.head) == 0, "IO_URING Expected head to be 0, got %u", *sq.head );
+                /* paranoid */ verifyf( (*sq.tail) == 0, "IO_URING Expected tail to be 0, got %u", *sq.tail );
                 // Update the global ring info
                 this.ring_flags = 0;
+                this.ring_flags = params.flags;
                 this.fd         = fd;
+        }
+        static void __io_uring_teardown( $io_context & this ) {
+                this.efd        = efd;
+                this.eager_submits  = params_in.eager_submits;
+                this.poller_submits = params_in.poller_submits;
+        }
+        static void __io_destroy( __io_data & this ) {
                 // Shutdown the io rings
                 struct __sub_ring_t & sq = this.sq;
                 struct __cmp_ring_t & cq = this.cq;
+                struct __submition_data  & sq = this.submit_q;
+                struct __completion_data & cq = this.completion_q;
                 // unmap the submit queue entries
 …
                 // close the file descriptor
                 close(this.fd);
+                free( this.sq.free_ring.array ); // Maybe null, doesn't matter
+        }
+        void __cfa_io_start( processor * proc ) { // Creates the I/O context stored in proc->io.ctx.
+                proc->io.ctx = alloc(); // allocate storage for the context; the result is not checked here
+                (*proc->io.ctx){proc, *proc->cltr}; // construct the context in place from the processor and its cluster
+        }
+        void __cfa_io_stop ( processor * proc ) {
+                ^(*proc->io.ctx){};
+                free(proc->io.ctx);
+                close(this.efd);
+                free( this.submit_q.ready ); // Maybe null, doesn't matter
+        }
 …
 // I/O Context Sleep
 //=============================================================================================
+        // static inline void __epoll_ctl($io_context & ctx, int op, const char * error) {
+        //      struct epoll_event ev;
+        //      ev.events = EPOLLIN | EPOLLONESHOT;
+        //      ev.data.u64 = (__u64)&ctx;
+        //      int ret = epoll_ctl(iopoll.epollfd, op, ctx.efd, &ev);
+        //      if (ret < 0) {
+        //              abort( "KERNEL ERROR: EPOLL %s - (%d) %s\n", error, (int)errno, strerror(errno) );
+        //      }
+        // }
+        // static void __epoll_register($io_context & ctx) {
+        //      __epoll_ctl(ctx, EPOLL_CTL_ADD, "ADD");
+        // }
+        // static void __epoll_unregister($io_context & ctx) {
+        //      // Read the current epoch so we know when to stop
+        //      size_t curr = __atomic_load_n(&iopoll.epoch, __ATOMIC_SEQ_CST);
+        //      // Remove the fd from the iopoller
+        //      __epoll_ctl(ctx, EPOLL_CTL_DEL, "REMOVE");
+        //      // Notify the io poller thread of the shutdown
+        //      iopoll.run = false;
+        //      sigval val = { 1 };
+        //      pthread_sigqueue( iopoll.thrd, SIGUSR1, val );
+        //      // Make sure all this is done
+        //      __atomic_thread_fence(__ATOMIC_SEQ_CST);
+        //      // Wait for the next epoch
+        //      while(curr == iopoll.epoch && !iopoll.stopped) Pause();
+        // }
+        // void __ioctx_prepare_block($io_context & ctx) {
+        //      __cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %d (%p)\n", ctx.fd, &ctx);
+        //      __epoll_ctl(ctx, EPOLL_CTL_MOD, "REARM");
+        // }
+        static inline void __ioctx_epoll_ctl($io_ctx_thread & ctx, int op, const char * error) { // Applies epoll operation `op` for ctx.ring->efd on iopoll.epollfd; `error` labels the abort message.
+                struct epoll_event ev;
+                ev.events = EPOLLIN | EPOLLONESHOT; // one-shot readable notification: the entry must be re-armed after each event
+                ev.data.u64 = (__u64)&ctx; // store the context address as the event's user data
+                int ret = epoll_ctl(iopoll.epollfd, op, ctx.ring->efd, &ev);
+                if (ret < 0) { // any epoll_ctl failure aborts the program
+                        abort( "KERNEL ERROR: EPOLL %s - (%d) %s\n", error, (int)errno, strerror(errno) );
+                }
+        }
+        void __ioctx_register($io_ctx_thread & ctx) { // Adds ctx.ring->efd to the iopoll epoll set (EPOLL_CTL_ADD).
+                __ioctx_epoll_ctl(ctx, EPOLL_CTL_ADD, "ADD");
+        }
+        void __ioctx_prepare_block($io_ctx_thread & ctx) { // Re-arms ctx's epoll registration with EPOLL_CTL_MOD.
+                __cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %d (%p)\n", ctx.ring->fd, &ctx);
+                __ioctx_epoll_ctl(ctx, EPOLL_CTL_MOD, "REARM"); // the helper registers with EPOLLONESHOT, so MOD re-enables the entry
+        }
+        void __ioctx_unregister($io_ctx_thread & ctx) { // Removes ctx.ring->efd from the iopoll epoll set, signals the poller thread, then spins until it has moved on.
+                // Read the current epoch so we know when to stop
+                size_t curr = __atomic_load_n(&iopoll.epoch, __ATOMIC_SEQ_CST);
+                // Remove the fd from the iopoller
+                __ioctx_epoll_ctl(ctx, EPOLL_CTL_DEL, "REMOVE");
+                // Notify the io poller thread of the shutdown
+                iopoll.run = false; // NOTE(review): iopoll.run is not per-context, so this appears to stop polling for all contexts — confirm intended
+                sigval val = { 1 };
+                pthread_sigqueue( iopoll.thrd, SIGUSR1, val ); // presumably interrupts the poller thread's blocking wait — confirm against the poller loop
+                // Make sure all this is done
+                __atomic_thread_fence(__ATOMIC_SEQ_CST);
+                // Wait for the next epoch
+                while(curr == iopoll.epoch && !iopoll.stopped) Pause(); // spins until the epoch changes or iopoll.stopped is set
+        }
 //=============================================================================================
 // I/O Context Misc Setup
 //=============================================================================================
+        void ?{}( $io_arbiter & this ) { // Constructor: starts the arbiter with its pending flag cleared.
+                this.pending.flag = false;
+        }
+        void ^?{}( $io_arbiter & mutex this ) { // Destructor: `this` is a mutex (monitor) parameter; only runs sanity checks.
+                // /* paranoid */ verify( empty(this.assigned) );
+                // /* paranoid */ verify( empty(this.available) );
+                /* paranoid */ verify( is_empty(this.pending.blocked) ); // nothing may still be waiting on the pending condition
+        }
+        $io_arbiter * create(void) { // Returns a newly allocated and constructed arbiter; pair with destroy().
+                return new();
+        }
+        void destroy($io_arbiter * arbiter) { // Destroys and frees an arbiter via delete().
+                delete(arbiter);
+        }
+//=============================================================================================
+// I/O Context Misc Setup
+//=============================================================================================
+        void register_fixed_files( io_context & ctx, int * files, unsigned count ) { // Registers `count` descriptors from `files` with ctx's ring via IORING_REGISTER_FILES; aborts on failure.
+                int ret = syscall( __NR_io_uring_register, ctx.thrd.ring->fd, IORING_REGISTER_FILES, files, count );
+                if( ret < 0 ) { // a failed registration is treated as fatal
+                        abort( "KERNEL ERROR: IO_URING REGISTER - (%d) %s\n", (int)errno, strerror(errno) );
+                }
+                __cfadbg_print_safe( io_core, "Kernel I/O : Performed io_register for %p, returned %d\n", active_thread(), ret );
+        }
+        void register_fixed_files( cluster & cltr, int * files, unsigned count ) { // Registers the same files with every I/O context in cltr.io.ctxs.
+                for(i; cltr.io.cnt) { // CFA range loop over the cluster's cltr.io.cnt contexts
+                        register_fixed_files( cltr.io.ctxs[i], files, count ); // the per-context overload aborts on the first failure
+                }
+        }
 #endif

libcfa/src/concurrency/io/types.hfa

-              r266ecf1
+              r182256b
 #if defined(CFA_HAVE_LINUX_IO_URING_H)
+        #include "bits/sequence.hfa"
+        #include "monitor.hfa"
+        #define LEADER_LOCK
+        struct __leaderlock_t { // Lock type used for submit_lock when LEADER_LOCK is defined; state is a single pointer-sized word.
+                struct $thread * volatile value;        // ($thread) next_leader | (bool:1) is_locked
+        };
+        struct processor;
+        monitor $io_arbiter;
+        static inline void ?{}( __leaderlock_t & this ) { this.value = 0p; } // Constructor: sets the lock word to the null pointer (0p).
         //-----------------------------------------------------------------------
         // Ring Data structure
+      struct __sub_ring_t {
+                struct {
+                        // Head and tail of the ring (associated with array)
+                        volatile __u32 * head;   // one passed last index consumed by the kernel
+                        volatile __u32 * tail;   // one passed last index visible to the kernel
+                        volatile __u32 released; // one passed last index released back to the free list
+      struct __submition_data {
+                // Head and tail of the ring (associated with array)
+                volatile __u32 * head;
+                volatile __u32 * tail;
+                volatile __u32 prev_head;
+                        // The actual kernel ring which uses head/tail
+                        // indexes into the sqes arrays
+                        __u32 * array;
+                } kring;
+                struct {
+                        volatile __u32 head;
+                        volatile __u32 tail;
+                        // The ring which contains free allocations
+                        // indexes into the sqes arrays
+                        __u32 * array;
+                } free_ring;
+                // number of sqes to submit on next system call.
+                __u32 to_submit;
+                // The actual kernel ring which uses head/tail
+                // indexes into the sqes arrays
+                __u32 * array;
                 // number of entries and mask to go with it
 …
                 const __u32 * mask;
                 // Submission flags, currently only IORING_SETUP_SQPOLL
+                // Submission flags (Not sure what for)
                 __u32 * flags;
+                // number of sqes not submitted
+                // From documentation : [dropped] is incremented for each invalid submission queue entry encountered in the ring buffer.
+                // number of sqes not submitted (whatever that means)
                 __u32 * dropped;
+                // Like head/tail but not seen by the kernel
+                volatile __u32 * ready;
+                __u32 ready_cnt;
+                __u32 prev_ready;
+                #if defined(LEADER_LOCK)
+                        __leaderlock_t submit_lock;
+                #else
+                        __spinlock_t submit_lock;
+                #endif
+                __spinlock_t  release_lock;
                 // A buffer of sqes (not the actual ring)
                 struct io_uring_sqe * sqes;
+                volatile struct io_uring_sqe * sqes;
                 // The location and size of the mmaped area
 …
         };
         struct __cmp_ring_t {
+        struct __completion_data {
                 // Head and tail of the ring
                 volatile __u32 * head;
 …
                 const __u32 * num;
                 // I don't know what this value is for
+                // number of cqes not submitted (whatever that means)
                 __u32 * overflow;
 …
         };
+        struct __attribute__((aligned(128))) $io_context {
+                $io_arbiter * arbiter;
+                processor * proc;
+                struct {
+                        volatile bool empty;
+                        condition blocked;
+                } ext_sq;
+                struct __sub_ring_t sq;
+                struct __cmp_ring_t cq;
+        struct __io_data {
+                struct __submition_data submit_q;
+                struct __completion_data completion_q;
                 __u32 ring_flags;
                 int fd;
+        };
+        monitor __attribute__((aligned(128))) $io_arbiter {
+                struct {
+                        condition blocked;
+                        $io_context * ctx;
+                        volatile bool flag;
+                } pending;
+                int efd;
+                bool eager_submits:1;
+                bool poller_submits:1;
         };
 …
         #endif
+        // void __ioctx_prepare_block($io_context & ctx);
+        struct $io_ctx_thread;
+        void __ioctx_register($io_ctx_thread & ctx);
+        void __ioctx_unregister($io_ctx_thread & ctx);
+        void __ioctx_prepare_block($io_ctx_thread & ctx);
+        void __sqe_clean( volatile struct io_uring_sqe * sqe );
 #endif

libcfa/src/concurrency/iofwd.hfa

-              r266ecf1
+              r182256b
 #include <unistd.h>
 extern "C" {
         #include <asm/types.h>
+        #include <sys/types.h>
         #if CFA_HAVE_LINUX_IO_URING_H
                 #include <linux/io_uring.h>
 …
 struct cluster;
 struct io_future_t;
+struct $io_context;
+struct io_context;
+struct io_cancellation;
 struct iovec;
 …
 struct sockaddr;
 struct statx;
-struct epoll_event;
-//----------
-// underlying calls
-extern struct $io_context * cfa_io_allocate(struct io_uring_sqe * out_sqes[], __u32 out_idxs[], __u32 want)  __attribute__((nonnull (1,2)));
-extern void cfa_io_submit( struct $io_context * in_ctx, __u32 in_idxs[], __u32 have, bool lazy ) __attribute__((nonnull (1,2)));
 //----------
 // synchronous calls
 #if defined(CFA_HAVE_PREADV2)
         extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, __u64 submit_flags);
+        extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
 #endif
 #if defined(CFA_HAVE_PWRITEV2)
         extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, __u64 submit_flags);
+        extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
 #endif
 extern int cfa_fsync(int fd, __u64 submit_flags);
 extern int cfa_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event, __u64 submit_flags);
 extern int cfa_sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags, __u64 submit_flags);
 extern  ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, __u64 submit_flags);
 extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, __u64 submit_flags);
 extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, __u64 submit_flags);
 extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, __u64 submit_flags);
 extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, __u64 submit_flags);
 extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, __u64 submit_flags);
 extern int cfa_fallocate(int fd, int mode, off_t offset, off_t len, __u64 submit_flags);
 extern int cfa_posix_fadvise(int fd, off_t offset, off_t len, int advice, __u64 submit_flags);
 extern int cfa_madvise(void *addr, size_t length, int advice, __u64 submit_flags);
 extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, __u64 submit_flags);
+extern int cfa_fsync(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern  ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_fallocate(int fd, int mode, off_t offset, off_t len, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_posix_fadvise(int fd, off_t offset, off_t len, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_madvise(void *addr, size_t length, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
 #if defined(CFA_HAVE_OPENAT2)
         extern int cfa_openat2(int dirfd, const char *pathname, struct open_how * how, size_t size, __u64 submit_flags);
+        extern int cfa_openat2(int dirfd, const char *pathname, struct open_how * how, size_t size, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
 #endif
 extern int cfa_close(int fd, __u64 submit_flags);
+extern int cfa_close(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
 #if defined(CFA_HAVE_STATX)
         extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, __u64 submit_flags);
+        extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
 #endif
 extern ssize_t cfa_read(int fd, void * buf, size_t count, __u64 submit_flags);
 extern ssize_t cfa_write(int fd, void * buf, size_t count, __u64 submit_flags);
 extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, __u64 submit_flags);
 extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, __u64 submit_flags);
+extern ssize_t cfa_read(int fd, void * buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_write(int fd, void * buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
+extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context);
 //----------
 // asynchronous calls
 #if defined(CFA_HAVE_PREADV2)
         extern void async_preadv2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, __u64 submit_flags);
+        extern void async_preadv2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
 #endif
 #if defined(CFA_HAVE_PWRITEV2)
         extern void async_pwritev2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, __u64 submit_flags);
+        extern void async_pwritev2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
 #endif
 extern void async_fsync(io_future_t & future, int fd, __u64 submit_flags);
 extern void async_epoll_ctl(io_future_t & future, int epfd, int op, int fd, struct epoll_event *event, __u64 submit_flags);
 extern void async_sync_file_range(io_future_t & future, int fd, off64_t offset, off64_t nbytes, unsigned int flags, __u64 submit_flags);
 extern void async_sendmsg(io_future_t & future, int sockfd, const struct msghdr *msg, int flags, __u64 submit_flags);
 extern void async_recvmsg(io_future_t & future, int sockfd, struct msghdr *msg, int flags, __u64 submit_flags);
 extern void async_send(io_future_t & future, int sockfd, const void *buf, size_t len, int flags, __u64 submit_flags);
 extern void async_recv(io_future_t & future, int sockfd, void *buf, size_t len, int flags, __u64 submit_flags);
 extern void async_accept4(io_future_t & future, int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, __u64 submit_flags);
 extern void async_connect(io_future_t & future, int sockfd, const struct sockaddr *addr, socklen_t addrlen, __u64 submit_flags);
 extern void async_fallocate(io_future_t & future, int fd, int mode, off_t offset, off_t len, __u64 submit_flags);
 extern void async_posix_fadvise(io_future_t & future, int fd, off_t offset, off_t len, int advice, __u64 submit_flags);
 extern void async_madvise(io_future_t & future, void *addr, size_t length, int advice, __u64 submit_flags);
 extern void async_openat(io_future_t & future, int dirfd, const char *pathname, int flags, mode_t mode, __u64 submit_flags);
+extern void async_fsync(io_future_t & future, int fd, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_epoll_ctl(io_future_t & future, int epfd, int op, int fd, struct epoll_event *event, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_sync_file_range(io_future_t & future, int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_sendmsg(io_future_t & future, int sockfd, const struct msghdr *msg, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_recvmsg(io_future_t & future, int sockfd, struct msghdr *msg, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_send(io_future_t & future, int sockfd, const void *buf, size_t len, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_recv(io_future_t & future, int sockfd, void *buf, size_t len, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_accept4(io_future_t & future, int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_connect(io_future_t & future, int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_fallocate(io_future_t & future, int fd, int mode, off_t offset, off_t len, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_posix_fadvise(io_future_t & future, int fd, off_t offset, off_t len, int advice, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_madvise(io_future_t & future, void *addr, size_t length, int advice, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_openat(io_future_t & future, int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, io_cancellation * cancellation, io_context * context);
 #if defined(CFA_HAVE_OPENAT2)
         extern void async_openat2(io_future_t & future, int dirfd, const char *pathname, struct open_how * how, size_t size, __u64 submit_flags);
+        extern void async_openat2(io_future_t & future, int dirfd, const char *pathname, struct open_how * how, size_t size, int submit_flags, io_cancellation * cancellation, io_context * context);
 #endif
 extern void async_close(io_future_t & future, int fd, __u64 submit_flags);
+extern void async_close(io_future_t & future, int fd, int submit_flags, io_cancellation * cancellation, io_context * context);
 #if defined(CFA_HAVE_STATX)
         extern void async_statx(io_future_t & future, int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, __u64 submit_flags);
+        extern void async_statx(io_future_t & future, int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags, io_cancellation * cancellation, io_context * context);
 #endif
 void async_read(io_future_t & future, int fd, void * buf, size_t count, __u64 submit_flags);
 extern void async_write(io_future_t & future, int fd, void * buf, size_t count, __u64 submit_flags);
 extern void async_splice(io_future_t & future, int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, __u64 submit_flags);
 extern void async_tee(io_future_t & future, int fd_in, int fd_out, size_t len, unsigned int flags, __u64 submit_flags);
+void async_read(io_future_t & future, int fd, void * buf, size_t count, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_write(io_future_t & future, int fd, void * buf, size_t count, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_splice(io_future_t & future, int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
+extern void async_tee(io_future_t & future, int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context);
 …
 // Check if a function blocks only the user thread
 bool has_user_level_blocking( fptr_t func );
+//-----------------------------------------------------------------------------
+void register_fixed_files( io_context & ctx , int * files, unsigned count );
+void register_fixed_files( cluster    & cltr, int * files, unsigned count );

libcfa/src/concurrency/kernel.cfa

-              r266ecf1
+              r182256b
 #include <signal.h>
 #include <unistd.h>
-extern "C" {
-        #include <sys/eventfd.h>
+}
 //CFA Includes
 …
 static void __run_thread(processor * this, $thread * dst);
 static void __wake_one(cluster * cltr);
+static void wait(__bin_sem_t & this);
 static void push  (__cluster_idles & idles, processor & proc);
 …
 static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );
-extern void __cfa_io_start( processor * );
-extern void __cfa_io_drain( processor * );
-extern void __cfa_io_flush( processor * );
-extern void __cfa_io_stop ( processor * );
-static inline void __maybe_io_drain( processor * );
-extern void __disable_interrupts_hard();
-extern void __enable_interrupts_hard();
 //=============================================================================================
 …
         verify(this);
-        __cfa_io_start( this );
         __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
         #if !defined(__CFA_NO_STATISTICS__)
 …
                 MAIN_LOOP:
                 for() {
-                        // Check if there is pending io
-                        __maybe_io_drain( this );
                         // Try to get the next thread
                         readyThread = __next_thread( this->cltr );
                         if( !readyThread ) {
-                                __cfa_io_flush( this );
                                 readyThread = __next_thread_slow( this->cltr );
+                        }
 …
                                 #endif
+                                __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle);
+                                __disable_interrupts_hard();
+                                eventfd_t val;
+                                eventfd_read( this->idle, &val );
+                                __enable_interrupts_hard();
+                                wait( this->idle );
                                 #if !defined(__CFA_NO_STATISTICS__)
 …
                         /* paranoid */ verify( readyThread );
-                        // Reset io dirty bit
-                        this->io.dirty = false;
                         // We found a thread run it
 …
+                                }
                         #endif
-                        if(this->io.pending && !this->io.dirty) {
-                                __cfa_io_flush( this );
+                        }
+                }
 …
+        }
-        __cfa_io_stop( this );
         post( this->terminated );
         if(this == mainProcessor) {
 …
         /* paranoid */ verifyf( thrd_dst->link.next == 0p, "Expected null got %p", thrd_dst->link.next );
         __builtin_prefetch( thrd_dst->context.SP );
-        __cfadbg_print_safe(runtime_core, "Kernel : core %p running thread %p (%s)\n", this, thrd_dst, thrd_dst->self_cor.name);
         $coroutine * proc_cor = get_coroutine(this->runner);
 …
         // Just before returning to the processor, set the processor coroutine to active
         proc_cor->state = Active;
-        __cfadbg_print_safe(runtime_core, "Kernel : core %p finished running thread %p\n", this, thrd_dst);
         /* paranoid */ verify( ! __preemption_enabled() );
 …
 // Kernel Idle Sleep
 //=============================================================================================
+extern "C" {
+        char * strerror(int);
+}
+// Evaluate the call `x` exactly once; if it yields a non-zero error code, abort with the
+// stringified call, the code and its strerror() text.
+// Wrapped in do/while(0) so that `CHECKED(...);` is a single statement (safe in an unbraced
+// if/else), and the argument is parenthesised so it is always evaluated as one expression.
+#define CHECKED(x) do { int err = (x); if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); } while(0)
+// Block the calling kernel thread until the semaphore holds a token, then consume it.
+// Asserts (in verify builds) that the caller is in kernel context.
+// Any pthread call that reports an error aborts the program through CHECKED.
+static void wait(__bin_sem_t & this) with( this ) {
+        verify(__cfaabi_dbg_in_kernel());
+        CHECKED( pthread_mutex_lock(&lock) );
+                // Re-test in a loop: a condition-variable wakeup does not guarantee val changed
+                while(val < 1) {
+                        // Releases `lock` while sleeping and re-acquires it before returning.
+                        // A failure here must not be ignored, otherwise val would be touched
+                        // without the guarantee that the lock is held.
+                        CHECKED( pthread_cond_wait(&cond, &lock) );
+                }
+                val -= 1;
+        CHECKED( pthread_mutex_unlock(&lock) );
+}
+// Deposit a token if none is pending and signal the condition variable.
+// Returns true when a token was added (and a signal sent), false when one was already pending.
+// Any pthread call that reports an error aborts the program through CHECKED.
+static bool post(__bin_sem_t & this) with( this ) {
+        bool needs_signal = false;
+        CHECKED( pthread_mutex_lock(&lock) );
+                // Binary semaphore: only add a token when none is currently available
+                if(val < 1) {
+                        val += 1;
+                        // Checked like the lock/unlock calls so a failed wake-up is not silently lost
+                        CHECKED( pthread_cond_signal(&cond) );
+                        needs_signal = true;
+                }
+        CHECKED( pthread_mutex_unlock(&lock) );
+        return needs_signal;
+}
+#undef CHECKED
 // Wake a thread from the front if there are any
 static void __wake_one(cluster * this) {
 …
         // We found a processor, wake it up
+        eventfd_t val;
+        val = 1;
+        eventfd_write( p->idle, val );
+        post( p->idle );
         #if !defined(__CFA_NO_STATISTICS__)
 …
         disable_interrupts();
                 /* paranoid */ verify( ! __preemption_enabled() );
+                eventfd_t val;
+                val = 1;
+                eventfd_write( this->idle, val );
+                post( this->idle );
         enable_interrupts( __cfaabi_dbg_ctx );
+}
 …
 // Kernel Utilities
 //=============================================================================================
-#if defined(CFA_HAVE_LINUX_IO_URING_H)
-#include "io/types.hfa"
-#endif
-static inline void __maybe_io_drain( processor * proc ) {
-        #if defined(CFA_HAVE_LINUX_IO_URING_H)
-                __cfadbg_print_safe(runtime_core, "Kernel : core %p checking io for ring %d\n", proc, proc->io.ctx->fd);
-                // Check if we should drain the queue
-                $io_context * ctx = proc->io.ctx;
-                unsigned head = *ctx->cq.head;
-                unsigned tail = *ctx->cq.tail;
-                if(head != tail) __cfa_io_drain( proc );
-        #endif
+}
 //-----------------------------------------------------------------------------
 // Debug

libcfa/src/concurrency/kernel.hfa

-              r266ecf1
+              r182256b
+}
+//-----------------------------------------------------------------------------
+// Underlying Locks
 #ifdef __CFA_WITH_VERIFY__
         extern bool __cfaabi_dbg_in_kernel();
 #endif
+//-----------------------------------------------------------------------------
+// I/O
+struct cluster;
+struct $io_context;
+struct $io_arbiter;
 // Parameters for I/O contexts; the cluster stores a copy in its `io.params` field.
 struct io_context_params {
         int num_entries;   // presumably the number of queue entries to request — TODO confirm against the I/O setup code
 };
+void  ?{}(io_context_params & this);
+struct __bin_sem_t {   // Binary semaphore built from a pthread mutex and condition variable
+        pthread_mutex_t         lock;   // held by wait()/post() while they read or modify val
+        pthread_cond_t          cond;   // waited on by wait(), signalled by post()
+        int                     val;    // token count: wait() blocks while < 1 then decrements; post() increments only when < 1
+};
 //-----------------------------------------------------------------------------
 …
         pthread_t kernel_thread;
-        struct {
-                $io_context * ctx;
-                bool pending;
-                bool dirty;
-        } io;
         // Preemption data
         // Node which is added in the discrete event simulation
 …
         // Idle lock (kernel semaphore)
         int idle;
+        __bin_sem_t idle;
         // Termination synchronisation (user semaphore)
 …
 DLISTED_MGD_IMPL_OUT(processor)
+//-----------------------------------------------------------------------------
+// I/O
+struct __io_data;
+// IO poller user-thread
+// Not using the "thread" keyword because we want to control
+// more carefully when to start/stop it
+struct $io_ctx_thread {   // Hand-assembled thread object (embeds a $thread instead of using the `thread` keyword)
+        struct __io_data * ring;   // opaque I/O state; struct __io_data is only forward-declared in this header
+        single_sem sem;            // NOTE(review): presumably used to park/wake the poller — confirm in the I/O implementation
+        volatile bool done;        // NOTE(review): presumably the stop request flag for the poller — confirm
+        $thread self;              // the embedded thread descriptor
+};
+struct io_context {   // I/O context object; its only member is the poller thread
+        $io_ctx_thread thrd;   // the IO poller user-thread owned by this context
+};
+struct io_context_params {   // Options accepted by the io_context constructor that takes a `const io_context_params &`
+        int num_entries;          // NOTE(review): meaning of the numeric fields is not visible in this chunk — confirm in the I/O setup code
+        int num_ready;
+        int submit_aff;
+        bool eager_submits:1;     // the four flags below are 1-bit bitfields
+        bool poller_submits:1;
+        bool poll_submit:1;
+        bool poll_complete:1;
+};
+void  ?{}(io_context_params & this);
+void  ?{}(io_context & this, struct cluster & cl);
+void  ?{}(io_context & this, struct cluster & cl, const io_context_params & params);
+void ^?{}(io_context & this);
+struct io_cancellation {   // Handle passed to the async_* calls and to cancel()
+        __u64 target;      // presumably identifies the operation to cancel — TODO confirm against cancel()
+};
+static inline void  ?{}(io_cancellation & this) { this.target = -1u; }   // NOTE(review): -1u is an unsigned int, so the 64-bit target becomes 0xFFFFFFFF, not all ones — confirm this is the intended "no target" sentinel
+static inline void ^?{}(io_cancellation &) {}   // nothing to release
+bool cancel(io_cancellation & this);
 //-----------------------------------------------------------------------------
 …
         struct {
                 $io_arbiter * arbiter;
                 io_context_params params;
+                io_context * ctxs;
+                unsigned cnt;
         } io;

libcfa/src/concurrency/kernel/startup.cfa

-              r266ecf1
+              r182256b
 extern "C" {
       #include <limits.h>       // PTHREAD_STACK_MIN
-        #include <sys/eventfd.h>  // eventfd
       #include <sys/mman.h>     // mprotect
       #include <sys/resource.h> // getrlimit
 …
 static void ?{}(processorCtx_t & this) {}
 static void ?{}(processorCtx_t & this, processor * proc, current_stack_info_t * info);
+static void ?{}(__bin_sem_t & this);
+static void ^?{}(__bin_sem_t & this);
 #if defined(__CFA_WITH_VERIFY__)
 …
 extern void __kernel_alarm_startup(void);
 extern void __kernel_alarm_shutdown(void);
+extern void __kernel_io_startup (void);
+extern void __kernel_io_shutdown(void);
 //-----------------------------------------------------------------------------
 …
 KERNEL_STORAGE($thread,              mainThread);
 KERNEL_STORAGE(__stack_t,            mainThreadCtx);
+KERNEL_STORAGE(io_context,           mainPollerThread);
 KERNEL_STORAGE(__scheduler_RWLock_t, __scheduler_lock);
 #if !defined(__CFA_NO_STATISTICS__)
 …
         void ?{}(processor & this) with( this ) {
+                ( this.idle ){};
                 ( this.terminated ){};
                 ( this.runner ){};
 …
         __kernel_alarm_startup();
+        // Start IO
+        __kernel_io_startup();
         // Add the main thread to the ready queue
         // once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
 …
         // THE SYSTEM IS NOW COMPLETELY RUNNING
+        // SKULLDUGGERY: The constructor for the mainCluster will call alloc with a dimension of 0
+        // malloc *can* return a non-null value, we should free it if that is the case
+        free( mainCluster->io.ctxs );
+        // Now that the system is up, finish creating systems that need threading
+        mainCluster->io.ctxs = (io_context *)&storage_mainPollerThread;
+        mainCluster->io.cnt  = 1;
+        (*mainCluster->io.ctxs){ *mainCluster };
         __cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n");
 …
 static void __kernel_shutdown(void) {
+        //Before we start shutting things down, wait for systems that need threading to shutdown
+        ^(*mainCluster->io.ctxs){};
+        mainCluster->io.cnt  = 0;
+        mainCluster->io.ctxs = 0p;
         /* paranoid */ verify( __preemption_enabled() );
         disable_interrupts();
 …
         // Disable preemption
         __kernel_alarm_shutdown();
+        // Stop IO
+        __kernel_io_shutdown();
         // Destroy the main processor and its context in reverse order of construction
 …
         pending_preemption = false;
-        this.io.ctx = 0p;
-        this.io.pending = false;
-        this.io.dirty   = false;
-        this.idle = eventfd(0, 0);
-        if (idle < 0) {
-                abort("KERNEL ERROR: PROCESSOR EVENTFD - %s\n", strerror(errno));
+        }
         #if !defined(__CFA_NO_STATISTICS__)
                 print_stats = 0;
 …
         // Finally we don't need the read_lock any more
         unregister((__processor_id_t*)&this);
-        close(this.idle);
+}
 void ?{}(processor & this, const char name[], cluster & _cltr) {
+        ( this.idle ){};
         ( this.terminated ){};
         ( this.runner ){};
 …
         threads{ __get };
-        io.arbiter = create();
-        io.params = io_params;
         doregister(this);
 …
         ready_mutate_unlock( last_size );
         enable_interrupts_noPoll(); // Don't poll, could be in main cluster
+        this.io.cnt  = num_io;
+        this.io.ctxs = aalloc(num_io);
+        for(i; this.io.cnt) {
+                (this.io.ctxs[i]){ this, io_params };
+        }
+}
 void ^?{}(cluster & this) {
+        destroy(this.io.arbiter);
+        for(i; this.io.cnt) {
+                ^(this.io.ctxs[i]){ true };
+        }
+        free(this.io.ctxs);
         // Lock the RWlock so no-one pushes/pops while we are changing the queue
 …
+}
+extern "C" {
+        char * strerror(int);
+}
+// Evaluate the call `x` exactly once; if it yields a non-zero error code, abort with the
+// stringified call, the code and its strerror() text.
+// Wrapped in do/while(0) so that `CHECKED(...);` is a single statement (safe in an unbraced
+// if/else), and the argument is parenthesised so it is always evaluated as one expression.
+#define CHECKED(x) do { int err = (x); if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); } while(0)
+// Construct an empty binary semaphore (val == 0) backed by an error-checking mutex and a
+// default-attribute condition variable.
+// Every pthread call is wrapped in CHECKED, like the destructor, so a failed initialisation
+// aborts immediately instead of leaving a half-built semaphore behind.
+static void ?{}(__bin_sem_t & this) with( this ) {
+        // Create the mutex with error checking
+        pthread_mutexattr_t mattr;
+        CHECKED( pthread_mutexattr_init( &mattr ) );
+        CHECKED( pthread_mutexattr_settype( &mattr, PTHREAD_MUTEX_ERRORCHECK_NP) );
+        CHECKED( pthread_mutex_init(&lock, &mattr) );
+        // The attribute object is only consulted during pthread_mutex_init; release it now
+        CHECKED( pthread_mutexattr_destroy( &mattr ) );
+        CHECKED( pthread_cond_init (&cond, (const pthread_condattr_t *)0p) );  // workaround trac#208: cast should not be required
+        val = 0;
+}
+static void ^?{}(__bin_sem_t & this) with( this ) {   // Destructor: tear down the pthread objects owned by the semaphore
+        CHECKED( pthread_mutex_destroy(&lock) );   // CHECKED aborts if the call returns a non-zero error code
+        CHECKED( pthread_cond_destroy (&cond) );
+}
+#undef CHECKED
 #if defined(__CFA_WITH_VERIFY__)
 static bool verify_fwd_bck_rng(void) {

libcfa/src/concurrency/kernel_private.hfa

-              r266ecf1
+              r182256b
 //-----------------------------------------------------------------------------
 // I/O
+$io_arbiter * create(void);
+void destroy($io_arbiter *);
+void ^?{}(io_context & this, bool );
 //=======================================================================

libcfa/src/concurrency/preemption.cfa

-              r266ecf1
+              r182256b
         // Setup proper signal handlers
         __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO ); // __cfactx_switch handler
         __cfaabi_sigaction( SIGALRM, sigHandler_alarm    , SA_SIGINFO ); // debug handler
+        __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // __cfactx_switch handler
+        __cfaabi_sigaction( SIGALRM, sigHandler_alarm    , SA_SIGINFO | SA_RESTART ); // debug handler
         signal_block( SIGALRM );

libcfa/src/concurrency/stats.cfa

-              r266ecf1
+              r182256b
                 #if defined(CFA_HAVE_LINUX_IO_URING_H)
+                        stats->io.alloc.fast        = 0;
+                        stats->io.alloc.slow        = 0;
+                        stats->io.alloc.fail        = 0;
+                        stats->io.alloc.revoke      = 0;
+                        stats->io.alloc.block       = 0;
+                        stats->io.submit.fast       = 0;
+                        stats->io.submit.slow       = 0;
+                        stats->io.flush.external    = 0;
+                        stats->io.calls.flush       = 0;
+                        stats->io.calls.submitted   = 0;
+                        stats->io.calls.drain       = 0;
+                        stats->io.calls.completed   = 0;
+                        stats->io.calls.errors.busy = 0;
+                        stats->io.poller.sleeps     = 0;
+                        stats->io.submit_q.submit_avg.rdy = 0;
+                        stats->io.submit_q.submit_avg.csm = 0;
+                        stats->io.submit_q.submit_avg.cnt = 0;
+                        stats->io.submit_q.look_avg.val   = 0;
+                        stats->io.submit_q.look_avg.cnt   = 0;
+                        stats->io.submit_q.look_avg.block = 0;
+                        stats->io.submit_q.alloc_avg.val   = 0;
+                        stats->io.submit_q.alloc_avg.cnt   = 0;
+                        stats->io.submit_q.alloc_avg.block = 0;
+                        stats->io.submit_q.helped = 0;
+                        stats->io.submit_q.leader = 0;
+                        stats->io.submit_q.busy   = 0;
+                        stats->io.complete_q.completed_avg.val = 0;
+                        stats->io.complete_q.completed_avg.cnt = 0;
+                        stats->io.complete_q.blocks = 0;
                 #endif
+        }
 …
                 #if defined(CFA_HAVE_LINUX_IO_URING_H)
+                        __atomic_fetch_add( &cltr->io.alloc.fast       , proc->io.alloc.fast       , __ATOMIC_SEQ_CST ); proc->io.alloc.fast        = 0;
+                        __atomic_fetch_add( &cltr->io.alloc.slow       , proc->io.alloc.slow       , __ATOMIC_SEQ_CST ); proc->io.alloc.slow        = 0;
+                        __atomic_fetch_add( &cltr->io.alloc.fail       , proc->io.alloc.fail       , __ATOMIC_SEQ_CST ); proc->io.alloc.fail        = 0;
+                        __atomic_fetch_add( &cltr->io.alloc.revoke     , proc->io.alloc.revoke     , __ATOMIC_SEQ_CST ); proc->io.alloc.revoke      = 0;
+                        __atomic_fetch_add( &cltr->io.alloc.block      , proc->io.alloc.block      , __ATOMIC_SEQ_CST ); proc->io.alloc.block       = 0;
+                        __atomic_fetch_add( &cltr->io.submit.fast      , proc->io.submit.fast      , __ATOMIC_SEQ_CST ); proc->io.submit.fast       = 0;
+                        __atomic_fetch_add( &cltr->io.submit.slow      , proc->io.submit.slow      , __ATOMIC_SEQ_CST ); proc->io.submit.slow       = 0;
+                        __atomic_fetch_add( &cltr->io.flush.external   , proc->io.flush.external   , __ATOMIC_SEQ_CST ); proc->io.flush.external    = 0;
+                        __atomic_fetch_add( &cltr->io.calls.flush      , proc->io.calls.flush      , __ATOMIC_SEQ_CST ); proc->io.calls.flush       = 0;
+                        __atomic_fetch_add( &cltr->io.calls.submitted  , proc->io.calls.submitted  , __ATOMIC_SEQ_CST ); proc->io.calls.submitted   = 0;
+                        __atomic_fetch_add( &cltr->io.calls.drain      , proc->io.calls.drain      , __ATOMIC_SEQ_CST ); proc->io.calls.drain       = 0;
+                        __atomic_fetch_add( &cltr->io.calls.completed  , proc->io.calls.completed  , __ATOMIC_SEQ_CST ); proc->io.calls.completed   = 0;
+                        __atomic_fetch_add( &cltr->io.calls.errors.busy, proc->io.calls.errors.busy, __ATOMIC_SEQ_CST ); proc->io.calls.errors.busy = 0;
+                        __atomic_fetch_add( &cltr->io.poller.sleeps    , proc->io.poller.sleeps    , __ATOMIC_SEQ_CST ); proc->io.poller.sleeps     = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.submit_avg.rdy     , proc->io.submit_q.submit_avg.rdy     , __ATOMIC_SEQ_CST ); proc->io.submit_q.submit_avg.rdy      = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.submit_avg.csm     , proc->io.submit_q.submit_avg.csm     , __ATOMIC_SEQ_CST ); proc->io.submit_q.submit_avg.csm      = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.submit_avg.avl     , proc->io.submit_q.submit_avg.avl     , __ATOMIC_SEQ_CST ); proc->io.submit_q.submit_avg.avl      = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.submit_avg.cnt     , proc->io.submit_q.submit_avg.cnt     , __ATOMIC_SEQ_CST ); proc->io.submit_q.submit_avg.cnt      = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.look_avg.val       , proc->io.submit_q.look_avg.val       , __ATOMIC_SEQ_CST ); proc->io.submit_q.look_avg.val        = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.look_avg.cnt       , proc->io.submit_q.look_avg.cnt       , __ATOMIC_SEQ_CST ); proc->io.submit_q.look_avg.cnt        = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.look_avg.block     , proc->io.submit_q.look_avg.block     , __ATOMIC_SEQ_CST ); proc->io.submit_q.look_avg.block      = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.alloc_avg.val      , proc->io.submit_q.alloc_avg.val      , __ATOMIC_SEQ_CST ); proc->io.submit_q.alloc_avg.val       = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.alloc_avg.cnt      , proc->io.submit_q.alloc_avg.cnt      , __ATOMIC_SEQ_CST ); proc->io.submit_q.alloc_avg.cnt       = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.alloc_avg.block    , proc->io.submit_q.alloc_avg.block    , __ATOMIC_SEQ_CST ); proc->io.submit_q.alloc_avg.block     = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.helped             , proc->io.submit_q.helped             , __ATOMIC_SEQ_CST ); proc->io.submit_q.helped              = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.leader             , proc->io.submit_q.leader             , __ATOMIC_SEQ_CST ); proc->io.submit_q.leader              = 0;
+                        __atomic_fetch_add( &cltr->io.submit_q.busy               , proc->io.submit_q.busy               , __ATOMIC_SEQ_CST ); proc->io.submit_q.busy                = 0;
+                        __atomic_fetch_add( &cltr->io.complete_q.completed_avg.val, proc->io.complete_q.completed_avg.val, __ATOMIC_SEQ_CST ); proc->io.complete_q.completed_avg.val = 0;
+                        __atomic_fetch_add( &cltr->io.complete_q.completed_avg.cnt, proc->io.complete_q.completed_avg.cnt, __ATOMIC_SEQ_CST ); proc->io.complete_q.completed_avg.cnt = 0;
+                        __atomic_fetch_add( &cltr->io.complete_q.blocks           , proc->io.complete_q.blocks           , __ATOMIC_SEQ_CST ); proc->io.complete_q.blocks            = 0;
                 #endif
+        }
 …
                 if( flags & CFA_STATS_READY_Q ) {
+                        double push_sur = (100.0 * ((double)ready.pick.push.success) / ready.pick.push.attempt);
+                        double pop_sur  = (100.0 * ((double)ready.pick.pop .success) / ready.pick.pop .attempt);
                         double push_len = ((double)ready.pick.push.attempt) / ready.pick.push.success;
                         double pop_len  = ((double)ready.pick.pop .attempt) / ready.pick.pop .success;
+                        double lpush_sur = (100.0 * ((double)ready.pick.push.lsuccess) / ready.pick.push.local);
+                        double lpop_sur  = (100.0 * ((double)ready.pick.pop .lsuccess) / ready.pick.pop .local);
                         double lpush_len = ((double)ready.pick.push.local) / ready.pick.push.lsuccess;
 …
                         __cfaabi_bits_print_safe( STDOUT_FILENO,
                                 "----- %s \"%s\" (%p) - Ready Q Stats -----\n"
+                                "- total threads  : %'15" PRIu64 "run, %'15" PRIu64 "schd (%'" PRIu64 "mig )\n"
+                                "- push avg probe : %'3.2lf, %'3.2lfl (%'15" PRIu64 " attempts, %'15" PRIu64 " locals)\n"
+                                "- pop  avg probe : %'3.2lf, %'3.2lfl (%'15" PRIu64 " attempts, %'15" PRIu64 " locals)\n"
+                                "- Idle Sleep     : %'15" PRIu64 "h, %'15" PRIu64 "c, %'15" PRIu64 "w, %'15" PRIu64 "e\n"
+                                "- total threads run      : %'15" PRIu64 "\n"
+                                "- total threads scheduled: %'15" PRIu64 "\n"
+                                "- push average probe len : %'18.2lf, %'18.2lf%% (%'15" PRIu64 " attempts)\n"
+                                "- pop  average probe len : %'18.2lf, %'18.2lf%% (%'15" PRIu64 " attempts)\n"
+                                "- local push avg prb len : %'18.2lf, %'18.2lf%% (%'15" PRIu64 " attempts)\n"
+                                "- local pop  avg prb len : %'18.2lf, %'18.2lf%% (%'15" PRIu64 " attempts)\n"
+                                "- thread migrations      : %'15" PRIu64 "\n"
+                                "- Idle Sleep -\n"
+                                "-- halts                 : %'15" PRIu64 "\n"
+                                "-- cancelled halts       : %'15" PRIu64 "\n"
+                                "-- schedule wake         : %'15" PRIu64 "\n"
+                                "-- wake on exit          : %'15" PRIu64 "\n"
                                 "\n"
                                 , type, name, id
                                 , ready.pick.pop.success
                                 , ready.pick.push.success
+                                , push_len, push_sur, ready.pick.push.attempt
+                                , pop_len , pop_sur , ready.pick.pop .attempt
+                                , lpush_len, lpush_sur, ready.pick.push.local
+                                , lpop_len , lpop_sur , ready.pick.pop .local
                                 , ready.threads.migration
-                                , push_len, lpush_len, ready.pick.push.attempt, ready.pick.push.local
-                                , pop_len , lpop_len , ready.pick.pop .attempt, ready.pick.pop .local
                                 , ready.sleep.halts, ready.sleep.cancels, ready.sleep.wakes, ready.sleep.exits
                         );
 …
                 #if defined(CFA_HAVE_LINUX_IO_URING_H)
                         if( flags & CFA_STATS_IO ) {
                                 uint64_t total_allocs = io.alloc.fast + io.alloc.slow;
                                 double avgfasta = ((double)io.alloc.fast) / total_allocs;
+                                double avgrdy = ((double)io.submit_q.submit_avg.rdy) / io.submit_q.submit_avg.cnt;
+                                double avgcsm = ((double)io.submit_q.submit_avg.csm) / io.submit_q.submit_avg.cnt;
+                                uint64_t total_submits = io.submit.fast + io.submit.slow;
+                                double avgfasts = ((double)io.submit.fast) / total_submits;
+                                double lavgv = 0;
+                                double lavgb = 0;
+                                if(io.submit_q.look_avg.cnt != 0) {
+                                        lavgv = ((double)io.submit_q.look_avg.val  ) / io.submit_q.look_avg.cnt;
+                                        lavgb = ((double)io.submit_q.look_avg.block) / io.submit_q.look_avg.cnt;
+                                }
+                                double avgsubs = ((double)io.calls.submitted) / io.calls.flush;
+                                double avgcomp = ((double)io.calls.completed) / io.calls.drain;
+                                double aavgv = 0;
+                                double aavgb = 0;
+                                if(io.submit_q.alloc_avg.cnt != 0) {
+                                        aavgv = ((double)io.submit_q.alloc_avg.val  ) / io.submit_q.alloc_avg.cnt;
+                                        aavgb = ((double)io.submit_q.alloc_avg.block) / io.submit_q.alloc_avg.cnt;
+                                }
                                 __cfaabi_bits_print_safe( STDOUT_FILENO,
                                         "----- %s \"%s\" (%p) - I/O Stats -----\n"
+                                        "- total allocations : %'" PRIu64 "f, %'" PRIu64 "s (%'2.2lff) \n"
+                                        "-     failures      : %'" PRIu64 "oom, %'" PRIu64 "rvk, %'" PRIu64 "blk\n"
+                                        "- total submits     : %'" PRIu64 "f, %'" PRIu64 "s (%'2.2lf) \n"
+                                        "- flush external    : %'" PRIu64 "\n"
+                                        "- io_uring_enter    : %'" PRIu64 " (%'" PRIu64 ", %'" PRIu64 " EBUSY)\n"
+                                        "-     submits       : %'" PRIu64 " (%'.2lf) \n"
+                                        "-     completes     : %'" PRIu64 " (%'.2lf) \n"
+                                        "- poller sleeping   : %'" PRIu64 "\n"
+                                        "- total submit calls     : %'15" PRIu64 "\n"
+                                        "- avg ready entries      : %'18.2lf\n"
+                                        "- avg submitted entries  : %'18.2lf\n"
+                                        "- total helped entries   : %'15" PRIu64 "\n"
+                                        "- total leader entries   : %'15" PRIu64 "\n"
+                                        "- total busy submit      : %'15" PRIu64 "\n"
+                                        "- total ready search     : %'15" PRIu64 "\n"
+                                        "- avg ready search len   : %'18.2lf\n"
+                                        "- avg ready search block : %'18.2lf\n"
+                                        "- total alloc search     : %'15" PRIu64 "\n"
+                                        "- avg alloc search len   : %'18.2lf\n"
+                                        "- avg alloc search block : %'18.2lf\n"
+                                        "- total wait calls       : %'15" PRIu64 "\n"
+                                        "- avg completion/wait    : %'18.2lf\n"
+                                        "- total completion blocks: %'15" PRIu64 "\n"
                                         "\n"
                                         , type,  name, id
+                                        , io.alloc.fast, io.alloc.slow, avgfasta
+                                        , io.alloc.fail, io.alloc.revoke, io.alloc.block
+                                        , io.submit.fast, io.submit.slow, avgfasts
+                                        , io.flush.external
+                                        , io.calls.flush, io.calls.drain, io.calls.errors.busy
+                                        , io.calls.submitted, avgsubs
+                                        , io.calls.completed, avgcomp
+                                        , io.poller.sleeps
+                                        , io.submit_q.submit_avg.cnt
+                                        , avgrdy, avgcsm
+                                        , io.submit_q.helped, io.submit_q.leader, io.submit_q.busy
+                                        , io.submit_q.look_avg.cnt
+                                        , lavgv, lavgb
+                                        , io.submit_q.alloc_avg.cnt
+                                        , aavgv, aavgb
+                                        , io.complete_q.completed_avg.cnt
+                                        , ((double)io.complete_q.completed_avg.val) / io.complete_q.completed_avg.cnt
+                                        , io.complete_q.blocks
                                 );
+                        }

libcfa/src/concurrency/stats.hfa

-              r266ecf1
+              r182256b
                 struct __attribute__((aligned(64))) __stats_io_t{
                         struct {
+                                volatile uint64_t fast;
+                                volatile uint64_t slow;
+                                volatile uint64_t fail;
+                                volatile uint64_t revoke;
+                                volatile uint64_t block;
+                        } alloc;
+                                struct {
+                                        volatile uint64_t rdy;
+                                        volatile uint64_t csm;
+                                        volatile uint64_t avl;
+                                        volatile uint64_t cnt;
+                                } submit_avg;
+                                struct {
+                                        volatile uint64_t val;
+                                        volatile uint64_t cnt;
+                                        volatile uint64_t block;
+                                } look_avg;
+                                struct {
+                                        volatile uint64_t val;
+                                        volatile uint64_t cnt;
+                                        volatile uint64_t block;
+                                } alloc_avg;
+                                volatile uint64_t helped;
+                                volatile uint64_t leader;
+                                volatile uint64_t busy;
+                        } submit_q;
                         struct {
-                                volatile uint64_t fast;
-                                volatile uint64_t slow;
-                        } submit;
-                        struct {
-                                volatile uint64_t external;
-                        } flush;
-                        struct {
-                                volatile uint64_t drain;
-                                volatile uint64_t completed;
-                                volatile uint64_t flush;
-                                volatile uint64_t submitted;
                                 struct {
+                                        volatile uint64_t busy;
+                                } errors;
+                        } calls;
+                        struct {
+                                volatile uint64_t sleeps;
+                        } poller;
+                                        volatile uint64_t val;
+                                        volatile uint64_t cnt;
+                                } completed_avg;
+                                volatile uint64_t blocks;
+                        } complete_q;
                 };
         #endif

tests/Makefile.am

-              r266ecf1
+              r182256b
         -Wall \
         -Wno-unused-function \
+        -quiet @CFA_FLAGS@
+        -quiet @CFA_FLAGS@ \
+        -DIN_DIR="${abs_srcdir}/.in/"
 AM_CFAFLAGS = -XCFA --deterministic-out
 …
         long_tests.hfa \
         .in/io.data \
-        io/.in/io.data \
         avltree/avl.h \
         avltree/avl-private.h \
 …
 # don't use distcc to do the linking because distcc doesn't do linking
 % : %.cfa $(CFACCBIN)
         $(CFACOMPILETEST) -c -o $(abspath ${@}).o -DIN_DIR="$(abspath $(dir ${<}))/.in/"
+        $(CFACOMPILETEST) -c -o $(abspath ${@}).o
         $(CFACCLINK) ${@}.o -o $(abspath ${@})
         rm $(abspath ${@}).o

Context Navigation

Changes in / [266ecf1:182256b]

Legend:

Jenkins/FullBuild

benchmark/io/http/http_ring.cpp

benchmark/io/http/main.cfa

benchmark/io/http/options.cfa

benchmark/io/http/options.hfa

benchmark/io/http/protocol.cfa

benchmark/io/http/protocol.hfa

benchmark/io/http/worker.cfa

benchmark/io/http/worker.hfa

libcfa/configure.ac

libcfa/prelude/defines.hfa.in

libcfa/src/bits/defs.hfa

libcfa/src/concurrency/io.cfa

libcfa/src/concurrency/io/call.cfa.in

libcfa/src/concurrency/io/setup.cfa

libcfa/src/concurrency/io/types.hfa

libcfa/src/concurrency/iofwd.hfa

libcfa/src/concurrency/kernel.cfa

libcfa/src/concurrency/kernel.hfa

libcfa/src/concurrency/kernel/startup.cfa

libcfa/src/concurrency/kernel_private.hfa

libcfa/src/concurrency/preemption.cfa

libcfa/src/concurrency/stats.cfa

libcfa/src/concurrency/stats.hfa

tests/Makefile.am

Download in other formats: