Changes in / [6dba8755:95789be]


Files: 4 added, 20 edited

  • benchmark/io/readv.cfa

    r6dba8755 r95789be  
    1212}
    1313
     14#include <errno.h>
    1415#include <unistd.h>
    1516
    1617#include <clock.hfa>
     18#include <iofwd.hfa>
    1719#include <kernel.hfa>
    1820#include <thread.hfa>
     
    2325
    2426extern bool traceHeapOn();
    25 extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
    26 extern ssize_t cfa_preadv2_fixed(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
    27 extern void register_fixed_files( cluster &, int *, unsigned count );
    2827
    2928int fd;
     
    3130volatile size_t count = 0;
    3231
    33 unsigned long int buflen = 50;
     32unsigned long int buflen = 512;
    3433bool fixed_file = false;
    3534
     
    4039
    4140int do_read(int fd, struct iovec * iov) {
     41        // extern ssize_t cfa_preadv2(int, const struct iovec *, int, off_t, int, int = 0, Duration = -1`s, io_cancellation * = 0p, io_context * = 0p);
     42        int sflags = 0;
    4243        if(fixed_file) {
    43                 return cfa_preadv2_fixed(fd, iov, 1, 0, 0);
     44                sflags |= CFA_IO_FIXED_FD1;
    4445        }
    45         else {
    46                 return cfa_preadv2(fd, iov, 1, 0, 0);
    47         }
     46        return cfa_preadv2(fd, iov, 1, 0, 0, sflags, -1`s, 0p, 0p);
    4847}
    4948
     
    5251        /* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
    5352
    54         char data[buflen];
     53        __attribute__((aligned(512)))  char data[buflen];
    5554        struct iovec iov = { data, buflen };
    5655
    5756        while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
    5857                int r = do_read(fd, &iov);
    59                 if(r < 0) abort("%s\n", strerror(-r));
     58                if(r < 0) abort("%s\n", strerror(errno));
    6059
    6160                __atomic_fetch_add( &count, 1, __ATOMIC_SEQ_CST );
     
    6564int main(int argc, char * argv[]) {
    6665        BENCH_DECL
    67         unsigned flags = 0;
     66        unsigned num_io = 1;
     67        io_context_params params;
    6868        int file_flags = 0;
    6969        unsigned sublen = 16;
     
    7474                        BENCH_OPT_LONG
    7575                        {"bufsize",       required_argument, 0, 'b'},
    76                         {"userthread",    no_argument      , 0, 'u'},
    7776                        {"submitthread",  no_argument      , 0, 's'},
    7877                        {"eagersubmit",   no_argument      , 0, 'e'},
    7978                        {"kpollsubmit",   no_argument      , 0, 'k'},
    8079                        {"kpollcomplete", no_argument      , 0, 'i'},
     80                        {"fixed-files",   no_argument      , 0, 'f'},
     81                        {"open-direct",   no_argument      , 0, 'o'},
    8182                        {"submitlength",  required_argument, 0, 'l'},
    8283                        {0, 0, 0, 0}
     
    8485
    8586                int idx = 0;
    86                 int opt = getopt_long(argc, argv, BENCH_OPT_SHORT "b:usekil:", options, &idx);
     87                int opt = getopt_long(argc, argv, BENCH_OPT_SHORT "b:sekil:", options, &idx);
    8788
    8889                const char * arg = optarg ? optarg : "";
     
    100101                                }
    101102                                break;
    102                         case 'u':
    103                                 flags |= CFA_CLUSTER_IO_POLLER_USER_THREAD;
    104                                 break;
    105103                        case 's':
    106                                 flags |= CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS;
     104                                params.poller_submits = true;
    107105                                break;
    108106                        case 'e':
    109                                 flags |= CFA_CLUSTER_IO_EAGER_SUBMITS;
     107                                params.eager_submits = true;
    110108                                break;
    111109                        case 'k':
    112                                 flags |= CFA_CLUSTER_IO_KERNEL_POLL_SUBMITS;
     110                                params.poll_submit = true;
     111                        case 'f':
    113112                                fixed_file = true;
    114113                                break;
    115114                        case 'i':
    116                                 flags |= CFA_CLUSTER_IO_KERNEL_POLL_COMPLETES;
     115                                params.poll_complete = true;
     116                        case 'o':
    117117                                file_flags |= O_DIRECT;
    118118                                break;
     
    123123                                        goto usage;
    124124                                }
    125                                 flags |= (sublen << CFA_CLUSTER_IO_BUFFLEN_OFFSET);
     125                                // flags |= (sublen << CFA_CLUSTER_IO_BUFFLEN_OFFSET);
    126126                                break;
    127127                        default: /* ? */
     
    150150        {
    151151                Time start, end;
    152                 BenchCluster cl = { flags, CFA_STATS_READY_Q | CFA_STATS_IO };
     152                BenchCluster cl = { num_io, params, CFA_STATS_READY_Q | CFA_STATS_IO };
    153153
    154154                if(fixed_file) {
     
    179179                                printf("\nDone\n");
    180180                        }
     181                        printf("Readers closed\n");
    181182                }
    182183                printf("Took %'ld ms\n", (end - start)`ms);
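
    The readv.cfa changes bump the default buffer length to 512, align the buffer to 512 bytes, and add an
    --open-direct option; that combination matters because O_DIRECT I/O generally requires block-aligned
    buffers, offsets and lengths. A minimal plain-C sketch of the same constraint (file name and sizes are
    illustrative, not part of the changeset):

        #define _GNU_SOURCE            /* O_DIRECT */
        #include <errno.h>
        #include <fcntl.h>
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>
        #include <sys/uio.h>
        #include <unistd.h>

        int main(void) {
            int fd = open("/tmp/data.bin", O_RDONLY | O_DIRECT);     /* hypothetical test file */
            if (fd < 0) { perror("open"); return 1; }

            void * buf;
            if (posix_memalign(&buf, 512, 512)) { close(fd); return 1; }  /* 512-byte aligned buffer */

            struct iovec iov = { buf, 512 };
            ssize_t r = preadv(fd, &iov, 1, 0);                      /* read 512 bytes at offset 0 */
            if (r < 0) fprintf(stderr, "%s\n", strerror(errno));     /* errno carries the error, not -r */
            else       printf("read %zd bytes\n", r);

            free(buf);
            close(fd);
            return 0;
        }
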
  • libcfa/configure

    r6dba8755 r95789be  
    701701CFA_PREFIX
    702702CFA_NAME
     703AM_T
    703704BUILDLIB_FALSE
    704705BUILDLIB_TRUE
     
    31873188  BUILDLIB_FALSE=
    31883189fi
     3190
     3191
     3192AM_T='$(T)'
    31893193
    31903194
     
    1701717021
    1701817022
     17023
    1701917024for ac_header in linux/io_uring.h
    1702017025do :
     
    1929519300
    1929619301
     19302
     19303fi
     19304
     19305
     19306
     19307        # check support for various io_uring flags
     19308
     19309                ac_fn_c_check_decl "$LINENO" "IOSQE_FIXED_FILE" "ac_cv_have_decl_IOSQE_FIXED_FILE" "#include <linux/io_uring.h>
     19310"
     19311if test "x$ac_cv_have_decl_IOSQE_FIXED_FILE" = xyes; then :
     19312  $as_echo "#define CFA_HAVE_IOSQE_FIXED_FILE 1" >>confdefs.h
     19313
     19314fi
     19315
     19316
     19317                ac_fn_c_check_decl "$LINENO" "IOSQE_IO_DRAIN" "ac_cv_have_decl_IOSQE_IO_DRAIN" "#include <linux/io_uring.h>
     19318"
     19319if test "x$ac_cv_have_decl_IOSQE_IO_DRAIN" = xyes; then :
     19320  $as_echo "#define CFA_HAVE_IOSQE_IO_DRAIN 1" >>confdefs.h
     19321
     19322fi
     19323
     19324
     19325                ac_fn_c_check_decl "$LINENO" "IOSQE_ASYNC" "ac_cv_have_decl_IOSQE_ASYNC" "#include <linux/io_uring.h>
     19326"
     19327if test "x$ac_cv_have_decl_IOSQE_ASYNC" = xyes; then :
     19328  $as_echo "#define CFA_HAVE_IOSQE_ASYNC 1" >>confdefs.h
     19329
     19330fi
     19331
     19332
     19333                ac_fn_c_check_decl "$LINENO" "IOSQE_IO_LINK" "ac_cv_have_decl_IOSQE_IO_LINK" "#include <linux/io_uring.h>
     19334"
     19335if test "x$ac_cv_have_decl_IOSQE_IO_LINK" = xyes; then :
     19336  $as_echo "#define CFA_HAVE_IOSQE_IO_LINK 1" >>confdefs.h
     19337
     19338fi
     19339
     19340
     19341                ac_fn_c_check_decl "$LINENO" "IOSQE_IO_HARDLINK" "ac_cv_have_decl_IOSQE_IO_HARDLINK" "#include <linux/io_uring.h>
     19342"
     19343if test "x$ac_cv_have_decl_IOSQE_IO_HARDLINK" = xyes; then :
     19344  $as_echo "#define CFA_HAVE_IOSQE_IO_HARDLINK 1" >>confdefs.h
     19345
     19346fi
     19347
     19348
     19349                ac_fn_c_check_decl "$LINENO" "SPLICE_F_FD_IN_FIXED" "ac_cv_have_decl_SPLICE_F_FD_IN_FIXED" "#include <linux/io_uring.h>
     19350"
     19351if test "x$ac_cv_have_decl_SPLICE_F_FD_IN_FIXED" = xyes; then :
     19352  $as_echo "#define CFA_HAVE_SPLICE_F_FD_IN_FIXED 1" >>confdefs.h
    1929719353
    1929819354fi
     
    2115821214#! $SHELL
    2115921215# Generated automatically by $as_me ($PACKAGE) $VERSION
    21160 # Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
    2116121216# NOTE: Changes made to this file will be lost: look at ltmain.sh.
    2116221217
  • libcfa/configure.ac

    r6dba8755 r95789be  
    105105AM_CONDITIONAL([BUILDLIB], [test "x${CONFIG_BUILDLIB}" = "xyes"])
    106106
     107AM_T='$(T)'
     108AC_SUBST(AM_T)
     109
    107110#==============================================================================
    108111#Trasforming cc1 will break compilation
     
    129132#io_uring 5.6 and later uses probes
    130133define(ioring_ops, [IORING_OP_NOP,IORING_OP_READV,IORING_OP_WRITEV,IORING_OP_FSYNC,IORING_OP_READ_FIXED,IORING_OP_WRITE_FIXED,IORING_OP_POLL_ADD,IORING_OP_POLL_REMOVE,IORING_OP_SYNC_FILE_RANGE,IORING_OP_SENDMSG,IORING_OP_RECVMSG,IORING_OP_TIMEOUT,IORING_OP_TIMEOUT_REMOVE,IORING_OP_ACCEPT,IORING_OP_ASYNC_CANCEL,IORING_OP_LINK_TIMEOUT,IORING_OP_CONNECT,IORING_OP_FALLOCATE,IORING_OP_OPENAT,IORING_OP_CLOSE,IORING_OP_FILES_UPDATE,IORING_OP_STATX,IORING_OP_READ,IORING_OP_WRITE,IORING_OP_FADVISE,IORING_OP_MADVISE,IORING_OP_SEND,IORING_OP_RECV,IORING_OP_OPENAT2,IORING_OP_EPOLL_CTL,IORING_OP_SPLICE,IORING_OP_PROVIDE_BUFFERS,IORING_OP_REMOVE_BUFFER])
     134define(ioring_flags, [IOSQE_FIXED_FILE,IOSQE_IO_DRAIN,IOSQE_ASYNC,IOSQE_IO_LINK,IOSQE_IO_HARDLINK,SPLICE_F_FD_IN_FIXED])
    131135
    132136define(ioring_from_decls, [
     
    166170                ioring_from_decls
    167171        ])
     172
     173        # check support for various io_uring flags
     174        m4_foreach([op], [ioring_flags], [
     175                AC_CHECK_DECL(op, [AC_DEFINE([CFA_HAVE_]op)], [], [[#include <linux/io_uring.h>]])
     176        ])
    168177])
    169178AC_CHECK_FUNCS([preadv2 pwritev2])
  • libcfa/prelude/defines.hfa.in

    r6dba8755 r95789be  
    5050#undef CFA_HAVE_IORING_OP_REMOVE_BUFFER
    5151
     52#undef CFA_HAVE_IOSQE_FIXED_FILE
     53#undef CFA_HAVE_IOSQE_IO_DRAIN
     54#undef CFA_HAVE_IOSQE_ASYNC
     55#undef CFA_HAVE_IOSQE_IO_LINK
     56#undef CFA_HAVE_IOSQE_IO_HARDLINK
     57#undef CFA_HAVE_SPLICE_F_FD_IN_FIXED
     58
    5259#undef HAVE_PREADV2
    5360#undef HAVE_PWRITEV2
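
    The new CFA_HAVE_IOSQE_* defines let the runtime build against older kernel headers that lack some
    io_uring submission flags. A hedged sketch of how such a guard is typically consumed in C (the helper
    name is illustrative, not taken from the library):

        #if defined(CFA_HAVE_LINUX_IO_URING_H)
            #include <linux/io_uring.h>
        #endif

        /* illustrative helper: pick the sqe flag for registered ("fixed") files when available */
        static inline unsigned char sqe_fixed_file_flag(void) {
        #if defined(CFA_HAVE_IOSQE_FIXED_FILE)
            return IOSQE_FIXED_FILE;   /* header provides the flag, fixed-file indexing is usable */
        #else
            return 0;                  /* older headers: fall back to plain file descriptors */
        #endif
        }
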
  • libcfa/src/Makefile.am

    r6dba8755 r95789be  
    5151# not all platforms support concurrency, add option do disable it
    5252thread_headers_nosrc = concurrency/invoke.h
    53 thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    54 thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/iocall.cfa concurrency/preemption.cfa concurrency/ready_queue.cfa concurrency/stats.cfa ${thread_headers:.hfa=.cfa}
     53
     54thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa \
     55                concurrency/monitor.hfa concurrency/mutex.hfa
     56
     57thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \
     58                concurrency/invoke.c concurrency/io.cfa concurrency/iocall.cfa \
     59                concurrency/io/setup.cfa \
     60                concurrency/kernel/startup.cfa concurrency/preemption.cfa \
     61                concurrency/ready_queue.cfa concurrency/stats.cfa \
     62                ${thread_headers:.hfa=.cfa}
    5563else
    5664headers =
  • libcfa/src/bits/debug.hfa

    r6dba8755 r95789be  
    1515
    1616#pragma once
     17
     18#include <assert.h>
    1719
    1820#ifdef __CFA_DEBUG__
  • libcfa/src/bits/defs.hfa

    r6dba8755 r95789be  
    1616#pragma once
    1717
    18 #include <stdbool.h>
    19 #include <stddef.h>
    2018#include <stdint.h>
    2119
     
    5452    return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
    5553}
    56 
    57 // #define __CFA_NO_BIT_TEST_AND_SET__
    58 
    59 #if defined( __i386 )
    60 static inline bool __atomic_bts(volatile unsigned long int * target, unsigned long int bit ) {
    61         #if defined(__CFA_NO_BIT_TEST_AND_SET__)
    62         unsigned long int mask = 1ul << bit;
    63         unsigned long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
    64         return (ret & mask) != 0;
    65     #else
    66         int result = 0;
    67         asm volatile(
    68             "LOCK btsl %[bit], %[target]\n\t"
    69             : "=@ccc" (result)
    70             : [target] "m" (*target), [bit] "r" (bit)
    71         );
    72         return result != 0;
    73     #endif
    74 }
    75 
    76 static inline bool __atomic_btr(volatile unsigned long int * target, unsigned long int bit ) {
    77         #if defined(__CFA_NO_BIT_TEST_AND_SET__)
    78         unsigned long int mask = 1ul << bit;
    79         unsigned long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
    80         return (ret & mask) != 0;
    81         #else
    82         int result = 0;
    83         asm volatile(
    84             "LOCK btrl %[bit], %[target]\n\t"
    85             :"=@ccc" (result)
    86             : [target] "m" (*target), [bit] "r" (bit)
    87         );
    88         return result != 0;
    89     #endif
    90 }
    91 #elif defined( __x86_64 )
    92 static inline bool __atomic_bts(volatile unsigned long long int * target, unsigned long long int bit ) {
    93         #if defined(__CFA_NO_BIT_TEST_AND_SET__)
    94         unsigned long long int mask = 1ul << bit;
    95         unsigned long long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
    96         return (ret & mask) != 0;
    97     #else
    98         int result = 0;
    99         asm volatile(
    100             "LOCK btsq %[bit], %[target]\n\t"
    101             : "=@ccc" (result)
    102             : [target] "m" (*target), [bit] "r" (bit)
    103         );
    104         return result != 0;
    105     #endif
    106 }
    107 
    108 static inline bool __atomic_btr(volatile unsigned long long int * target, unsigned long long int bit ) {
    109         #if defined(__CFA_NO_BIT_TEST_AND_SET__)
    110         unsigned long long int mask = 1ul << bit;
    111         unsigned long long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
    112         return (ret & mask) != 0;
    113         #else
    114         int result = 0;
    115         asm volatile(
    116             "LOCK btrq %[bit], %[target]\n\t"
    117             :"=@ccc" (result)
    118             : [target] "m" (*target), [bit] "r" (bit)
    119         );
    120         return result != 0;
    121     #endif
    122 }
    123 #elif defined( __ARM_ARCH )
    124     #error __atomic_bts and __atomic_btr not implemented for arm
    125 #else
    126         #error uknown hardware architecture
    127 #endif
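
    The removed __atomic_bts/__atomic_btr helpers paired an x86 LOCK bts/btr path with a portable fallback
    behind __CFA_NO_BIT_TEST_AND_SET__. For reference, the portable form reduces to the GCC/Clang __atomic
    builtins and needs no inline assembly (a sketch restating that fallback, not the library's current code):

        #include <stdbool.h>

        static inline bool atomic_bts(volatile unsigned long * target, unsigned long bit) {
            unsigned long mask = 1ul << bit;
            unsigned long old  = __atomic_fetch_or(target, mask, __ATOMIC_RELAXED);
            return (old & mask) != 0;      /* previous value of the bit */
        }

        static inline bool atomic_btr(volatile unsigned long * target, unsigned long bit) {
            unsigned long mask = 1ul << bit;
            unsigned long old  = __atomic_fetch_and(target, ~mask, __ATOMIC_RELAXED);
            return (old & mask) != 0;      /* previous value of the bit */
        }
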
  • libcfa/src/bits/locks.hfa

    r6dba8755 r95789be  
    164164
    165165        #undef CHECKED
     166
     167        struct $thread;
     168        extern void park( __cfaabi_dbg_ctx_param );
     169        extern void unpark( struct $thread * this __cfaabi_dbg_ctx_param2 );
     170        static inline struct $thread * active_thread ();
     171
     172        // Semaphore which only supports a single thread
     173        struct single_sem {
     174                struct $thread * volatile ptr;
     175        };
     176
     177        static inline {
     178                void  ?{}(single_sem & this) {
     179                        this.ptr = 0p;
     180                }
     181
     182                void ^?{}(single_sem & this) {}
     183
     184                bool wait(single_sem & this) {
     185                        for() {
     186                                struct $thread * expected = this.ptr;
     187                                if(expected == 1p) {
     188                                        if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     189                                                return false;
     190                                        }
     191                                }
     192                                else {
     193                                        /* paranoid */ verify( expected == 0p );
     194                                        if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     195                                                park( __cfaabi_dbg_ctx );
     196                                                return true;
     197                                        }
     198                                }
     199
     200                        }
     201                }
     202
     203                bool post(single_sem & this) {
     204                        for() {
     205                                struct $thread * expected = this.ptr;
     206                                if(expected == 1p) return false;
     207                                if(expected == 0p) {
     208                                        if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     209                                                return false;
     210                                        }
     211                                }
     212                                else {
     213                                        if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     214                                                unpark( expected __cfaabi_dbg_ctx2 );
     215                                                return true;
     216                                        }
     217                                }
     218                        }
     219                }
     220        }
    166221#endif
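
    The new single_sem is a one-waiter binary semaphore encoded in a single pointer: 0p means empty, 1p means
    a post is banked, and any other value is the waiting thread. A rough C11 sketch of the same state machine,
    with park/unpark reduced to a spin flag purely for illustration (names and the spin loop are assumptions,
    not the CFA implementation):

        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdint.h>

        struct waiter { atomic_bool parked; };           /* stand-in for a parked $thread */

        struct single_sem { atomic_uintptr_t ptr; };     /* 0 = empty, 1 = posted, else waiter * */

        static bool sem_wait(struct single_sem * s, struct waiter * self) {
            for (;;) {
                uintptr_t expected = atomic_load(&s->ptr);
                if (expected == 1) {                     /* a post is already banked */
                    if (atomic_compare_exchange_weak(&s->ptr, &expected, (uintptr_t)0))
                        return false;                    /* consumed it without blocking */
                } else {                                 /* expected == 0: advertise ourselves */
                    atomic_store(&self->parked, true);   /* set before publishing to avoid a lost wake-up */
                    if (atomic_compare_exchange_weak(&s->ptr, &expected, (uintptr_t)self)) {
                        while (atomic_load(&self->parked)) { }   /* "park": spin until posted */
                        return true;
                    }
                }
            }
        }

        static bool sem_post(struct single_sem * s) {
            for (;;) {
                uintptr_t expected = atomic_load(&s->ptr);
                if (expected == 1) return false;         /* already posted */
                if (expected == 0) {
                    if (atomic_compare_exchange_weak(&s->ptr, &expected, (uintptr_t)1))
                        return false;                    /* banked the post for the next wait */
                } else {
                    struct waiter * w = (struct waiter *)expected;
                    if (atomic_compare_exchange_weak(&s->ptr, &expected, (uintptr_t)0)) {
                        atomic_store(&w->parked, false); /* "unpark" the single waiter */
                        return true;
                    }
                }
            }
        }
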
  • libcfa/src/concurrency/alarm.cfa

    r6dba8755 r95789be  
    2323
    2424#include "alarm.hfa"
    25 #include "kernel_private.hfa"
     25#include "kernel/fwd.hfa"
    2626#include "preemption.hfa"
    2727
  • libcfa/src/concurrency/invoke.h

    r6dba8755 r95789be  
    1717#include "bits/defs.hfa"
    1818#include "bits/locks.hfa"
     19#include "kernel/fwd.hfa"
    1920
    2021#ifdef __cforall
     
    2526#ifndef _INVOKE_H_
    2627#define _INVOKE_H_
    27 
    28 #ifdef __ARM_ARCH
    29         // function prototypes are only really used by these macros on ARM
    30         void disable_global_interrupts();
    31         void enable_global_interrupts();
    32 
    33         #define TL_GET( member ) ( { __typeof__( kernelTLS.member ) target; \
    34                 disable_global_interrupts(); \
    35                 target = kernelTLS.member; \
    36                 enable_global_interrupts(); \
    37                 target; } )
    38         #define TL_SET( member, value ) disable_global_interrupts(); \
    39                 kernelTLS.member = value; \
    40                 enable_global_interrupts();
    41 #else
    42         #define TL_GET( member ) kernelTLS.member
    43         #define TL_SET( member, value ) kernelTLS.member = value;
    44 #endif
    45 
    46         #ifdef __cforall
    47         extern "Cforall" {
    48                 extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
    49                         struct $thread    * volatile this_thread;
    50                         struct processor  * volatile this_processor;
    51                         struct __stats_t  * volatile this_stats;
    52 
    53                         struct {
    54                                 volatile unsigned short disable_count;
    55                                 volatile bool enabled;
    56                                 volatile bool in_progress;
    57                         } preemption_state;
    58 
    59                         #if defined(__SIZEOF_INT128__)
    60                                 __uint128_t rand_seed;
    61                         #else
    62                                 uint64_t rand_seed;
    63                         #endif
    64                 } kernelTLS __attribute__ ((tls_model ( "initial-exec" )));
    65         }
    66         #endif
    6728
    6829        struct __stack_context_t {
     
    9859
    9960        enum __Coroutine_State { Halted, Start, Primed, Blocked, Ready, Active };
    100         enum __Preemption_Reason { __NO_PREEMPTION, __ALARM_PREEMPTION, __POLL_PREEMPTION, __MANUAL_PREEMPTION };
    10161
    10262        struct $coroutine {
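
    The kernelTLS block and the TL_GET/TL_SET macros leave invoke.h; judging by the new include, those
    declarations are now expected to come from kernel/fwd.hfa. For orientation, the removed declaration boils
    down to an initial-exec thread-local struct, roughly as follows (a trimmed C restatement of the removed
    code, not the relocated version):

        #include <stdint.h>

        struct KernelThreadData {
            void * volatile this_thread;                 /* currently running $thread */
            void * volatile this_processor;              /* processor executing it */
            void * volatile this_stats;                  /* per-processor statistics */
            struct {
                volatile unsigned short disable_count;
                volatile _Bool enabled;
                volatile _Bool in_progress;
            } preemption_state;
            uint64_t rand_seed;
        };

        /* one instance per kernel thread; initial-exec keeps TLS accesses cheap */
        extern __attribute__((aligned(128))) __thread struct KernelThreadData kernelTLS
                __attribute__((tls_model("initial-exec")));

        #define TL_GET( member ) kernelTLS.member        /* the non-ARM fast path of the removed macro */
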
  • libcfa/src/concurrency/io.cfa

    r6dba8755 r95789be  
    1414//
    1515
     16#define __cforall_thread__
     17
    1618#if defined(__CFA_DEBUG__)
    1719        // #define __CFA_DEBUG_PRINT_IO__
     
    1921#endif
    2022
    21 #include "kernel.hfa"
    22 #include "bitmanip.hfa"
    23 
    24 #if !defined(CFA_HAVE_LINUX_IO_URING_H)
    25         void __kernel_io_startup( cluster &, unsigned, bool ) {
    26                 // Nothing to do without io_uring
    27         }
    28 
    29         void __kernel_io_finish_start( cluster & ) {
    30                 // Nothing to do without io_uring
    31         }
    32 
    33         void __kernel_io_prepare_stop( cluster & ) {
    34                 // Nothing to do without io_uring
    35         }
    36 
    37         void __kernel_io_shutdown( cluster &, bool ) {
    38                 // Nothing to do without io_uring
    39         }
    40 
    41 #else
     23
     24#if defined(CFA_HAVE_LINUX_IO_URING_H)
    4225        #define _GNU_SOURCE         /* See feature_test_macros(7) */
    4326        #include <errno.h>
     27        #include <signal.h>
    4428        #include <stdint.h>
    4529        #include <string.h>
    4630        #include <unistd.h>
    47         #include <sys/mman.h>
    4831
    4932        extern "C" {
     33                #include <sys/epoll.h>
    5034                #include <sys/syscall.h>
    5135
     
    5337        }
    5438
    55         #include "bits/signal.hfa"
    56         #include "kernel_private.hfa"
    57         #include "thread.hfa"
    58 
    59         uint32_t entries_per_cluster() {
    60                 return 256;
    61         }
    62 
    63         static void * __io_poller_slow( void * arg );
    64 
    65         // Weirdly, some systems that do support io_uring don't actually define these
    66         #ifdef __alpha__
    67                 /*
    68                 * alpha is the only exception, all other architectures
    69                 * have common numbers for new system calls.
    70                 */
    71                 #ifndef __NR_io_uring_setup
    72                         #define __NR_io_uring_setup           535
    73                 #endif
    74                 #ifndef __NR_io_uring_enter
    75                         #define __NR_io_uring_enter           536
    76                 #endif
    77                 #ifndef __NR_io_uring_register
    78                         #define __NR_io_uring_register        537
    79                 #endif
    80         #else /* !__alpha__ */
    81                 #ifndef __NR_io_uring_setup
    82                         #define __NR_io_uring_setup           425
    83                 #endif
    84                 #ifndef __NR_io_uring_enter
    85                         #define __NR_io_uring_enter           426
    86                 #endif
    87                 #ifndef __NR_io_uring_register
    88                         #define __NR_io_uring_register        427
    89                 #endif
    90         #endif
    91 
    92         // Fast poller user-thread
    93         // Not using the "thread" keyword because we want to control
    94         // more carefully when to start/stop it
    95         struct __io_poller_fast {
    96                 struct __io_data * ring;
    97                 $thread thrd;
    98         };
    99 
    100         void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
    101                 this.ring = cltr.io;
    102                 (this.thrd){ "Fast I/O Poller", cltr };
    103         }
    104         void ^?{}( __io_poller_fast & mutex this );
    105         void main( __io_poller_fast & this );
    106         static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
    107         void ^?{}( __io_poller_fast & mutex this ) {}
    108 
    109         struct __submition_data {
    110                 // Head and tail of the ring (associated with array)
    111                 volatile uint32_t * head;
    112                 volatile uint32_t * tail;
    113                 volatile uint32_t prev_head;
    114 
    115                 // The actual kernel ring which uses head/tail
    116                 // indexes into the sqes arrays
    117                 uint32_t * array;
    118 
    119                 // number of entries and mask to go with it
    120                 const uint32_t * num;
    121                 const uint32_t * mask;
    122 
    123                 // Submission flags (Not sure what for)
    124                 uint32_t * flags;
    125 
    126                 // number of sqes not submitted (whatever that means)
    127                 uint32_t * dropped;
    128 
    129                 // Like head/tail but not seen by the kernel
    130                 volatile uint32_t * ready;
    131                 uint32_t ready_cnt;
    132 
    133                 __spinlock_t lock;
    134                 __spinlock_t release_lock;
    135 
    136                 // A buffer of sqes (not the actual ring)
    137                 struct io_uring_sqe * sqes;
    138 
    139                 // The location and size of the mmaped area
    140                 void * ring_ptr;
    141                 size_t ring_sz;
    142         };
    143 
    144         struct __completion_data {
    145                 // Head and tail of the ring
    146                 volatile uint32_t * head;
    147                 volatile uint32_t * tail;
    148 
    149                 // number of entries and mask to go with it
    150                 const uint32_t * mask;
    151                 const uint32_t * num;
    152 
    153                 // number of cqes not submitted (whatever that means)
    154                 uint32_t * overflow;
    155 
    156                 // the kernel ring
    157                 struct io_uring_cqe * cqes;
    158 
    159                 // The location and size of the mmaped area
    160                 void * ring_ptr;
    161                 size_t ring_sz;
    162         };
    163 
    164         struct __io_data {
    165                 struct __submition_data submit_q;
    166                 struct __completion_data completion_q;
    167                 uint32_t ring_flags;
    168                 int cltr_flags;
    169                 int fd;
    170                 semaphore submit;
    171                 volatile bool done;
    172                 struct {
    173                         struct {
    174                                 __processor_id_t id;
    175                                 void * stack;
    176                                 pthread_t kthrd;
    177                                 volatile bool blocked;
    178                         } slow;
    179                         __io_poller_fast fast;
    180                         __bin_sem_t sem;
    181                 } poller;
    182         };
    183 
    184 //=============================================================================================
    185 // I/O Startup / Shutdown logic
    186 //=============================================================================================
    187         void __kernel_io_startup( cluster & this, unsigned io_flags, bool main_cluster ) {
    188                 if( (io_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS) && (io_flags & CFA_CLUSTER_IO_EAGER_SUBMITS) ) {
    189                         abort("CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS and CFA_CLUSTER_IO_EAGER_SUBMITS cannot be mixed\n");
    190                 }
    191 
    192                 this.io = malloc();
    193 
    194                 // Step 1 : call to setup
    195                 struct io_uring_params params;
    196                 memset(&params, 0, sizeof(params));
    197                 if( io_flags & CFA_CLUSTER_IO_KERNEL_POLL_SUBMITS   ) params.flags |= IORING_SETUP_SQPOLL;
    198                 if( io_flags & CFA_CLUSTER_IO_KERNEL_POLL_COMPLETES ) params.flags |= IORING_SETUP_IOPOLL;
    199 
    200                 uint32_t nentries = entries_per_cluster();
    201 
    202                 int fd = syscall(__NR_io_uring_setup, nentries, &params );
    203                 if(fd < 0) {
    204                         abort("KERNEL ERROR: IO_URING SETUP - %s\n", strerror(errno));
    205                 }
    206 
    207                 // Step 2 : mmap result
    208                 memset( this.io, 0, sizeof(struct __io_data) );
    209                 struct __submition_data  & sq = this.io->submit_q;
    210                 struct __completion_data & cq = this.io->completion_q;
    211 
    212                 // calculate the right ring size
    213                 sq.ring_sz = params.sq_off.array + (params.sq_entries * sizeof(unsigned)           );
    214                 cq.ring_sz = params.cq_off.cqes  + (params.cq_entries * sizeof(struct io_uring_cqe));
    215 
    216                 // Requires features
    217                 #if defined(IORING_FEAT_SINGLE_MMAP)
    218                         // adjust the size according to the parameters
    219                         if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
    220                                 cq.ring_sz = sq.ring_sz = max(cq.ring_sz, sq.ring_sz);
    221                         }
    222                 #endif
    223 
    224                 // mmap the Submit Queue into existence
    225                 sq.ring_ptr = mmap(0, sq.ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
    226                 if (sq.ring_ptr == (void*)MAP_FAILED) {
    227                         abort("KERNEL ERROR: IO_URING MMAP1 - %s\n", strerror(errno));
    228                 }
    229 
    230                 // Requires features
    231                 #if defined(IORING_FEAT_SINGLE_MMAP)
    232                         // mmap the Completion Queue into existence (may or may not be needed)
    233                         if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
    234                                 cq.ring_ptr = sq.ring_ptr;
    235                         }
    236                         else
    237                 #endif
    238                 {
    239                         // We need multiple call to MMAP
    240                         cq.ring_ptr = mmap(0, cq.ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
    241                         if (cq.ring_ptr == (void*)MAP_FAILED) {
    242                                 munmap(sq.ring_ptr, sq.ring_sz);
    243                                 abort("KERNEL ERROR: IO_URING MMAP2 - %s\n", strerror(errno));
    244                         }
    245                 }
    246 
    247                 // mmap the submit queue entries
    248                 size_t size = params.sq_entries * sizeof(struct io_uring_sqe);
    249                 sq.sqes = (struct io_uring_sqe *)mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
    250                 if (sq.sqes == (struct io_uring_sqe *)MAP_FAILED) {
    251                         munmap(sq.ring_ptr, sq.ring_sz);
    252                         if (cq.ring_ptr != sq.ring_ptr) munmap(cq.ring_ptr, cq.ring_sz);
    253                         abort("KERNEL ERROR: IO_URING MMAP3 - %s\n", strerror(errno));
    254                 }
    255 
    256                 // Get the pointers from the kernel to fill the structure
    257                 // submit queue
    258                 sq.head    = (volatile uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.head);
    259                 sq.tail    = (volatile uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);
    260                 sq.mask    = (   const uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);
    261                 sq.num     = (   const uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);
    262                 sq.flags   = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);
    263                 sq.dropped = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
    264                 sq.array   = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
    265                 sq.prev_head = *sq.head;
    266 
    267                 {
    268                         const uint32_t num = *sq.num;
    269                         for( i; num ) {
    270                                 sq.sqes[i].user_data = 0ul64;
    271                         }
    272                 }
    273 
    274                 (sq.lock){};
    275                 (sq.release_lock){};
    276 
    277                 if( io_flags & ( CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS | CFA_CLUSTER_IO_EAGER_SUBMITS ) ) {
    278                         /* paranoid */ verify( is_pow2( io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET ) || ((io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET) < 8)  );
    279                         sq.ready_cnt = max(io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET, 8);
    280                         sq.ready = alloc_align( 64, sq.ready_cnt );
    281                         for(i; sq.ready_cnt) {
    282                                 sq.ready[i] = -1ul32;
    283                         }
    284                 }
    285                 else {
    286                         sq.ready_cnt = 0;
    287                         sq.ready = 0p;
    288                 }
    289 
    290                 // completion queue
    291                 cq.head     = (volatile uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.head);
    292                 cq.tail     = (volatile uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.tail);
    293                 cq.mask     = (   const uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_mask);
    294                 cq.num      = (   const uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_entries);
    295                 cq.overflow = (         uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.overflow);
    296                 cq.cqes   = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes);
    297 
    298                 // some paranoid checks
    299                 /* paranoid */ verifyf( (*cq.mask) == ((*cq.num) - 1ul32), "IO_URING Expected mask to be %u (%u entries), was %u", (*cq.num) - 1ul32, *cq.num, *cq.mask  );
    300                 /* paranoid */ verifyf( (*cq.num)  >= nentries, "IO_URING Expected %u entries, got %u", nentries, *cq.num );
    301                 /* paranoid */ verifyf( (*cq.head) == 0, "IO_URING Expected head to be 0, got %u", *cq.head );
    302                 /* paranoid */ verifyf( (*cq.tail) == 0, "IO_URING Expected tail to be 0, got %u", *cq.tail );
    303 
    304                 /* paranoid */ verifyf( (*sq.mask) == ((*sq.num) - 1ul32), "IO_URING Expected mask to be %u (%u entries), was %u", (*sq.num) - 1ul32, *sq.num, *sq.mask );
    305                 /* paranoid */ verifyf( (*sq.num) >= nentries, "IO_URING Expected %u entries, got %u", nentries, *sq.num );
    306                 /* paranoid */ verifyf( (*sq.head) == 0, "IO_URING Expected head to be 0, got %u", *sq.head );
    307                 /* paranoid */ verifyf( (*sq.tail) == 0, "IO_URING Expected tail to be 0, got %u", *sq.tail );
    308 
    309                 // Update the global ring info
    310                 this.io->ring_flags = params.flags;
    311                 this.io->cltr_flags = io_flags;
    312                 this.io->fd         = fd;
    313                 this.io->done       = false;
    314                 (this.io->submit){ min(*sq.num, *cq.num) };
    315 
    316                 if(!main_cluster) {
    317                         __kernel_io_finish_start( this );
    318                 }
    319         }
    320 
    321         void __kernel_io_finish_start( cluster & this ) {
    322                 if( this.io->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
    323                         __cfadbg_print_safe(io_core, "Kernel I/O : Creating fast poller for cluter %p\n", &this);
    324                         (this.io->poller.fast){ this };
    325                         __thrd_start( this.io->poller.fast, main );
    326                 }
    327 
    328                 // Create the poller thread
    329                 __cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluster %p\n", &this);
    330                 this.io->poller.slow.blocked = false;
    331                 this.io->poller.slow.stack = __create_pthread( &this.io->poller.slow.kthrd, __io_poller_slow, &this );
    332         }
    333 
    334         void __kernel_io_prepare_stop( cluster & this ) {
    335                 __cfadbg_print_safe(io_core, "Kernel I/O : Stopping pollers for cluster\n", &this);
    336                 // Notify the poller thread of the shutdown
    337                 __atomic_store_n(&this.io->done, true, __ATOMIC_SEQ_CST);
    338 
    339                 // Stop the IO Poller
    340                 sigval val = { 1 };
    341                 pthread_sigqueue( this.io->poller.slow.kthrd, SIGUSR1, val );
    342                 post( this.io->poller.sem );
    343 
    344                 // Wait for the poller thread to finish
    345                 pthread_join( this.io->poller.slow.kthrd, 0p );
    346                 free( this.io->poller.slow.stack );
    347 
    348                 __cfadbg_print_safe(io_core, "Kernel I/O : Slow poller stopped for cluster\n", &this);
    349 
    350                 if( this.io->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
    351                         with( this.io->poller.fast ) {
    352                                 /* paranoid */ verify( this.nprocessors == 0 || &this == mainCluster );
    353                                 /* paranoid */ verify( !ready_mutate_islocked() );
    354 
    355                                 // We need to adjust the clean-up based on where the thread is
    356                                 if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) {
    357 
    358                                         ready_schedule_lock( (struct __processor_id_t *)active_processor() );
    359 
    360                                                 // This is the tricky case
    361                                                 // The thread was preempted and now it is on the ready queue
    362                                                 // The thread should be the last on the list
    363                                                 /* paranoid */ verify( thrd.link.next != 0p );
    364 
    365                                                 // Remove the thread from the ready queue of this cluster
    366                                                 __attribute__((unused)) bool removed = remove_head( &this, &thrd );
    367                                                 /* paranoid */ verify( removed );
    368                                                 thrd.link.next = 0p;
    369                                                 thrd.link.prev = 0p;
    370                                                 __cfaabi_dbg_debug_do( thrd.unpark_stale = true );
    371 
    372                                                 // Fixup the thread state
    373                                                 thrd.state = Blocked;
    374                                                 thrd.ticket = 0;
    375                                                 thrd.preempted = __NO_PREEMPTION;
    376 
    377                                         ready_schedule_unlock( (struct __processor_id_t *)active_processor() );
    378 
    379                                         // Pretend like the thread was blocked all along
    380                                 }
    381                                 // !!! This is not an else if !!!
    382                                 if( thrd.state == Blocked ) {
    383 
    384                                         // This is the "easy case"
    385                                         // The thread is parked and can easily be moved to active cluster
    386                                         verify( thrd.curr_cluster != active_cluster() || thrd.curr_cluster == mainCluster );
    387                                         thrd.curr_cluster = active_cluster();
    388 
    389                                         // unpark the fast io_poller
    390                                         unpark( &thrd __cfaabi_dbg_ctx2 );
    391                                 }
    392                                 else {
    393 
    394                                         // The thread is in a weird state
    395                                         // I don't know what to do here
    396                                         abort("Fast poller thread is in unexpected state, cannot clean-up correctly\n");
    397                                 }
    398 
    399                         }
    400 
    401                         ^(this.io->poller.fast){};
    402 
    403                         __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller stopped for cluster\n", &this);
    404                 }
    405         }
    406 
    407         void __kernel_io_shutdown( cluster & this, bool main_cluster ) {
    408                 if(!main_cluster) {
    409                         __kernel_io_prepare_stop( this );
    410                 }
    411 
    412                 // Shutdown the io rings
    413                 struct __submition_data  & sq = this.io->submit_q;
    414                 struct __completion_data & cq = this.io->completion_q;
    415 
    416                 // unmap the submit queue entries
    417                 munmap(sq.sqes, (*sq.num) * sizeof(struct io_uring_sqe));
    418 
    419                 // unmap the Submit Queue ring
    420                 munmap(sq.ring_ptr, sq.ring_sz);
    421 
    422                 // unmap the Completion Queue ring, if it is different
    423                 if (cq.ring_ptr != sq.ring_ptr) {
    424                         munmap(cq.ring_ptr, cq.ring_sz);
    425                 }
    426 
    427                 // close the file descriptor
    428                 close(this.io->fd);
    429 
    430                 free( this.io->submit_q.ready ); // Maybe null, doesn't matter
    431                 free( this.io );
    432         }
    433 
    434         int __io_uring_enter( struct __io_data & ring, unsigned to_submit, bool get, sigset_t * mask ) {
     39        #include "stats.hfa"
     40        #include "kernel.hfa"
     41        #include "kernel/fwd.hfa"
     42        #include "io/types.hfa"
     43
     44//=============================================================================================
     45// I/O Syscall
     46//=============================================================================================
     47        static int __io_uring_enter( struct __io_data & ring, unsigned to_submit, bool get ) {
    43548                bool need_sys_to_submit = false;
    43649                bool need_sys_to_complete = false;
    437                 unsigned min_complete = 0;
    43850                unsigned flags = 0;
    439 
    44051
    44152                TO_SUBMIT:
     
    45162                }
    45263
    453                 TO_COMPLETE:
    45464                if( get && !(ring.ring_flags & IORING_SETUP_SQPOLL) ) {
    45565                        flags |= IORING_ENTER_GETEVENTS;
    456                         if( mask ) {
    457                                 need_sys_to_complete = true;
    458                                 min_complete = 1;
    459                                 break TO_COMPLETE;
    460                         }
    46166                        if( (ring.ring_flags & IORING_SETUP_IOPOLL) ) {
    46267                                need_sys_to_complete = true;
     
    46671                int ret = 0;
    46772                if( need_sys_to_submit || need_sys_to_complete ) {
    468                         ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, min_complete, flags, mask, _NSIG / 8);
     73                        ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, 0, flags, 0p, _NSIG / 8);
    46974                        if( ret < 0 ) {
    47075                                switch((int)errno) {
     
    49095        static uint32_t __release_consumed_submission( struct __io_data & ring );
    49196
    492         static inline void process(struct io_uring_cqe & cqe, struct __processor_id_t * id ) {
     97        static inline void process(struct io_uring_cqe & cqe ) {
    49398                struct __io_user_data_t * data = (struct __io_user_data_t *)(uintptr_t)cqe.user_data;
    49499                __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", data, cqe.res, data->thrd );
    495100
    496101                data->result = cqe.res;
    497                 if(!id) { unpark(     data->thrd __cfaabi_dbg_ctx2 ); }
    498                 else  { __unpark( id, data->thrd __cfaabi_dbg_ctx2 ); }
     102                unpark( data->thrd __cfaabi_dbg_ctx2 );
    499103        }
    500104
    501105        // Process a single completion message from the io_uring
    502106        // This is NOT thread-safe
    503         static [int, bool] __drain_io( & struct __io_data ring, * sigset_t mask ) {
     107        static [int, bool] __drain_io( & struct __io_data ring ) {
    504108                /* paranoid */ verify( !kernelTLS.preemption_state.enabled );
    505109
    506110                unsigned to_submit = 0;
    507                 if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
     111                if( ring.poller_submits ) {
    508112                        // If the poller thread also submits, then we need to aggregate the submissions which are ready
    509113                        to_submit = __collect_submitions( ring );
    510114                }
    511115
    512                 int ret = __io_uring_enter(ring, to_submit, true, mask);
     116                int ret = __io_uring_enter(ring, to_submit, true);
    513117                if( ret < 0 ) {
    514118                        return [0, true];
     
    547151                        /* paranoid */ verify(&cqe);
    548152
    549                         process( cqe, !mask ? (struct __processor_id_t *)0p : &ring.poller.slow.id );
    550                 }
    551 
    552                 // Allow new submissions to happen
    553                 // V(ring.submit, count);
     153                        process( cqe );
     154                }
    554155
    555156                // Mark to the kernel that the cqe has been seen
     
    561162        }
    562163
    563         static void * __io_poller_slow( void * arg ) {
    564                 #if !defined( __CFA_NO_STATISTICS__ )
    565                         __stats_t local_stats;
    566                         __init_stats( &local_stats );
    567                         kernelTLS.this_stats = &local_stats;
    568                 #endif
    569 
    570                 cluster * cltr = (cluster *)arg;
    571                 struct __io_data & ring = *cltr->io;
    572 
    573                 ring.poller.slow.id.id = doregister( &ring.poller.slow.id );
    574 
    575                 sigset_t mask;
    576                 sigfillset(&mask);
    577                 if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
    578                         abort( "KERNEL ERROR: IO_URING - pthread_sigmask" );
    579                 }
    580 
    581                 sigdelset( &mask, SIGUSR1 );
    582 
    583                 verify( (*ring.submit_q.head) == (*ring.submit_q.tail) );
    584                 verify( (*ring.completion_q.head) == (*ring.completion_q.tail) );
    585 
    586                 __cfadbg_print_safe(io_core, "Kernel I/O : Slow poller for ring %p ready\n", &ring);
    587 
    588                 if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
    589                         while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
    590 
    591                                 __atomic_store_n( &ring.poller.slow.blocked, true, __ATOMIC_SEQ_CST );
    592 
    593                                 // In the user-thread approach drain and if anything was drained,
    594                                 // batton pass to the user-thread
    595                                 int count;
    596                                 bool again;
    597                                 [count, again] = __drain_io( ring, &mask );
    598 
    599                                 __atomic_store_n( &ring.poller.slow.blocked, false, __ATOMIC_SEQ_CST );
    600 
    601                                 // Update statistics
    602                                 __STATS__( true,
    603                                         io.complete_q.completed_avg.val += count;
    604                                         io.complete_q.completed_avg.slow_cnt += 1;
    605                                 )
    606 
    607                                 if(again) {
    608                                         __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to fast poller\n", &ring);
    609                                         __unpark( &ring.poller.slow.id, &ring.poller.fast.thrd __cfaabi_dbg_ctx2 );
    610                                         wait( ring.poller.sem );
    611                                 }
    612                         }
    613                 }
    614                 else {
    615                         while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
    616                                 //In the naive approach, just poll the io completion queue directly
    617                                 int count;
    618                                 bool again;
    619                                 [count, again] = __drain_io( ring, &mask );
    620 
    621                                 // Update statistics
    622                                 __STATS__( true,
    623                                         io.complete_q.completed_avg.val += count;
    624                                         io.complete_q.completed_avg.slow_cnt += 1;
    625                                 )
    626                         }
    627                 }
    628 
    629                 __cfadbg_print_safe(io_core, "Kernel I/O : Slow poller for ring %p stopping\n", &ring);
    630 
    631                 unregister( &ring.poller.slow.id );
    632 
    633                 #if !defined(__CFA_NO_STATISTICS__)
    634                         __tally_stats(cltr->stats, &local_stats);
    635                 #endif
    636 
    637                 return 0p;
    638         }
    639 
    640         void main( __io_poller_fast & this ) {
    641                 verify( this.ring->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD );
    642 
    643                 // Start parked
    644                 park( __cfaabi_dbg_ctx );
    645 
    646                 __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p ready\n", &this.ring);
     164        void main( $io_ctx_thread & this ) {
     165                epoll_event ev;
     166                __ioctx_register( this, ev );
     167
     168                __cfadbg_print_safe(io_core, "Kernel I/O : IO poller %p for ring %p ready\n", &this, &this.ring);
    647169
    648170                int reset = 0;
    649 
    650171                // Then loop until we need to start
    651                 while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
    652 
     172                while(!__atomic_load_n(&this.done, __ATOMIC_SEQ_CST)) {
    653173                        // Drain the io
    654174                        int count;
    655175                        bool again;
    656176                        disable_interrupts();
    657                                 [count, again] = __drain_io( *this.ring, 0p );
     177                                [count, again] = __drain_io( *this.ring );
    658178
    659179                                if(!again) reset++;
     
    672192                        // We didn't get anything baton pass to the slow poller
    673193                        else {
    674                                 __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
     194                                __cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %p\n", &this.self);
    675195                                reset = 0;
    676196
    677                                 // wake up the slow poller
    678                                 post( this.ring->poller.sem );
    679 
    680                                 // park this thread
    681                                 park( __cfaabi_dbg_ctx );
     197                                // block this thread
     198                                __ioctx_prepare_block( this, ev );
     199                                wait( this.sem );
    682200                        }
    683201                }
    684202
    685203                __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring);
    686         }
    687 
    688         static inline void __wake_poller( struct __io_data & ring ) __attribute__((artificial));
    689         static inline void __wake_poller( struct __io_data & ring ) {
    690                 if(!__atomic_load_n( &ring.poller.slow.blocked, __ATOMIC_SEQ_CST)) return;
    691 
    692                 sigval val = { 1 };
    693                 pthread_sigqueue( ring.poller.slow.kthrd, SIGUSR1, val );
    694204        }
    695205
     
    806316        }
    807317
    808         void __submit( struct __io_data & ring, uint32_t idx ) {
     318        void __submit( struct io_context * ctx, uint32_t idx ) __attribute__((nonnull (1))) {
     319                __io_data & ring = *ctx->thrd.ring;
    809320                // Get now the data we definetely need
    810321                uint32_t * const tail = ring.submit_q.tail;
    811                 const uint32_t mask = *ring.submit_q.mask;
     322                const uint32_t mask  = *ring.submit_q.mask;
    812323
    813324                // There are 2 submission schemes, check which one we are using
    814                 if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
     325                if( ring.poller_submits ) {
    815326                        // If the poller thread submits, then we just need to add this to the ready array
    816327                        __submit_to_ready_array( ring, idx, mask );
    817328
    818                         __wake_poller( ring );
     329                        post( ctx->thrd.sem );
    819330
    820331                        __cfadbg_print_safe( io, "Kernel I/O : Added %u to ready for %p\n", idx, active_thread() );
    821332                }
    822                 else if( ring.cltr_flags & CFA_CLUSTER_IO_EAGER_SUBMITS ) {
     333                else if( ring.eager_submits ) {
    823334                        uint32_t picked = __submit_to_ready_array( ring, idx, mask );
    824335
     
    849360                        // We got the lock
    850361                        unsigned to_submit = __collect_submitions( ring );
    851                         int ret = __io_uring_enter( ring, to_submit, false, 0p );
     362                        int ret = __io_uring_enter( ring, to_submit, false );
    852363                        if( ret < 0 ) {
    853364                                unlock(ring.submit_q.lock);
     
    892403
    893404                        // Submit however many entries need to be submitted
    894                         int ret = __io_uring_enter( ring, 1, false, 0p );
     405                        int ret = __io_uring_enter( ring, 1, false );
    895406                        if( ret < 0 ) {
    896407                                switch((int)errno) {
     
    958469                return count;
    959470        }
    960 
    961 //=============================================================================================
    962 // I/O Submissions
    963 //=============================================================================================
    964 
    965         void register_fixed_files( cluster & cl, int * files, unsigned count ) {
    966                 int ret = syscall( __NR_io_uring_register, cl.io->fd, IORING_REGISTER_FILES, files, count );
    967                 if( ret < 0 ) {
    968                         abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
    969                 }
    970 
    971                 __cfadbg_print_safe( io_core, "Kernel I/O : Performed io_register for %p, returned %d\n", active_thread(), ret );
    972         }
    973471#endif
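
On the submission side the cluster-wide CFA_CLUSTER_IO_* flags are gone: each ring carries its own `poller_submits`/`eager_submits` booleans, __submit is handed the owning io_context, and waking the poller is a plain post on that context's semaphore instead of a pthread signal. Condensed from the hunks above (not a verbatim copy; the eager and direct paths keep the locking and errno handling shown in the diff):

        void __submit( struct io_context * ctx, uint32_t idx ) {
                __io_data & ring = *ctx->thrd.ring;
                const uint32_t mask = *ring.submit_q.mask;

                if( ring.poller_submits ) {
                        // publish the sqe index and let the poller thread call io_uring_enter
                        __submit_to_ready_array( ring, idx, mask );
                        post( ctx->thrd.sem );
                }
                else if( ring.eager_submits ) {
                        // batch with concurrent submitters, then one of them enters the kernel
                        __submit_to_ready_array( ring, idx, mask );
                        /* ... take submit_q.lock, __collect_submitions( ring ), __io_uring_enter ... */
                }
                else {
                        // default: submit this one entry directly
                        int ret = __io_uring_enter( ring, 1, false );
                        /* ... errno handling as in the diff ... */
                }
        }
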
  • libcfa/src/concurrency/iocall.cfa

    r6dba8755 r95789be  
    1414//
    1515
     16#define __cforall_thread__
     17
    1618#include "bits/defs.hfa"
    1719
     
    2123
    2224#if defined(CFA_HAVE_LINUX_IO_URING_H)
     25        #include <assert.h>
    2326        #include <stdint.h>
     27        #include <errno.h>
    2428        #include <linux/io_uring.h>
    2529
    26         #include "kernel_private.hfa"
     30        #include "kernel.hfa"
     31        #include "kernel/fwd.hfa"
     32        #include "io/types.hfa"
    2733
    2834        extern [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring, uint64_t data );
    29         extern void __submit( struct __io_data & ring, uint32_t idx );
     35        extern void __submit( struct io_context * ctx, uint32_t idx ) __attribute__((nonnull (1)));
    3036
    3137        static inline void ?{}(struct io_uring_sqe & this, uint8_t opcode, int fd) {
     
    5258        }
    5359
     60        static inline io_context * __get_io_context( void ) {
     61                cluster * cltr = active_cluster();
     62                /* paranoid */ verifyf( cltr, "No active cluster for io operation\n");
     63                assertf( cltr->io.cnt > 0, "Cluster %p has no default io contexts and no context was specified\n", cltr );
     64                /* paranoid */ verifyf( cltr->io.ctxs, "default io contexts for cluster %p are missing\n", cltr);
     65                return &cltr->io.ctxs[ __tls_rand() % cltr->io.cnt ];
     66        }
     67
     68
     69      #if defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_IO_DRAIN) && defined(CFA_HAVE_IOSQE_ASYNC)
     70                #define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_DRAIN | IOSQE_ASYNC)
     71        #elif defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_ASYNC)
     72                #define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_ASYNC)
     73      #elif defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_IO_DRAIN)
     74                #define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_DRAIN)
     75      #elif defined(CFA_HAVE_IOSQE_IO_DRAIN) && defined(CFA_HAVE_IOSQE_ASYNC)
     76                #define REGULAR_FLAGS (IOSQE_IO_DRAIN | IOSQE_ASYNC)
     77        #elif defined(CFA_HAVE_IOSQE_FIXED_FILE)
     78                #define REGULAR_FLAGS (IOSQE_FIXED_FILE)
     79      #elif defined(CFA_HAVE_IOSQE_IO_DRAIN)
     80                #define REGULAR_FLAGS (IOSQE_IO_DRAIN)
     81      #elif defined(CFA_HAVE_IOSQE_ASYNC)
     82                #define REGULAR_FLAGS (IOSQE_ASYNC)
     83        #else
     84                #define REGULAR_FLAGS (0)
     85        #endif
     86
     87        #if defined(CFA_HAVE_IOSQE_IO_LINK) && defined(CFA_HAVE_IOSQE_IO_HARDLINK)
     88                #define LINK_FLAGS (IOSQE_IO_LINK | IOSQE_IO_HARDLINK)
     89        #elif defined(CFA_HAVE_IOSQE_IO_LINK)
     90                #define LINK_FLAGS (IOSQE_IO_LINK)
     91        #elif defined(CFA_HAVE_IOSQE_IO_HARDLINK)
     92                #define LINK_FLAGS (IOSQE_IO_HARDLINK)
     93        #else
     94                #define LINK_FLAGS (0)
     95        #endif
     96
     97        #if defined(CFA_HAVE_SPLICE_F_FD_IN_FIXED)
     98                #define SPLICE_FLAGS (SPLICE_F_FD_IN_FIXED)
     99        #else
     100                #define SPLICE_FLAGS (0)
     101        #endif
     102
     103
    54104        #define __submit_prelude \
     105                if( 0 != (submit_flags & LINK_FLAGS) ) { errno = ENOTSUP; return -1; } \
     106                (void)timeout; (void)cancellation; \
     107                if( !context ) context = __get_io_context(); \
    55108                __io_user_data_t data = { 0, active_thread() }; \
    56                 struct __io_data & ring = *data.thrd->curr_cluster->io; \
     109                struct __io_data & ring = *context->thrd.ring; \
    57110                struct io_uring_sqe * sqe; \
    58111                uint32_t idx; \
    59                 [sqe, idx] = __submit_alloc( ring, (uint64_t)(uintptr_t)&data );
     112                [sqe, idx] = __submit_alloc( ring, (uint64_t)(uintptr_t)&data ); \
     113                sqe->flags = REGULAR_FLAGS & submit_flags;
    60114
    61115        #define __submit_wait \
    62116                /*__cfaabi_bits_print_safe( STDERR_FILENO, "Preparing user data %p for %p\n", &data, data.thrd );*/ \
    63117                verify( sqe->user_data == (uint64_t)(uintptr_t)&data ); \
    64                 __submit( ring, idx ); \
     118                __submit( context, idx ); \
    65119                park( __cfaabi_dbg_ctx ); \
     120                if( data.result < 0 ) { \
     121                        errno = -data.result; \
     122                        return -1; \
     123                } \
    66124                return data.result;
    67125#endif
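
Every cfa_* wrapper now funnels through this pair of macros: a context is picked with __get_io_context when the caller does not supply one, sqe->flags is seeded from REGULAR_FLAGS & submit_flags so only locally-supported IOSQE_* bits pass through, link flags are rejected with ENOTSUP, and a negative completion result is turned into the usual errno/-1 convention instead of being returned raw. Expanded by hand for a wrapper such as cfa_fsync, the body looks roughly like this (a sketch of the expansion, not compiler output):

        if( 0 != (submit_flags & LINK_FLAGS) ) { errno = ENOTSUP; return -1; }
        (void)timeout; (void)cancellation;                        // accepted, but not acted on yet
        if( !context ) context = __get_io_context();              // pick a ring on the active cluster
        __io_user_data_t data = { 0, active_thread() };
        struct __io_data & ring = *context->thrd.ring;
        struct io_uring_sqe * sqe;
        uint32_t idx;
        [sqe, idx] = __submit_alloc( ring, (uint64_t)(uintptr_t)&data );
        sqe->flags = REGULAR_FLAGS & submit_flags;                 // mask out unsupported bits

        (*sqe){ IORING_OP_FSYNC, fd };                             // operation-specific setup goes here

        __submit( context, idx );                                  // queue the sqe
        park( __cfaabi_dbg_ctx );                                  // block this user thread until completion
        if( data.result < 0 ) { errno = -data.result; return -1; }
        return data.result;
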
     
    70128// I/O Forwards
    71129//=============================================================================================
     130#include <time.hfa>
    72131
    73132// Some forward declarations
     
    121180// Asynchronous operations
    122181#if defined(HAVE_PREADV2)
    123         ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
     182        ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    124183                #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_READV)
    125184                        return preadv2(fd, iov, iovcnt, offset, flags);
     
    132191                #endif
    133192        }
    134 
    135         ssize_t cfa_preadv2_fixed(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
    136                 #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_READV)
    137                         return preadv2(fd, iov, iovcnt, offset, flags);
     193#endif
     194
     195#if defined(HAVE_PWRITEV2)
     196        ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
     197                #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_WRITEV)
     198                        return pwritev2(fd, iov, iovcnt, offset, flags);
    138199                #else
    139200                        __submit_prelude
    140201
    141                         (*sqe){ IORING_OP_READV, fd, iov, iovcnt, offset };
    142                         sqe->flags |= IOSQE_FIXED_FILE;
     202                        (*sqe){ IORING_OP_WRITEV, fd, iov, iovcnt, offset };
    143203
    144204                        __submit_wait
     
    147207#endif
    148208
    149 #if defined(HAVE_PWRITEV2)
    150         ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
    151                 #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_WRITEV)
    152                         return pwritev2(fd, iov, iovcnt, offset, flags);
    153                 #else
    154                         __submit_prelude
    155 
    156                         (*sqe){ IORING_OP_WRITEV, fd, iov, iovcnt, offset };
    157 
    158                         __submit_wait
    159                 #endif
    160         }
    161 #endif
    162 
    163 int cfa_fsync(int fd) {
     209int cfa_fsync(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    164210        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_FSYNC)
    165211                return fsync(fd);
     
    173219}
    174220
    175 int cfa_sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags) {
     221int cfa_sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    176222        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_SYNC_FILE_RANGE)
    177223                return sync_file_range(fd, offset, nbytes, flags);
     
    189235
    190236
    191 ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags) {
     237ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    192238        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_SENDMSG)
    193239                return sendmsg(sockfd, msg, flags);
     
    202248}
    203249
    204 ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags) {
     250ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    205251        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_RECVMSG)
    206252                return recvmsg(sockfd, msg, flags);
     
    215261}
    216262
    217 ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags) {
     263ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    218264        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_SEND)
    219265                return send( sockfd, buf, len, flags );
     
    230276}
    231277
    232 ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags) {
     278ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    233279        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_RECV)
    234280                return recv( sockfd, buf, len, flags );
     
    245291}
    246292
    247 int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) {
     293int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    248294        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_ACCEPT)
    249295                return accept4( sockfd, addr, addrlen, flags );
     
    260306}
    261307
    262 int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
     308int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    263309        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_CONNECT)
    264310                return connect( sockfd, addr, addrlen );
     
    274320}
    275321
    276 int cfa_fallocate(int fd, int mode, uint64_t offset, uint64_t len) {
     322int cfa_fallocate(int fd, int mode, uint64_t offset, uint64_t len, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    277323        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_FALLOCATE)
    278324                return fallocate( fd, mode, offset, len );
     
    289335}
    290336
    291 int cfa_fadvise(int fd, uint64_t offset, uint64_t len, int advice) {
     337int cfa_fadvise(int fd, uint64_t offset, uint64_t len, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    292338        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_FADVISE)
    293339                return posix_fadvise( fd, offset, len, advice );
     
    304350}
    305351
    306 int cfa_madvise(void *addr, size_t length, int advice) {
     352int cfa_madvise(void *addr, size_t length, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    307353        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_MADVISE)
    308354                return madvise( addr, length, advice );
     
    319365}
    320366
    321 int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode) {
     367int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    322368        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_OPENAT)
    323369                return openat( dirfd, pathname, flags, mode );
     
    334380}
    335381
    336 int cfa_close(int fd) {
     382int cfa_close(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    337383        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_CLOSE)
    338384                return close( fd );
     
    348394// Forward declare in case it is not supported
    349395struct statx;
    350 int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf) {
     396int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    351397        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_STATX)
    352398                #if defined(__NR_statx)
     
    360406
    361407                (*sqe){ IORING_OP_STATX, dirfd, pathname, mask, (uint64_t)statxbuf };
    362                 sqe->flags = flags;
    363 
    364                 __submit_wait
    365         #endif
    366 }
    367 
    368 ssize_t cfa_read(int fd, void *buf, size_t count) {
     408                sqe->statx_flags = flags;
     409
     410                __submit_wait
     411        #endif
     412}
     413
     414ssize_t cfa_read(int fd, void *buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    369415        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_READ)
    370416                return read( fd, buf, count );
     
    378424}
    379425
    380 ssize_t cfa_write(int fd, void *buf, size_t count) {
     426ssize_t cfa_write(int fd, void *buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    381427        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_WRITE)
    382428                return write( fd, buf, count );
     
    390436}
    391437
    392 ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags) {
     438ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    393439        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_SPLICE)
    394440                return splice( fd_in, off_in, fd_out, off_out, len, flags );
     
    399445                sqe->splice_fd_in  = fd_in;
    400446                sqe->splice_off_in = off_in;
    401                 sqe->splice_flags  = flags;
    402 
    403                 __submit_wait
    404         #endif
    405 }
    406 
    407 ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int in_flags, int out_flags) {
    408         #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_SPLICE)
    409                 return splice( fd_in, off_in, fd_out, off_out, len, flags );
    410         #else
    411                 __submit_prelude
    412 
    413                 (*sqe){ IORING_OP_SPLICE, fd_out, 0p, len, off_out };
    414                 sqe->splice_fd_in  = fd_in;
    415                 sqe->splice_off_in = off_in;
    416                 sqe->splice_flags  = flags | out_flags;
    417                 sqe->flags = in_flags;
    418 
    419                 __submit_wait
    420         #endif
    421 }
    422 
    423 ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags) {
     447                sqe->splice_flags  = flags | (SPLICE_FLAGS & submit_flags);
     448
     449                __submit_wait
     450        #endif
     451}
     452
     453ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {
    424454        #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_TEE)
    425455                return tee( fd_in, fd_out, len, flags );
     
    429459                (*sqe){ IORING_OP_TEE, fd_out, 0p, len, 0 };
    430460                sqe->splice_fd_in = fd_in;
    431                 sqe->splice_flags = flags;
     461                sqe->splice_flags  = flags | (SPLICE_FLAGS & submit_flags);
    432462
    433463                __submit_wait
     
    536566
    537567                if( /*func == (fptr_t)splice || */
    538                         func == (fptr_t)(ssize_t (*)(int, loff_t *, int, loff_t *, size_t, unsigned int))cfa_splice,
    539                         func == (fptr_t)(ssize_t (*)(int, loff_t *, int, loff_t *, size_t, unsigned int, int, int))cfa_splice )
     568                        func == (fptr_t)cfa_splice )
    540569                        #define _CFA_IO_FEATURE_CFA_HAVE_IORING_OP_SPLICE ,
    541570                        return IS_DEFINED(CFA_HAVE_IORING_OP_SPLICE);
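
The second cfa_splice overload disappears: requesting a fixed (registered) input fd is now done through submit_flags, which the splice and tee paths mask with SPLICE_FLAGS before or-ing into sqe->splice_flags. A hypothetical call site, assuming CFA_IO_FIXED_FD2 is defined for the build (it maps to SPLICE_F_FD_IN_FIXED, see iofwd.hfa below); fixed_in, pipe_out and len are placeholders:

        // fd_in is an index into the registered file table rather than a plain descriptor
        ssize_t moved = cfa_splice( fixed_in, 0p, pipe_out, 0p, len, 0, CFA_IO_FIXED_FD2 );
        if( moved < 0 ) abort( "splice: %s\n", strerror( errno ) );
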
  • libcfa/src/concurrency/iofwd.hfa

    r6dba8755 r95789be  
    1919extern "C" {
    2020        #include <sys/types.h>
     21        #if CFA_HAVE_LINUX_IO_URING_H
     22                #include <linux/io_uring.h>
     23        #endif
    2124}
    2225#include "bits/defs.hfa"
     26#include "time.hfa"
     27
     28#if defined(CFA_HAVE_IOSQE_FIXED_FILE)
     29        #define CFA_IO_FIXED_FD1 IOSQE_FIXED_FILE
     30#endif
     31#if defined(CFA_HAVE_SPLICE_F_FD_IN_FIXED)
     32        #define CFA_IO_FIXED_FD2 SPLICE_F_FD_IN_FIXED
     33#endif
     34#if defined(CFA_HAVE_IOSQE_IO_DRAIN)
     35        #define CFA_IO_DRAIN IOSQE_IO_DRAIN
     36#endif
     37#if defined(CFA_HAVE_IOSQE_ASYNC)
     38        #define CFA_IO_ASYNC IOSQE_ASYNC
     39#endif
     40
     41struct cluster;
     42struct io_context;
     43struct io_cancellation;
    2344
    2445struct iovec;
     
    2748struct statx;
    2849
    29 extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
    30 extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
    31 extern int cfa_fsync(int fd);
    32 extern int cfa_sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags);
    33 extern ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags);
    34 extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags);
    35 extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags);
    36 extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags);
    37 extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags);
    38 extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen);
    39 extern int cfa_fallocate(int fd, int mode, uint64_t offset, uint64_t len);
    40 extern int cfa_fadvise(int fd, uint64_t offset, uint64_t len, int advice);
    41 extern int cfa_madvise(void *addr, size_t length, int advice);
    42 extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode);
    43 extern int cfa_close(int fd);
    44 extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf);
    45 extern ssize_t cfa_read(int fd, void *buf, size_t count);
    46 extern ssize_t cfa_write(int fd, void *buf, size_t count);
    47 extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags);
    48 extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags);
     50extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     51extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     52extern int cfa_fsync(int fd, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     53extern int cfa_sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     54extern ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     55extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     56extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     57extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     58extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     59extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     60extern int cfa_fallocate(int fd, int mode, uint64_t offset, uint64_t len, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     61extern int cfa_fadvise(int fd, uint64_t offset, uint64_t len, int advice, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     62extern int cfa_madvise(void *addr, size_t length, int advice, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     63extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     64extern int cfa_close(int fd, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     65extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     66extern ssize_t cfa_read(int fd, void *buf, size_t count, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     67extern ssize_t cfa_write(int fd, void *buf, size_t count, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     68extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
     69extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p);
    4970
    5071//-----------------------------------------------------------------------------
    5172// Check if a function blocks only the user thread
    5273bool has_user_level_blocking( fptr_t func );
     74
     75//-----------------------------------------------------------------------------
     76void register_fixed_files( io_context & ctx , int * files, unsigned count );
     77void register_fixed_files( cluster    & cltr, int * files, unsigned count );
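
Because all of the new trailing parameters are defaulted, existing call sites keep compiling unchanged; callers opt in only where they need to. A hypothetical example combining the defaults with the new per-context fixed-file registration, assuming CFA_IO_FIXED_FD1 is available on the build (the_cluster, the file name and the error handling are placeholders; timeout and cancellation are accepted but, per __submit_prelude above, not yet acted on):

        io_context ctx = { the_cluster };                  // a dedicated ring for these operations
        int fds[1] = { open( "data.bin", O_RDONLY ) };
        register_fixed_files( ctx, fds, 1 );               // fds[0] becomes fixed-file index 0

        char buf[512];
        // defaults: no submit flags, no timeout, no cancellation, context chosen automatically
        ssize_t r1 = cfa_read( fds[0], buf, sizeof(buf) );
        // explicit: address registered file index 0 and submit through ctx
        ssize_t r2 = cfa_read( 0, buf, sizeof(buf), CFA_IO_FIXED_FD1, -1`s, 0p, &ctx );
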
  • libcfa/src/concurrency/kernel.cfa

    r6dba8755 r95789be  
    1818
    1919//C Includes
    20 #include <stddef.h>
    2120#include <errno.h>
    22 #include <string.h>
    2321#include <stdio.h>
    24 #include <fenv.h>
    2522#include <signal.h>
    2623#include <unistd.h>
    27 #include <limits.h>                                                                             // PTHREAD_STACK_MIN
    28 #include <sys/mman.h>                                                                   // mprotect
    29 extern "C" {
    30 #include <sys/resource.h>
    31 }
    3224
    3325//CFA Includes
    34 #include "time.hfa"
    3526#include "kernel_private.hfa"
    3627#include "preemption.hfa"
    37 #include "startup.hfa"
    3828
    3929//Private includes
     
    4535// Some assembly required
    4636#if defined( __i386 )
    47         #define CtxGet( ctx )        \
    48                 __asm__ volatile (     \
    49                         "movl %%esp,%0\n"\
    50                         "movl %%ebp,%1\n"\
    51                         : "=rm" (ctx.SP),\
    52                                 "=rm" (ctx.FP) \
    53                 )
    54 
    5537        // mxcr : SSE Status and Control bits (control bits are preserved across function calls)
    5638        // fcw  : X87 FPU control word (preserved across function calls)
     
    7456
    7557#elif defined( __x86_64 )
    76         #define CtxGet( ctx )        \
    77                 __asm__ volatile (     \
    78                         "movq %%rsp,%0\n"\
    79                         "movq %%rbp,%1\n"\
    80                         : "=rm" (ctx.SP),\
    81                                 "=rm" (ctx.FP) \
    82                 )
    83 
    8458        #define __x87_store         \
    8559                uint32_t __mxcr;      \
     
    10276
    10377#elif defined( __ARM_ARCH )
    104 #define CtxGet( ctx ) __asm__ ( \
    105                 "mov %0,%%sp\n"   \
    106                 "mov %1,%%r11\n"   \
    107         : "=rm" (ctx.SP), "=rm" (ctx.FP) )
    10878#else
    10979        #error unknown hardware architecture
    11080#endif
    11181
    112 //-----------------------------------------------------------------------------
    113 //Start and stop routine for the kernel, declared first to make sure they run first
    114 static void __kernel_startup (void) __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
    115 static void __kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
     82extern $thread * mainThread;
     83extern processor * mainProcessor;
    11684
    11785//-----------------------------------------------------------------------------
     
    12088static bool __has_next_thread(cluster * this);
    12189static void __run_thread(processor * this, $thread * dst);
    122 static bool __wake_proc(processor *);
    12390static bool __wake_one(struct __processor_id_t * id, cluster * cltr);
    12491static void __halt(processor * this);
    125 
    126 //-----------------------------------------------------------------------------
    127 // Kernel storage
    128 KERNEL_STORAGE(cluster,              mainCluster);
    129 KERNEL_STORAGE(processor,            mainProcessor);
    130 KERNEL_STORAGE($thread,              mainThread);
    131 KERNEL_STORAGE(__stack_t,            mainThreadCtx);
    132 KERNEL_STORAGE(__scheduler_RWLock_t, __scheduler_lock);
    133 #if !defined(__CFA_NO_STATISTICS__)
    134 KERNEL_STORAGE(__stats_t, mainProcStats);
    135 #endif
    136 
    137 cluster              * mainCluster;
    138 processor            * mainProcessor;
    139 $thread              * mainThread;
    140 __scheduler_RWLock_t * __scheduler_lock;
    141 
    142 extern "C" {
    143         struct { __dllist_t(cluster) list; __spinlock_t lock; } __cfa_dbg_global_clusters;
    144 }
    145 
    146 size_t __page_size = 0;
    147 
    148 //-----------------------------------------------------------------------------
    149 // Global state
    150 thread_local struct KernelThreadData kernelTLS __attribute__ ((tls_model ( "initial-exec" ))) @= {
    151         NULL,                                                                                           // cannot use 0p
    152         NULL,
    153         NULL,
    154         { 1, false, false },
    155 };
    156 
    157 //-----------------------------------------------------------------------------
    158 // Struct to steal stack
    159 struct current_stack_info_t {
    160         __stack_t * storage;                                                            // pointer to stack object
    161         void * base;                                                                            // base of stack
    162         void * limit;                                                                           // stack grows towards stack limit
    163         void * context;                                                                         // address of cfa_context_t
    164 };
    165 
    166 void ?{}( current_stack_info_t & this ) {
    167         __stack_context_t ctx;
    168         CtxGet( ctx );
    169         this.base = ctx.FP;
    170 
    171         rlimit r;
    172         getrlimit( RLIMIT_STACK, &r);
    173         size_t size = r.rlim_cur;
    174 
    175         this.limit = (void *)(((intptr_t)this.base) - size);
    176         this.context = &storage_mainThreadCtx;
    177 }
    178 
    179 //-----------------------------------------------------------------------------
    180 // Main thread construction
    181 
    182 void ?{}( $coroutine & this, current_stack_info_t * info) with( this ) {
    183         stack.storage = info->storage;
    184         with(*stack.storage) {
    185                 limit     = info->limit;
    186                 base      = info->base;
    187         }
    188         __attribute__((may_alias)) intptr_t * istorage = (intptr_t*) &stack.storage;
    189         *istorage |= 0x1;
    190         name = "Main Thread";
    191         state = Start;
    192         starter = 0p;
    193         last = 0p;
    194         cancellation = 0p;
    195 }
    196 
    197 void ?{}( $thread & this, current_stack_info_t * info) with( this ) {
    198         ticket = 1;
    199         state = Start;
    200         self_cor{ info };
    201         curr_cor = &self_cor;
    202         curr_cluster = mainCluster;
    203         self_mon.owner = &this;
    204         self_mon.recursion = 1;
    205         self_mon_p = &self_mon;
    206         link.next = 0p;
    207         link.prev = 0p;
    208 
    209         node.next = 0p;
    210         node.prev = 0p;
    211         doregister(curr_cluster, this);
    212 
    213         monitors{ &self_mon_p, 1, (fptr_t)0 };
    214 }
    215 
    216 //-----------------------------------------------------------------------------
    217 // Processor coroutine
    218 void ?{}(processorCtx_t & this) {
    219 
    220 }
    221 
    222 // Construct the processor context of non-main processors
    223 static void ?{}(processorCtx_t & this, processor * proc, current_stack_info_t * info) {
    224         (this.__cor){ info };
    225         this.proc = proc;
    226 }
    227 
    228 static void * __invoke_processor(void * arg);
    229 
    230 static init(processor & this, const char name[], cluster & _cltr) with( this ) {
    231         this.name = name;
    232         this.cltr = &_cltr;
    233         id = -1u;
    234         destroyer = 0p;
    235         do_terminate = false;
    236         preemption_alarm = 0p;
    237         pending_preemption = false;
    238 
    239         #if !defined(__CFA_NO_STATISTICS__)
    240                 print_stats = 0;
    241                 print_halts = false;
    242         #endif
    243 
    244         int target = __atomic_add_fetch( &cltr->nprocessors, 1u, __ATOMIC_SEQ_CST );
    245 
    246         id = doregister((__processor_id_t*)&this);
    247 
    248         // Lock the RWlock so no-one pushes/pops while we are changing the queue
    249         uint_fast32_t last_size = ready_mutate_lock();
    250 
    251                 // Adjust the ready queue size
    252                 ready_queue_grow( cltr, target );
    253 
    254         // Unlock the RWlock
    255         ready_mutate_unlock( last_size );
    256 
    257         __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
    258 }
    259 
    260 // Not a ctor, it just preps the destruction but should not destroy members
    261 void deinit(processor & this) {
    262 
    263         int target = __atomic_sub_fetch( &this.cltr->nprocessors, 1u, __ATOMIC_SEQ_CST );
    264 
    265         // Lock the RWlock so no-one pushes/pops while we are changing the queue
    266         uint_fast32_t last_size = ready_mutate_lock();
    267 
    268                 // Adjust the ready queue size
    269                 ready_queue_shrink( this.cltr, target );
    270 
    271                 // Make sure we aren't on the idle queue
    272                 unsafe_remove( this.cltr->idles, &this );
    273 
    274         // Unlock the RWlock
    275         ready_mutate_unlock( last_size );
    276 
    277         // Finally we don't need the read_lock any more
    278         unregister((__processor_id_t*)&this);
    279 }
    280 
    281 void ?{}(processor & this, const char name[], cluster & _cltr) {
    282         ( this.idle ){};
    283         ( this.terminated ){ 0 };
    284         ( this.runner ){};
    285         init( this, name, _cltr );
    286 
    287         __cfadbg_print_safe(runtime_core, "Kernel : Starting core %p\n", &this);
    288 
    289         this.stack = __create_pthread( &this.kernel_thread, __invoke_processor, (void *)&this );
    290 
    291 }
    292 
    293 void ^?{}(processor & this) with( this ){
    294         if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) {
    295                 __cfadbg_print_safe(runtime_core, "Kernel : core %p signaling termination\n", &this);
    296 
    297                 __atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED);
    298                 __wake_proc( &this );
    299 
    300                 P( terminated );
    301                 verify( kernelTLS.this_processor != &this);
    302         }
    303 
    304         int err = pthread_join( kernel_thread, 0p );
    305         if( err != 0 ) abort("KERNEL ERROR: joining processor %p caused error %s\n", &this, strerror(err));
    306 
    307         free( this.stack );
    308 
    309         deinit( this );
    310 }
    311 
    312 void ?{}(cluster & this, const char name[], Duration preemption_rate, unsigned io_flags) with( this ) {
    313         this.name = name;
    314         this.preemption_rate = preemption_rate;
    315         this.nprocessors = 0;
    316         ready_queue{};
    317 
    318         #if !defined(__CFA_NO_STATISTICS__)
    319                 print_stats = 0;
    320                 stats = alloc();
    321                 __init_stats( stats );
    322         #endif
    323 
    324         threads{ __get };
    325 
    326         doregister(this);
    327 
    328         // Lock the RWlock so no-one pushes/pops while we are changing the queue
    329         uint_fast32_t last_size = ready_mutate_lock();
    330 
    331                 // Adjust the ready queue size
    332                 ready_queue_grow( &this, 0 );
    333 
    334         // Unlock the RWlock
    335         ready_mutate_unlock( last_size );
    336 
    337 
    338         __kernel_io_startup( this, io_flags, &this == mainCluster );
    339 }
    340 
    341 void ^?{}(cluster & this) {
    342         __kernel_io_shutdown( this, &this == mainCluster );
    343 
    344         // Lock the RWlock so no-one pushes/pops while we are changing the queue
    345         uint_fast32_t last_size = ready_mutate_lock();
    346 
    347                 // Adjust the ready queue size
    348                 ready_queue_shrink( &this, 0 );
    349 
    350         // Unlock the RWlock
    351         ready_mutate_unlock( last_size );
    352 
    353         #if !defined(__CFA_NO_STATISTICS__)
    354                 if( 0 != this.print_stats ) {
    355                         __print_stats( this.stats, this.print_stats, true, this.name, (void*)&this );
    356                 }
    357                 free( this.stats );
    358         #endif
    359 
    360         unregister(this);
    361 }
     92bool __wake_proc(processor *);
    36293
    36394//=============================================================================================
     
    550281}
    551282
    552 // KERNEL_ONLY
    553 // Context invoker for processors
    554 // This is the entry point for processors (kernel threads)
    555 // It effectively constructs a coroutine by stealing the pthread stack
    556 static void * __invoke_processor(void * arg) {
    557         #if !defined( __CFA_NO_STATISTICS__ )
    558                 __stats_t local_stats;
    559                 __init_stats( &local_stats );
    560                 kernelTLS.this_stats = &local_stats;
    561         #endif
    562 
    563         processor * proc = (processor *) arg;
    564         kernelTLS.this_processor = proc;
    565         kernelTLS.this_thread    = 0p;
    566         kernelTLS.preemption_state.[enabled, disable_count] = [false, 1];
    567         // SKULLDUGGERY: We want to create a context for the processor coroutine
    568         // which is needed for the 2-step context switch. However, there is no reason
    569         // to waste the perfectly valid stack create by pthread.
    570         current_stack_info_t info;
    571         __stack_t ctx;
    572         info.storage = &ctx;
    573         (proc->runner){ proc, &info };
    574 
    575         __cfaabi_dbg_print_safe("Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.storage);
    576 
    577         //Set global state
    578         kernelTLS.this_thread = 0p;
    579 
    580         //We now have a proper context from which to schedule threads
    581         __cfadbg_print_safe(runtime_core, "Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
    582 
    583         // SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
    584         // resume it to start it like it normally would, it will just context switch
    585         // back to here. Instead directly call the main since we already are on the
    586         // appropriate stack.
    587         get_coroutine(proc->runner)->state = Active;
    588         main( proc->runner );
    589         get_coroutine(proc->runner)->state = Halted;
    590 
    591         // Main routine of the core returned, the core is now fully terminated
    592         __cfadbg_print_safe(runtime_core, "Kernel : core %p main ended (%p)\n", proc, &proc->runner);
    593 
    594         #if !defined(__CFA_NO_STATISTICS__)
    595                 __tally_stats(proc->cltr->stats, &local_stats);
    596                 if( 0 != proc->print_stats ) {
    597                         __print_stats( &local_stats, proc->print_stats, true, proc->name, (void*)proc );
    598                 }
    599         #endif
    600 
    601         return 0p;
    602 }
    603 
    604 static void Abort( int ret, const char func[] ) {
    605         if ( ret ) {                                                                            // pthread routines return errno values
    606                 abort( "%s : internal error, error(%d) %s.", func, ret, strerror( ret ) );
    607         } // if
    608 } // Abort
    609 
    610 void * __create_pthread( pthread_t * pthread, void * (*start)(void *), void * arg ) {
    611         pthread_attr_t attr;
    612 
    613         Abort( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
    614 
    615         size_t stacksize;
    616         // default stack size, normally defined by shell limit
    617         Abort( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
    618         assert( stacksize >= PTHREAD_STACK_MIN );
    619 
    620         void * stack;
    621         __cfaabi_dbg_debug_do(
    622                 stack = memalign( __page_size, stacksize + __page_size );
    623                 // pthread has no mechanism to create the guard page in user supplied stack.
    624                 if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
    625                         abort( "mprotect : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
    626                 } // if
    627         );
    628         __cfaabi_dbg_no_debug_do(
    629                 stack = malloc( stacksize );
    630         );
    631 
    632         Abort( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
    633 
    634         Abort( pthread_create( pthread, &attr, start, arg ), "pthread_create" );
    635         return stack;
    636 }
    637 
    638 // KERNEL_ONLY
    639 static void __kernel_first_resume( processor * this ) {
    640         $thread * src = mainThread;
    641         $coroutine * dst = get_coroutine(this->runner);
    642 
    643         verify( ! kernelTLS.preemption_state.enabled );
    644 
    645         kernelTLS.this_thread->curr_cor = dst;
    646         __stack_prepare( &dst->stack, 65000 );
    647         __cfactx_start(main, dst, this->runner, __cfactx_invoke_coroutine);
    648 
    649         verify( ! kernelTLS.preemption_state.enabled );
    650 
    651         dst->last = &src->self_cor;
    652         dst->starter = dst->starter ? dst->starter : &src->self_cor;
    653 
    654         // make sure the current state is still correct
    655         /* paranoid */ verify(src->state == Ready);
    656 
    657         // context switch to specified coroutine
    658         verify( dst->context.SP );
    659         __cfactx_switch( &src->context, &dst->context );
    660         // when __cfactx_switch returns we are back in the src coroutine
    661 
    662         mainThread->curr_cor = &mainThread->self_cor;
    663 
    664         // make sure the current state has been update
    665         /* paranoid */ verify(src->state == Active);
    666 
    667         verify( ! kernelTLS.preemption_state.enabled );
    668 }
    669 
    670 // KERNEL_ONLY
    671 static void __kernel_last_resume( processor * this ) {
    672         $coroutine * src = &mainThread->self_cor;
    673         $coroutine * dst = get_coroutine(this->runner);
    674 
    675         verify( ! kernelTLS.preemption_state.enabled );
    676         verify( dst->starter == src );
    677         verify( dst->context.SP );
    678 
    679         // SKULLDUGGERY in debug the processors check that the
    680         // stack is still within the limit of the stack limits after running a thread.
    681         // that check doesn't make sense if we context switch to the processor using the
    682         // coroutine semantics. Since this is a special case, use the current context
    683         // info to populate these fields.
    684         __cfaabi_dbg_debug_do(
    685                 __stack_context_t ctx;
    686                 CtxGet( ctx );
    687                 mainThread->context.SP = ctx.SP;
    688                 mainThread->context.FP = ctx.FP;
    689         )
    690 
    691         // context switch to the processor
    692         __cfactx_switch( &src->context, &dst->context );
    693 }
    694 
    695283//-----------------------------------------------------------------------------
    696284// Scheduler routines
     
    834422
    835423//=============================================================================================
    836 // Kernel Setup logic
    837 //=============================================================================================
    838 //-----------------------------------------------------------------------------
    839 // Kernel boot procedures
    840 static void __kernel_startup(void) {
    841         verify( ! kernelTLS.preemption_state.enabled );
    842         __cfadbg_print_safe(runtime_core, "Kernel : Starting\n");
    843 
    844         __page_size = sysconf( _SC_PAGESIZE );
    845 
    846         __cfa_dbg_global_clusters.list{ __get };
    847         __cfa_dbg_global_clusters.lock{};
    848 
    849         // Initialize the global scheduler lock
    850         __scheduler_lock = (__scheduler_RWLock_t*)&storage___scheduler_lock;
    851         (*__scheduler_lock){};
    852 
    853         // Initialize the main cluster
    854         mainCluster = (cluster *)&storage_mainCluster;
    855         (*mainCluster){"Main Cluster"};
    856 
    857         __cfadbg_print_safe(runtime_core, "Kernel : Main cluster ready\n");
    858 
    859         // Start by initializing the main thread
    860         // SKULLDUGGERY: the mainThread steals the process main thread
    861         // which will then be scheduled by the mainProcessor normally
    862         mainThread = ($thread *)&storage_mainThread;
    863         current_stack_info_t info;
    864         info.storage = (__stack_t*)&storage_mainThreadCtx;
    865         (*mainThread){ &info };
    866 
    867         __cfadbg_print_safe(runtime_core, "Kernel : Main thread ready\n");
    868 
    869 
    870 
    871         // Construct the processor context of the main processor
    872         void ?{}(processorCtx_t & this, processor * proc) {
    873                 (this.__cor){ "Processor" };
    874                 this.__cor.starter = 0p;
    875                 this.proc = proc;
    876         }
    877 
    878         void ?{}(processor & this) with( this ) {
    879                 ( this.idle ){};
    880                 ( this.terminated ){ 0 };
    881                 ( this.runner ){};
    882                 init( this, "Main Processor", *mainCluster );
    883                 kernel_thread = pthread_self();
    884 
    885                 runner{ &this };
    886                 __cfadbg_print_safe(runtime_core, "Kernel : constructed main processor context %p\n", &runner);
    887         }
    888 
    889         // Initialize the main processor and the main processor ctx
    890         // (the coroutine that contains the processing control flow)
    891         mainProcessor = (processor *)&storage_mainProcessor;
    892         (*mainProcessor){};
    893 
    894         //initialize the global state variables
    895         kernelTLS.this_processor = mainProcessor;
    896         kernelTLS.this_thread    = mainThread;
    897 
    898         #if !defined( __CFA_NO_STATISTICS__ )
    899                 kernelTLS.this_stats = (__stats_t *)& storage_mainProcStats;
    900                 __init_stats( kernelTLS.this_stats );
    901         #endif
    902 
    903         // Enable preemption
    904         kernel_start_preemption();
    905 
    906         // Add the main thread to the ready queue
    907         // once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
    908         __schedule_thread((__processor_id_t *)mainProcessor, mainThread);
    909 
    910         // SKULLDUGGERY: Force a context switch to the main processor to set the main thread's context to the current UNIX
    911         // context. Hence, the main thread does not begin through __cfactx_invoke_thread, like all other threads. The trick here is that
    912         // mainThread is on the ready queue when this call is made.
    913         __kernel_first_resume( kernelTLS.this_processor );
    914 
    915 
    916         // THE SYSTEM IS NOW COMPLETELY RUNNING
    917 
    918 
    919         // Now that the system is up, finish creating systems that need threading
    920         __kernel_io_finish_start( *mainCluster );
    921 
    922 
    923         __cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n");
    924 
    925         verify( ! kernelTLS.preemption_state.enabled );
    926         enable_interrupts( __cfaabi_dbg_ctx );
    927         verify( TL_GET( preemption_state.enabled ) );
    928 }
    929 
    930 static void __kernel_shutdown(void) {
    931         //Before we start shutting things down, wait for systems that need threading to shutdown
    932         __kernel_io_prepare_stop( *mainCluster );
    933 
    934         /* paranoid */ verify( TL_GET( preemption_state.enabled ) );
    935         disable_interrupts();
    936         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    937 
    938         __cfadbg_print_safe(runtime_core, "\n--------------------------------------------------\nKernel : Shutting down\n");
    939 
    940         // SKULLDUGGERY: Notify the mainProcessor it needs to terminates.
    941         // When its coroutine terminates, it return control to the mainThread
    942         // which is currently here
    943         __atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
    944         __kernel_last_resume( kernelTLS.this_processor );
    945         mainThread->self_cor.state = Halted;
    946 
    947         // THE SYSTEM IS NOW COMPLETELY STOPPED
    948 
    949         // Disable preemption
    950         kernel_stop_preemption();
    951 
    952         // Destroy the main processor and its context in reverse order of construction
    953         // These were manually constructed so we need manually destroy them
    954         void ^?{}(processor & this) with( this ){
    955                 deinit( this );
    956 
    957                 /* paranoid */ verify( this.do_terminate == true );
    958                 __cfaabi_dbg_print_safe("Kernel : destroyed main processor context %p\n", &runner);
    959         }
    960 
    961         ^(*mainProcessor){};
    962 
    963         // Final step, destroy the main thread since it is no longer needed
    964 
    965         // Since we provided a stack to this taxk it will not destroy anything
    966         /* paranoid */ verify(mainThread->self_cor.stack.storage == (__stack_t*)(((uintptr_t)&storage_mainThreadCtx)| 0x1));
    967         ^(*mainThread){};
    968 
    969         ^(*mainCluster){};
    970 
    971         ^(*__scheduler_lock){};
    972 
    973         ^(__cfa_dbg_global_clusters.list){};
    974         ^(__cfa_dbg_global_clusters.lock){};
    975 
    976         __cfadbg_print_safe(runtime_core, "Kernel : Shutdown complete\n");
    977 }
    978 
    979 //=============================================================================================
    980424// Kernel Idle Sleep
    981425//=============================================================================================
     
    997441
    998442// Unconditionally wake a thread
    999 static bool __wake_proc(processor * this) {
     443bool __wake_proc(processor * this) {
    1000444        __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
    1001445
     
    1173617
    1174618//-----------------------------------------------------------------------------
    1175 // Global Queues
    1176 void doregister( cluster     & cltr ) {
    1177         lock      ( __cfa_dbg_global_clusters.lock __cfaabi_dbg_ctx2);
    1178         push_front( __cfa_dbg_global_clusters.list, cltr );
    1179         unlock    ( __cfa_dbg_global_clusters.lock );
    1180 }
    1181 
    1182 void unregister( cluster     & cltr ) {
    1183         lock  ( __cfa_dbg_global_clusters.lock __cfaabi_dbg_ctx2);
    1184         remove( __cfa_dbg_global_clusters.list, cltr );
    1185         unlock( __cfa_dbg_global_clusters.lock );
    1186 }
    1187 
    1188 void doregister( cluster * cltr, $thread & thrd ) {
    1189         lock      (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    1190         cltr->nthreads += 1;
    1191         push_front(cltr->threads, thrd);
    1192         unlock    (cltr->thread_list_lock);
    1193 }
    1194 
    1195 void unregister( cluster * cltr, $thread & thrd ) {
    1196         lock  (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    1197         remove(cltr->threads, thrd );
    1198         cltr->nthreads -= 1;
    1199         unlock(cltr->thread_list_lock);
    1200 }
    1201 
    1202 //-----------------------------------------------------------------------------
    1203619// Debug
    1204620__cfaabi_dbg_debug_do(
  • libcfa/src/concurrency/kernel.hfa

    r6dba8755 r95789be  
    1616#pragma once
    1717
    18 #include <stdbool.h>
    19 #include <stdint.h>
    20 
    2118#include "invoke.h"
    2219#include "time_t.hfa"
     
    2623
    2724extern "C" {
    28 #include <pthread.h>
    29 #include <semaphore.h>
     25#include <bits/pthreadtypes.h>
    3026}
    3127
     
    129125struct __io_data;
    130126
    131 #define CFA_CLUSTER_IO_POLLER_USER_THREAD    (1 << 0) // 0x01
    132 #define CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS (1 << 1) // 0x02
    133 #define CFA_CLUSTER_IO_EAGER_SUBMITS         (1 << 2) // 0x04
    134 #define CFA_CLUSTER_IO_KERNEL_POLL_SUBMITS   (1 << 3) // 0x08
    135 #define CFA_CLUSTER_IO_KERNEL_POLL_COMPLETES (1 << 4) // 0x10
    136 #define CFA_CLUSTER_IO_BUFFLEN_OFFSET        16
    137 
     127// IO poller user-thread
     128// Not using the "thread" keyword because we want to control
     129// more carefully when to start/stop it
     130struct $io_ctx_thread {
     131        struct __io_data * ring;
     132        single_sem sem;
     133        volatile bool done;
     134        $thread self;
     135};
     136
     137
     138struct io_context {
     139        $io_ctx_thread thrd;
     140};
     141
     142struct io_context_params {
     143        int num_entries;
     144        int num_ready;
     145        int submit_aff;
     146        bool eager_submits:1;
     147        bool poller_submits:1;
     148        bool poll_submit:1;
     149        bool poll_complete:1;
     150};
     151
     152void  ?{}(io_context_params & this);
     153
     154void  ?{}(io_context & this, struct cluster & cl);
     155void  ?{}(io_context & this, struct cluster & cl, const io_context_params & params);
     156void ^?{}(io_context & this);
     157
     158struct io_cancellation {
     159        uint32_t target;
     160};
     161
     162static inline void  ?{}(io_cancellation & this) { this.target = -1u; }
     163static inline void ^?{}(io_cancellation & this) {}
     164bool cancel(io_cancellation & this);
    138165
    139166//-----------------------------------------------------------------------------
     
    206233        } node;
    207234
    208         struct __io_data * io;
     235        struct {
     236                io_context * ctxs;
     237                unsigned cnt;
     238        } io;
    209239
    210240        #if !defined(__CFA_NO_STATISTICS__)
     
    215245extern Duration default_preemption();
    216246
    217 void ?{} (cluster & this, const char name[], Duration preemption_rate, unsigned flags);
     247void ?{} (cluster & this, const char name[], Duration preemption_rate, unsigned num_io, const io_context_params & io_params);
    218248void ^?{}(cluster & this);
    219249
    220 static inline void ?{} (cluster & this)                                           { this{"Anonymous Cluster", default_preemption(), 0}; }
    221 static inline void ?{} (cluster & this, Duration preemption_rate)                 { this{"Anonymous Cluster", preemption_rate, 0}; }
    222 static inline void ?{} (cluster & this, const char name[])                        { this{name, default_preemption(), 0}; }
    223 static inline void ?{} (cluster & this, unsigned flags)                           { this{"Anonymous Cluster", default_preemption(), flags}; }
    224 static inline void ?{} (cluster & this, Duration preemption_rate, unsigned flags) { this{"Anonymous Cluster", preemption_rate, flags}; }
    225 static inline void ?{} (cluster & this, const char name[], unsigned flags)        { this{name, default_preemption(), flags}; }
     250static inline void ?{} (cluster & this)                                            { io_context_params default_params;    this{"Anonymous Cluster", default_preemption(), 1, default_params}; }
     251static inline void ?{} (cluster & this, Duration preemption_rate)                  { io_context_params default_params;    this{"Anonymous Cluster", preemption_rate, 1, default_params}; }
     252static inline void ?{} (cluster & this, const char name[])                         { io_context_params default_params;    this{name, default_preemption(), 1, default_params}; }
     253static inline void ?{} (cluster & this, unsigned num_io)                           { io_context_params default_params;    this{"Anonymous Cluster", default_preemption(), num_io, default_params}; }
     254static inline void ?{} (cluster & this, Duration preemption_rate, unsigned num_io) { io_context_params default_params;    this{"Anonymous Cluster", preemption_rate, num_io, default_params}; }
     255static inline void ?{} (cluster & this, const char name[], unsigned num_io)        { io_context_params default_params;    this{name, default_preemption(), num_io, default_params}; }
     256static inline void ?{} (cluster & this, const io_context_params & io_params)                                            { this{"Anonymous Cluster", default_preemption(), 1, io_params}; }
     257static inline void ?{} (cluster & this, Duration preemption_rate, const io_context_params & io_params)                  { this{"Anonymous Cluster", preemption_rate, 1, io_params}; }
     258static inline void ?{} (cluster & this, const char name[], const io_context_params & io_params)                         { this{name, default_preemption(), 1, io_params}; }
     259static inline void ?{} (cluster & this, unsigned num_io, const io_context_params & io_params)                           { this{"Anonymous Cluster", default_preemption(), num_io, io_params}; }
     260static inline void ?{} (cluster & this, Duration preemption_rate, unsigned num_io, const io_context_params & io_params) { this{"Anonymous Cluster", preemption_rate, num_io, io_params}; }
     261static inline void ?{} (cluster & this, const char name[], unsigned num_io, const io_context_params & io_params)        { this{name, default_preemption(), num_io, io_params}; }
    226262
    227263static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
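The io_context_params structure and the num_io constructor arguments above replace the removed CFA_CLUSTER_IO_* bit flags; the bool bit-fields appear to correspond to the old POLLER_THREAD_SUBMITS, EAGER_SUBMITS, KERNEL_POLL_SUBMITS and KERNEL_POLL_COMPLETES options. A minimal usage sketch, not part of the changeset: field values are purely illustrative, and treating num_entries as the ring size handed to io_uring is an assumption.

    // hypothetical example: a cluster with two io_contexts and explicit parameters
    io_context_params params;             // default-constructed via ?{}(io_context_params &)
    params.num_entries    = 256;          // presumed ring size for each io_context
    params.poller_submits = true;         // let the poller thread perform the submits
    cluster cl = { "io-cluster", 2u, params };   // name, num_io, io_context_params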
  • libcfa/src/concurrency/kernel_private.hfa

    r6dba8755 r95789be  
    2222#include "stats.hfa"
    2323
    24 #include "bits/random.hfa"
    25 
    26 
    2724//-----------------------------------------------------------------------------
    2825// Scheduler
     
    5350
    5451
    55 struct event_kernel_t {
    56         alarm_list_t alarms;
    57         __spinlock_t lock;
    58 };
    59 
    60 extern event_kernel_t * event_kernel;
    61 
    62 struct __cfa_kernel_preemption_state_t {
    63         bool enabled;
    64         bool in_progress;
    65         unsigned short disable_count;
    66 };
    67 
    68 extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
    69 
    7052extern cluster * mainCluster;
    7153
     
    8466void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );
    8567
    86 //-----------------------------------------------------------------------------
    87 // I/O
    88 void __kernel_io_startup     ( cluster &, unsigned, bool );
    89 void __kernel_io_finish_start( cluster & );
    90 void __kernel_io_prepare_stop( cluster & );
    91 void __kernel_io_shutdown    ( cluster &, bool );
     68static inline bool __post(single_sem & this, struct __processor_id_t * id) {
     69        for() {
     70                struct $thread * expected = this.ptr;
     71                if(expected == 1p) return false;
     72                if(expected == 0p) {
     73                        if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     74                                return false;
     75                        }
     76                }
     77                else {
     78                        if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     79                                __unpark( id, expected __cfaabi_dbg_ctx2 );
     80                                return true;
     81                        }
     82                }
     83        }
     84}
    9285
    9386//-----------------------------------------------------------------------------
    9487// Utils
    95 #define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
    96 
    97 static inline uint64_t __tls_rand() {
    98         #if defined(__SIZEOF_INT128__)
    99                 return __lehmer64( kernelTLS.rand_seed );
    100         #else
    101                 return __xorshift64( kernelTLS.rand_seed );
    102         #endif
    103 }
    104 
    105 
    106 void doregister( struct cluster & cltr );
    107 void unregister( struct cluster & cltr );
    108 
    10988void doregister( struct cluster * cltr, struct $thread & thrd );
    11089void unregister( struct cluster * cltr, struct $thread & thrd );
     90
     91//-----------------------------------------------------------------------------
     92// I/O
     93void ^?{}(io_context & this, bool );
    11194
    11295//=======================================================================
     
    280263void ready_queue_shrink(struct cluster * cltr, int target);
    281264
    282 //-----------------------------------------------------------------------
    283 // IO user data
    284 struct __io_user_data_t {
    285         int32_t result;
    286         $thread * thrd;
    287 };
    288 
    289 //-----------------------------------------------------------------------
    290 // Static functions called at the end of each thread to register statistics
    291 #if !defined(__CFA_NO_STATISTICS__)
    292         static inline struct __stats_t * __tls_stats() {
    293                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    294                 /* paranoid */ verify( kernelTLS.this_stats );
    295                 return kernelTLS.this_stats;
    296         }
    297 
    298         #define __STATS__(in_kernel, ...) { \
    299                 if( !(in_kernel) ) disable_interrupts(); \
    300                 with( *__tls_stats() ) { \
    301                         __VA_ARGS__ \
    302                 } \
    303                 if( !(in_kernel) ) enable_interrupts( __cfaabi_dbg_ctx ); \
    304         }
    305 #else
    306         #define __STATS__(in_kernel, ...)
    307 #endif
    308265
    309266// Local Variables: //
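The new __post() helper above appears to implement a three-state protocol on single_sem: 0p means the semaphore is empty, 1p means a post is already pending, and any other value is the blocked thread waiting on it. The following standalone C sketch (not part of the changeset; the waiter type and wake() are stand-ins for $thread and __unpark()) restates the same CAS loop for readers unfamiliar with the 0p/1p notation:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef struct waiter { const char * name; } waiter;   // stand-in for $thread

    static void wake(waiter * w) {                          // stand-in for __unpark()
            printf("waking %s\n", w->name);
    }

    #define TOKEN ((waiter *)1)                             // plays the role of the 1p sentinel

    // Mirrors __post(): returns true only when an actual waiter was woken.
    static bool post(waiter ** slot) {
            for (;;) {
                    waiter * expected = *slot;
                    if (expected == TOKEN) return false;    // a post is already pending
                    if (expected == NULL) {
                            // no waiter yet: leave a token so a later wait can return immediately
                            if (__atomic_compare_exchange_n(slot, &expected, TOKEN, false,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
                                    return false;
                    } else {
                            // a thread is parked: claim the slot and wake it
                            if (__atomic_compare_exchange_n(slot, &expected, NULL, false,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
                                    wake(expected);
                                    return true;
                            }
                    }
            }
    }

    int main(void) {
            waiter w = { "poller" };
            waiter * slot = &w;     // pretend a thread is parked on the semaphore
            post(&slot);            // wakes it and empties the slot
            post(&slot);            // slot empty: leaves a token instead
            return 0;
    }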
  • libcfa/src/concurrency/preemption.cfa

    r6dba8755 r95789be  
    2626
    2727#include "bits/signal.hfa"
     28#include "kernel_private.hfa"
    2829
    2930#if !defined(__CFA_DEFAULT_PREEMPTION__)
     
    293294// Startup routine to activate preemption
    294295// Called from kernel_startup
    295 void kernel_start_preemption() {
     296void __kernel_alarm_startup() {
    296297        __cfaabi_dbg_print_safe( "Kernel : Starting preemption\n" );
    297298
     
    315316// Shutdown routine to deactivate preemption
    316317// Called from kernel_shutdown
    317 void kernel_stop_preemption() {
     318void __kernel_alarm_shutdown() {
    318319        __cfaabi_dbg_print_safe( "Kernel : Preemption stopping\n" );
    319320
  • libcfa/src/concurrency/preemption.hfa

    r6dba8755 r95789be  
    1616#pragma once
    1717
     18#include "bits/locks.hfa"
    1819#include "alarm.hfa"
    19 #include "kernel_private.hfa"
    2020
    21 void kernel_start_preemption();
    22 void kernel_stop_preemption();
     21struct event_kernel_t {
     22        alarm_list_t alarms;
     23        __spinlock_t lock;
     24};
     25
     26extern event_kernel_t * event_kernel;
     27
    2328void update_preemption( processor * this, Duration duration );
    2429
  • libcfa/src/concurrency/thread.hfa

    r6dba8755 r95789be  
    8484
    8585//-----------------------------------------------------------------------------
    86 // Thread getters
    87 static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
    88 
    89 //-----------------------------------------------------------------------------
    9086// Scheduler API
    9187
     
    106102bool force_yield( enum __Preemption_Reason );
    107103
    108 static inline void yield() {
    109         force_yield(__MANUAL_PREEMPTION);
    110 }
    111 
    112 // Yield: yield N times
    113 static inline void yield( unsigned times ) {
    114         for( times ) {
    115                 yield();
    116         }
    117 }
    118 
    119104//----------
    120105// sleep: force thread to block and be rescheduled after Duration duration
  • src/cfa.make

    r6dba8755 r95789be  
    1 CFACOMPILE = $(CFACC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(CFAFLAGS) $(AM_CFLAGS) $(CFLAGS)
    2 LTCFACOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
     1AM_T_CFA = $(am__t_CFA_@AM_T@)
     2am__t_CFA_ =
     3am__t_CFA_0 =
      4am__t_CFA_1 = /usr/bin/time --quiet -f "$@ %E" # trailing space is necessary
     5
     6
     7CFACOMPILE = $(AM_T_CFA)$(CFACC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(CFAFLAGS) $(AM_CFLAGS) $(CFLAGS)
     8LTCFACOMPILE = $(AM_T_CFA)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
    39        $(LIBTOOLFLAGS) --mode=compile $(CFACC) $(DEFS) \
    410        $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(AM_CFLAGS) $(CFAFLAGS) $(CFLAGS)
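The cfa.make hunk above threads an optional /usr/bin/time prefix into the compile commands through the AM_T_CFA indirection, so each object file can report its own build time when the corresponding automake conditional is enabled. A generic standalone sketch of the same pattern for a plain GNU make rule (not part of the changeset; TIME_BUILD and TIMING are made-up names):

    # run as `make TIME_BUILD=1` to print "<target> <elapsed>" for every compile
    TIME_BUILD ?=
    timing_    =
    timing_1   = /usr/bin/time -f "$@ %E" # trailing space is significant
    TIMING     = $(timing_$(TIME_BUILD))

    %.o: %.c
            $(TIMING)$(CC) $(CFLAGS) -c -o $@ $<   # recipe line must start with a tab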