Changes in / [fc9bb79:d34575b]

Files:
- 23 added
- 16 edited (diffs shown below)

Legend for the diffs below:
- lines prefixed with "-" were removed in d34575b
- lines prefixed with "+" were added in d34575b
- unprefixed lines are unmodified context; "…" marks unmodified lines that are omitted
benchmark/io/batch-readv.c
   // Program to test the optimial batchsize in a single threaded process
   extern "C" {
-     #ifndef _GNU_SOURCE /* See feature_test_macros(7) */
-     #define _GNU_SOURCE /* See feature_test_macros(7) */
-     #endif
-     #include <errno.h>
-     #include <stdio.h>
-     #include <stdint.h>
-     #include <stdlib.h>
-     #include <string.h>
+     #include <getopt.h>
      #include <locale.h>
-     #include <getopt.h>
-     #include <unistd.h>
-     #include <sys/mman.h>
-     #include <sys/syscall.h>
-     #include <sys/uio.h>
-     #include <fcntl.h>
      #include <time.h>     // timespec
      #include <sys/time.h> // timeval
-
-     #include <linux/io_uring.h>
   }

   enum { TIMEGRAN = 1000000000LL }; // nanosecond granularity, except for timeval
…
   #include <omp.h>

-  #ifndef __NR_io_uring_setup
-  #define __NR_io_uring_setup 425
-  #endif
-  #ifndef __NR_io_uring_enter
-  #define __NR_io_uring_enter 426
-  #endif
-  #ifndef __NR_io_uring_register
-  #define __NR_io_uring_register 427
-  #endif
+  #include "io_uring.h"

-  struct io_uring_sq {
-     // Head and tail of the ring (associated with array)
-     volatile uint32_t * head;
-     volatile uint32_t * tail;
-
-     // The actual kernel ring which uses head/tail
-     // indexes into the sqes arrays
-     uint32_t * array;
-
-     // number of entries and mask to go with it
-     const uint32_t * num;
-     const uint32_t * mask;
-
-     // Submission flags (Not sure what for)
-     uint32_t * flags;
-
-     // number of sqes not submitted (whatever that means)
-     uint32_t * dropped;
-
-     // Like head/tail but not seen by the kernel
-     volatile uint32_t alloc;
-
-     // A buffer of sqes (not the actual ring)
-     struct io_uring_sqe * sqes;
-
-     // The location and size of the mmaped area
-     void * ring_ptr;
-     size_t ring_sz;
-  };
-
-  struct io_uring_cq {
-     // Head and tail of the ring
-     volatile uint32_t * head;
-     volatile uint32_t * tail;
-
-     // number of entries and mask to go with it
-     const uint32_t * mask;
-     const uint32_t * num;
-
-     // number of cqes not submitted (whatever that means)
-     uint32_t * overflow;
-
-     // the kernel ring
-     struct io_uring_cqe * cqes;
-
-     // The location and size of the mmaped area
-     void * ring_ptr;
-     size_t ring_sz;
-  };
-
-  struct io_ring {
-     struct io_uring_sq submit_q;
-     struct io_uring_cq completion_q;
-     uint32_t flags;
-     int fd;
-  };
-
-  struct fred {
-     io_ring io;
-  };
-
-  fred self;
   int myfd;
…
   myfd = open(__FILE__, 0);

-  // Step 1 : call to setup
-  struct io_uring_params params;
-  memset(&params, 0, sizeof(params));
-
-  uint32_t nentries = 2048;
-
-  int fd = syscall(__NR_io_uring_setup, nentries, &params );
-  if(fd < 0) {
-     fprintf(stderr, "KERNEL ERROR: IO_URING SETUP - %s\n", strerror(errno));
-     abort();
-  }
-
-  // Step 2 : mmap result
-  memset(&self.io, 0, sizeof(struct io_ring));
-  struct io_uring_sq & sq = self.io.submit_q;
-  struct io_uring_cq & cq = self.io.completion_q;
-
-  // calculate the right ring size
-  sq.ring_sz = params.sq_off.array + (params.sq_entries * sizeof(unsigned) );
-  cq.ring_sz = params.cq_off.cqes  + (params.cq_entries * sizeof(struct io_uring_cqe));
-
-  // Requires features
-  // // adjust the size according to the parameters
-  // if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
-  //    cq->ring_sz = sq->ring_sz = max(cq->ring_sz, sq->ring_sz);
-  // }
-
-  // mmap the Submit Queue into existence
-  sq.ring_ptr = mmap(0, sq.ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
-  if (sq.ring_ptr == (void*)MAP_FAILED) {
-     fprintf(stderr, "KERNEL ERROR: IO_URING MMAP1 - %s\n", strerror(errno));
-     abort();
-  }
-
-  // mmap the Completion Queue into existence (may or may not be needed)
-  // Requires features
-  // if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
-  //    cq->ring_ptr = sq->ring_ptr;
-  // }
-  // else {
-     // We need multiple call to MMAP
-     cq.ring_ptr = mmap(0, cq.ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
-     if (cq.ring_ptr == (void*)MAP_FAILED) {
-        munmap(sq.ring_ptr, sq.ring_sz);
-        fprintf(stderr, "KERNEL ERROR: IO_URING MMAP2 - %s\n", strerror(errno));
-        abort();
-     }
-  // }
-
-  // mmap the submit queue entries
-  size_t size = params.sq_entries * sizeof(struct io_uring_sqe);
-  sq.sqes = (struct io_uring_sqe *)mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
-  if (sq.sqes == (struct io_uring_sqe *)MAP_FAILED) {
-     munmap(sq.ring_ptr, sq.ring_sz);
-     if (cq.ring_ptr != sq.ring_ptr) munmap(cq.ring_ptr, cq.ring_sz);
-     fprintf(stderr, "KERNEL ERROR: IO_URING MMAP3 - %s\n", strerror(errno));
-     abort();
-  }
-
-  // Get the pointers from the kernel to fill the structure
-  // submit queue
-  sq.head    = (volatile uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.head);
-  sq.tail    = (volatile uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);
-  sq.mask    = (   const uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);
-  sq.num     = (   const uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);
-  sq.flags   = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);
-  sq.dropped = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
-  sq.array   = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
-  sq.alloc   = *sq.tail;
-
-  // completion queue
-  cq.head     = (volatile uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.head);
-  cq.tail     = (volatile uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.tail);
-  cq.mask     = (   const uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_mask);
-  cq.num      = (   const uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_entries);
-  cq.overflow = (         uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.overflow);
-  cq.cqes     = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes);
-
-  self.io.fd = fd;
+  init_uring(2048);

   // Allocate the sqe
…
   printf("Took %'ld ms\n", to_miliseconds(end - start));
-  printf("Submitted %'llu\n", submits);
-  printf("Completed %'llu\n", completes);
-  printf("Submitted / sec %'.f\n", submits / to_fseconds(end - start));
-  printf("Completed / sec %'.f\n", completes / to_fseconds(end - start));
+  printf("Submitted %'llu\n", submits);
+  printf("Completed %'llu\n", completes);
+  printf("Submitted / sec %'.f\n", submits / to_fseconds(end - start));
+  printf("Completed / sec %'.f\n", completes / to_fseconds(end - start));
+  printf("ns per Submitted %'.f\n", 1000000000.0 * to_fseconds(end - start) / (submits / batch) );
+  printf("ns per Completed %'.f\n", 1000000000.0 * to_fseconds(end - start) / (completes / batch) );
   }
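
The benchmark now relies on a shared io_uring.h helper (one of the added files, whose contents are not shown in this view). For reference, a minimal sketch of what an init_uring() along these lines has to do, distilled from the code deleted above; the struct and function names here are assumptions, not the actual header's interface:

    #define _GNU_SOURCE
    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/io_uring.h>

    #ifndef __NR_io_uring_setup
    #define __NR_io_uring_setup 425
    #endif

    struct uring { int fd; void *sq_ring, *cq_ring; struct io_uring_sqe *sqes; };
    static struct uring ring;

    static void init_uring(uint32_t nentries) {
        struct io_uring_params params;
        memset(&params, 0, sizeof(params));

        // 1) ask the kernel for a ring of the requested size
        ring.fd = syscall(__NR_io_uring_setup, nentries, &params);
        if (ring.fd < 0) { perror("io_uring_setup"); abort(); }

        // 2) mmap the submit ring, the completion ring and the sqe array
        size_t sq_sz = params.sq_off.array + params.sq_entries * sizeof(unsigned);
        size_t cq_sz = params.cq_off.cqes  + params.cq_entries * sizeof(struct io_uring_cqe);
        ring.sq_ring = mmap(0, sq_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
                            ring.fd, IORING_OFF_SQ_RING);
        ring.cq_ring = mmap(0, cq_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
                            ring.fd, IORING_OFF_CQ_RING);
        ring.sqes    = mmap(0, params.sq_entries * sizeof(struct io_uring_sqe),
                            PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
                            ring.fd, IORING_OFF_SQES);
        if (ring.sq_ring == MAP_FAILED || ring.cq_ring == MAP_FAILED ||
            ring.sqes == (struct io_uring_sqe *)MAP_FAILED) {
            perror("io_uring mmap"); abort();
        }
        // 3) head/tail/mask/array pointers are then computed from params.sq_off / params.cq_off,
        //    exactly as the removed code above did.
    }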
benchmark/io/readv.cfa
+  #define _GNU_SOURCE
+
   #include <stdlib.h>
   #include <stdio.h>
…
   extern bool traceHeapOn();
   extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
+  extern ssize_t cfa_preadv2_fixed(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
+  extern void register_fixed_files( cluster &, int *, unsigned count );

   int fd;
…

   unsigned long int buflen = 50;
+  bool fixed_file = false;

   thread __attribute__((aligned(128))) Reader {};
   void ?{}( Reader & this ) {
      ((thread&)this){ "Reader Thread", *the_benchmark_cluster };
+  }
+
+  int do_read(int fd, struct iovec * iov) {
+     if(fixed_file) {
+        return cfa_preadv2_fixed(fd, iov, 1, 0, 0);
+     }
+     else {
+        return cfa_preadv2(fd, iov, 1, 0, 0);
+     }
   }

…
      while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
-        int r = cfa_preadv2(fd, &iov, 1, 0, 0);
+        int r = do_read(fd, &iov);
         if(r < 0) abort("%s\n", strerror(-r));

…
   BENCH_DECL
   unsigned flags = 0;
+  int file_flags = 0;
   unsigned sublen = 16;

…
   static struct option options[] = {
      BENCH_OPT_LONG
-     {"bufsize",      required_argument, 0, 'b'},
-     {"userthread",   no_argument      , 0, 'u'},
-     {"submitthread", no_argument      , 0, 's'},
-     {"submitlength", required_argument, 0, 'l'},
+     {"bufsize",       required_argument, 0, 'b'},
+     {"userthread",    no_argument      , 0, 'u'},
+     {"submitthread",  no_argument      , 0, 's'},
+     {"eagersubmit",   no_argument      , 0, 'e'},
+     {"kpollsubmit",   no_argument      , 0, 'k'},
+     {"kpollcomplete", no_argument      , 0, 'i'},
+     {"submitlength",  required_argument, 0, 'l'},
      {0, 0, 0, 0}
   };

   int idx = 0;
-  int opt = getopt_long(argc, argv, BENCH_OPT_SHORT "b:usl:", options, &idx);
+  int opt = getopt_long(argc, argv, BENCH_OPT_SHORT "b:usekil:", options, &idx);

   const char * arg = optarg ? optarg : "";
…
         flags |= CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS;
         break;
+     case 'e':
+        flags |= CFA_CLUSTER_IO_EAGER_SUBMITS;
+        break;
+     case 'k':
+        flags |= CFA_CLUSTER_IO_KERNEL_POLL_SUBMITS;
+        fixed_file = true;
+        break;
+     case 'i':
+        flags |= CFA_CLUSTER_IO_KERNEL_POLL_COMPLETES;
+        file_flags |= O_DIRECT;
+        break;
      case 'l':
         sublen = strtoul(arg, &end, 10);
…
         fprintf( stderr, "  -u, --userthread        If set, cluster uses user-thread to poll I/O\n" );
         fprintf( stderr, "  -s, --submitthread      If set, cluster uses polling thread to submit I/O\n" );
+        fprintf( stderr, "  -e, --eagersubmit       If set, cluster submits I/O eagerly but still aggregates submits\n" );
+        fprintf( stderr, "  -k, --kpollsubmit       If set, cluster uses IORING_SETUP_SQPOLL\n" );
+        fprintf( stderr, "  -i, --kpollcomplete     If set, cluster uses IORING_SETUP_IOPOLL\n" );
+        fprintf( stderr, "  -l, --submitlength=LEN  Max number of submitions that can be submitted together\n" );
         exit(EXIT_FAILURE);
      }
   }

-  fd = open(__FILE__, 0);
-  if(fd < 0) {
+  int lfd = open(__FILE__, file_flags);
+  if(lfd < 0) {
      fprintf(stderr, "Could not open source file\n");
      exit(EXIT_FAILURE);
…
   Time start, end;
   BenchCluster cl = { flags, CFA_STATS_READY_Q | CFA_STATS_IO };
+
+  if(fixed_file) {
+     fd = 0;
+     register_fixed_files( cl.self, &lfd, 1 );
+  }
+  else {
+     fd = lfd;
+  }
+
   {
      BenchProc procs[nprocs];
…
   }

-  close(fd);
+  close(lfd);
   }
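
The new --kpollsubmit path registers the benchmark's file with the ring and then reads through a "fixed file" (note how fd becomes 0, an index into the registered-file table). For reference, a plain-C sketch of what that means at the io_uring level; this is an illustration of the kernel interface, not the Cforall wrappers themselves, and error handling is trimmed:

    #include <string.h>
    #include <sys/syscall.h>
    #include <sys/uio.h>
    #include <unistd.h>
    #include <linux/io_uring.h>

    #ifndef __NR_io_uring_register
    #define __NR_io_uring_register 427
    #endif

    // Register an array of file descriptors once; later SQEs can refer to them by index.
    static int register_files(int ring_fd, int *fds, unsigned count) {
        return syscall(__NR_io_uring_register, ring_fd, IORING_REGISTER_FILES, fds, count);
    }

    // Fill a readv SQE against registered file 0 instead of a raw descriptor.
    static void prep_fixed_readv(struct io_uring_sqe *sqe, const struct iovec *iov, int iovcnt) {
        memset(sqe, 0, sizeof(*sqe));
        sqe->opcode = IORING_OP_READV;
        sqe->fd     = 0;                    // index into the registered-file table
        sqe->addr   = (unsigned long)iov;
        sqe->len    = iovcnt;
        sqe->off    = 0;
        sqe->flags |= IOSQE_FIXED_FILE;     // interpret sqe->fd as an index, not an fd
    }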
libcfa/src/Makefile.am
   if BUILDLIB
   headers_nosrc = bitmanip.hfa exception.hfa math.hfa gmp.hfa time_t.hfa clock.hfa \
-     bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa containers/stackLockFree.hfa
+     bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa \
+     containers/list.hfa containers/stackLockFree.hfa concurrency/iofwd.hfa
+
   headers = common.hfa fstream.hfa heap.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa \
      time.hfa stdlib.hfa memory.hfa \
libcfa/src/Makefile.in
      bits/align.hfa bits/containers.hfa bits/defs.hfa \
      bits/debug.hfa bits/locks.hfa containers/list.hfa \
-     containers/stackLockFree.hfa concurrency/coroutine.hfa \
-     concurrency/thread.hfa concurrency/kernel.hfa \
-     concurrency/monitor.hfa concurrency/mutex.hfa \
-     concurrency/invoke.h
+     containers/stackLockFree.hfa concurrency/iofwd.hfa \
+     concurrency/coroutine.hfa concurrency/thread.hfa \
+     concurrency/kernel.hfa concurrency/monitor.hfa \
+     concurrency/mutex.hfa concurrency/invoke.h
   HEADERS = $(nobase_cfa_include_HEADERS)
   am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
…
   #----------------------------------------------------------------------------------------------------------------
   @BUILDLIB_TRUE@headers_nosrc = bitmanip.hfa exception.hfa math.hfa gmp.hfa time_t.hfa clock.hfa \
-  @BUILDLIB_TRUE@	bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa containers/stackLockFree.hfa
+  @BUILDLIB_TRUE@	bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa \
+  @BUILDLIB_TRUE@	containers/list.hfa containers/stackLockFree.hfa concurrency/iofwd.hfa

   @BUILDLIB_FALSE@headers =
libcfa/src/concurrency/io.cfa
   //

-  // #define __CFA_DEBUG_PRINT_IO__
-  // #define __CFA_DEBUG_PRINT_IO_CORE__
+  #if defined(__CFA_DEBUG__)
+     // #define __CFA_DEBUG_PRINT_IO__
+     #define __CFA_DEBUG_PRINT_IO_CORE__
+  #endif

   #include "kernel.hfa"
…
      volatile uint32_t * head;
      volatile uint32_t * tail;
+     volatile uint32_t prev_head;

      // The actual kernel ring which uses head/tail
…
      __spinlock_t lock;
+     __spinlock_t release_lock;

      // A buffer of sqes (not the actual ring)
…
   //=============================================================================================
   void __kernel_io_startup( cluster & this, unsigned io_flags, bool main_cluster ) {
+     if( (io_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS) && (io_flags & CFA_CLUSTER_IO_EAGER_SUBMITS) ) {
+        abort("CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS and CFA_CLUSTER_IO_EAGER_SUBMITS cannot be mixed\n");
+     }
+
      this.io = malloc();

      // Step 1 : call to setup
      struct io_uring_params params;
      memset(&params, 0, sizeof(params));
+     if( io_flags & CFA_CLUSTER_IO_KERNEL_POLL_SUBMITS   ) params.flags |= IORING_SETUP_SQPOLL;
+     if( io_flags & CFA_CLUSTER_IO_KERNEL_POLL_COMPLETES ) params.flags |= IORING_SETUP_IOPOLL;

      uint32_t nentries = entries_per_cluster();
…
         // adjust the size according to the parameters
         if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
-           cq->ring_sz = sq->ring_sz = max(cq->ring_sz, sq->ring_sz);
+           cq.ring_sz = sq.ring_sz = max(cq.ring_sz, sq.ring_sz);
         }
      #endif
…
      // mmap the Completion Queue into existence (may or may not be needed)
      if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
-        cq->ring_ptr = sq->ring_ptr;
+        cq.ring_ptr = sq.ring_ptr;
      }
      else
…
      sq.dropped = ( uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
      sq.array   = ( uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
+     sq.prev_head = *sq.head;

      {
…
      }

-     if( io_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
+     (sq.lock){};
+     (sq.release_lock){};
+
+     if( io_flags & ( CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS | CFA_CLUSTER_IO_EAGER_SUBMITS ) ) {
         /* paranoid */ verify( is_pow2( io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET ) || ((io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET) < 8) );
         sq.ready_cnt = max(io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET, 8);
…

      // Create the poller thread
-     __cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluter %p\n", &this);
+     __cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluster %p\n", &this);
      this.io->poller.slow.blocked = false;
      this.io->poller.slow.stack = __create_pthread( &this.io->poller.slow.kthrd, __io_poller_slow, &this );
…
   }

+  int __io_uring_enter( struct __io_data & ring, unsigned to_submit, bool get, sigset_t * mask ) {
+     bool need_sys_to_submit = false;
+     bool need_sys_to_complete = false;
+     unsigned min_complete = 0;
+     unsigned flags = 0;
+
+     TO_SUBMIT:
+     if( to_submit > 0 ) {
+        if( !(ring.ring_flags & IORING_SETUP_SQPOLL) ) {
+           need_sys_to_submit = true;
+           break TO_SUBMIT;
+        }
+        if( (*ring.submit_q.flags) & IORING_SQ_NEED_WAKEUP ) {
+           need_sys_to_submit = true;
+           flags |= IORING_ENTER_SQ_WAKEUP;
+        }
+     }
+
+     TO_COMPLETE:
+     if( get && !(ring.ring_flags & IORING_SETUP_SQPOLL) ) {
+        flags |= IORING_ENTER_GETEVENTS;
+        if( mask ) {
+           need_sys_to_complete = true;
+           min_complete = 1;
+           break TO_COMPLETE;
+        }
+        if( (ring.ring_flags & IORING_SETUP_IOPOLL) ) {
+           need_sys_to_complete = true;
+        }
+     }
+
+     int ret = 0;
+     if( need_sys_to_submit || need_sys_to_complete ) {
+        ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, min_complete, flags, mask, _NSIG / 8);
+        if( ret < 0 ) {
+           switch((int)errno) {
+           case EAGAIN:
+           case EINTR:
+              ret = -1;
+              break;
+           default:
+              abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+           }
+        }
+     }
+
+     // Memory barrier
+     __atomic_thread_fence( __ATOMIC_SEQ_CST );
+     return ret;
+  }
+
   //=============================================================================================
   // I/O Polling
   //=============================================================================================
+  static unsigned __collect_submitions( struct __io_data & ring );
+  static uint32_t __release_consumed_submission( struct __io_data & ring );
+
   // Process a single completion message from the io_uring
   // This is NOT thread-safe
-  static [int, bool] __drain_io( & struct __io_data ring, * sigset_t mask, int waitcnt, bool in_kernel ) {
+  static [int, bool] __drain_io( & struct __io_data ring, * sigset_t mask ) {
+     /* paranoid */ verify( !kernelTLS.preemption_state.enabled );
+
      unsigned to_submit = 0;
      if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
-
         // If the poller thread also submits, then we need to aggregate the submissions which are ready
-        uint32_t tail = *ring.submit_q.tail;
-        const uint32_t mask = *ring.submit_q.mask;
-
-        // Go through the list of ready submissions
-        for( i; ring.submit_q.ready_cnt ) {
-           // replace any submission with the sentinel, to consume it.
-           uint32_t idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED);
-
-           // If it was already the sentinel, then we are done
-           if( idx == -1ul32 ) continue;
-
-           // If we got a real submission, append it to the list
-           ring.submit_q.array[ (tail + to_submit) & mask ] = idx & mask;
-           to_submit++;
-        }
-
-        // Increment the tail based on how many we are ready to submit
-        __atomic_fetch_add(ring.submit_q.tail, to_submit, __ATOMIC_SEQ_CST);
-     }
-
-     const uint32_t smask = *ring.submit_q.mask;
-     uint32_t shead = *ring.submit_q.head;
-     int ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
+        to_submit = __collect_submitions( ring );
+     }
+
+     int ret = __io_uring_enter(ring, to_submit, true, mask);
      if( ret < 0 ) {
-        switch((int)errno) {
-        case EAGAIN:
-        case EINTR:
-           return -EAGAIN;
-        default:
-           abort( "KERNEL ERROR: IO_URING WAIT - %s\n", strerror(errno) );
-        }
+        return [0, true];
+     }
+
+     // update statistics
+     if (to_submit > 0) {
+        __STATS__( true,
+           if( to_submit > 0 ) {
+              io.submit_q.submit_avg.rdy += to_submit;
+              io.submit_q.submit_avg.csm += ret;
+              io.submit_q.submit_avg.cnt += 1;
+           }
+        )
      }

      // Release the consumed SQEs
-     for( i; ret ) {
-        uint32_t idx = ring.submit_q.array[ (i + shead) & smask ];
-        ring.submit_q.sqes[ idx ].user_data = 0;
-     }
-
-     uint32_t avail = 0;
-     uint32_t sqe_num = *ring.submit_q.num;
-     for(i; sqe_num) {
-        if( ring.submit_q.sqes[ i ].user_data == 0 ) avail++;
-     }
-
-     // update statistics
-     #if !defined(__CFA_NO_STATISTICS__)
-        __tls_stats()->io.submit_q.submit_avg.rdy += to_submit;
-        __tls_stats()->io.submit_q.submit_avg.csm += ret;
-        __tls_stats()->io.submit_q.submit_avg.avl += avail;
-        __tls_stats()->io.submit_q.submit_avg.cnt += 1;
-     #endif
+     __release_consumed_submission( ring );

      // Drain the queue
…
      const uint32_t mask = *ring.completion_q.mask;

-     // Memory barrier
-     __atomic_thread_fence( __ATOMIC_SEQ_CST );
-
      // Nothing was new return 0
      if (head == tail) {
-        return 0;
+        return [0, to_submit > 0];
      }

      uint32_t count = tail - head;
+     /* paranoid */ verify( count != 0 );
      for(i; count) {
         unsigned idx = (head + i) & mask;
…

         data->result = cqe.res;
-        if(!in_kernel) { unpark( data->thrd __cfaabi_dbg_ctx2 ); }
-        else           { __unpark( &ring.poller.slow.id, data->thrd __cfaabi_dbg_ctx2 ); }
+        if(!mask) { unpark( data->thrd __cfaabi_dbg_ctx2 ); }
+        else      { __unpark( &ring.poller.slow.id, data->thrd __cfaabi_dbg_ctx2 ); }
      }

…
            int count;
            bool again;
-           [count, again] = __drain_io( ring, &mask, 1, true );
+           [count, again] = __drain_io( ring, &mask );

            __atomic_store_n( &ring.poller.slow.blocked, false, __ATOMIC_SEQ_CST );

            // Update statistics
-           #if !defined(__CFA_NO_STATISTICS__)
-              __tls_stats()->io.complete_q.completed_avg.val += count;
-              __tls_stats()->io.complete_q.completed_avg.slow_cnt += 1;
-           #endif
+           __STATS__( true,
+              io.complete_q.completed_avg.val += count;
+              io.complete_q.completed_avg.slow_cnt += 1;
+           )

            if(again) {
…
            int count;
            bool again;
-           [count, again] = __drain_io( ring, &mask, 0, true );
+           [count, again] = __drain_io( ring, &mask );

            // Update statistics
-           #if !defined(__CFA_NO_STATISTICS__)
-              __tls_stats()->io.complete_q.completed_avg.val += count;
-              __tls_stats()->io.complete_q.completed_avg.slow_cnt += 1;
-           #endif
+           __STATS__( true,
+              io.complete_q.completed_avg.val += count;
+              io.complete_q.completed_avg.slow_cnt += 1;
+           )
         }
      }
…
      bool again;
      disable_interrupts();
-        [count, again] = __drain_io( *this.ring, 0p, 0, false );
+        [count, again] = __drain_io( *this.ring, 0p );

         if(!again) reset++;

         // Update statistics
-        #if !defined(__CFA_NO_STATISTICS__)
-           __tls_stats()->io.complete_q.completed_avg.val += count;
-           __tls_stats()->io.complete_q.completed_avg.fast_cnt += 1;
-        #endif
+        __STATS__( true,
+           io.complete_q.completed_avg.val += count;
+           io.complete_q.completed_avg.fast_cnt += 1;
+        )
      enable_interrupts( __cfaabi_dbg_ctx );

…

   // Submition steps :
-  // 1 - We need to make sure we don't overflow any of the buffer, P(ring.submit) to make sure
-  //     entries are available. The semaphore make sure that there is no more operations in
-  //     progress then the number of entries in the buffer. This probably limits concurrency
-  //     more than necessary since submitted but not completed operations don't need any
-  //     entries in user space. However, I don't know what happens if we overflow the buffers
-  //     because too many requests completed at once. This is a safe approach in all cases.
-  //     Furthermore, with hundreds of entries, this may be okay.
-  //
-  // 2 - Allocate a queue entry. The ring already has memory for all entries but only the ones
+  // 1 - Allocate a queue entry. The ring already has memory for all entries but only the ones
   //     listed in sq.array are visible by the kernel. For those not listed, the kernel does not
   //     offer any assurance that an entry is not being filled by multiple flags. Therefore, we
   //     need to write an allocator that allows allocating concurrently.
   //
-  // 3 - Actually fill the submit entry, this is the only simple and straightforward step.
+  // 2 - Actually fill the submit entry, this is the only simple and straightforward step.
   //
-  // 4 - Append the entry index to the array and adjust the tail accordingly. This operation
+  // 3 - Append the entry index to the array and adjust the tail accordingly. This operation
   //     needs to arrive to two concensus at the same time:
   //     A - The order in which entries are listed in the array: no two threads must pick the
…

   [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring, uint64_t data ) {
-     verify( data != 0 );
-
+     /* paranoid */ verify( data != 0 );

      // Prepare the data we need
…
      {
         // update statistics
-        #if !defined(__CFA_NO_STATISTICS__)
-           disable_interrupts();
-           __tls_stats()->io.submit_q.alloc_avg.val   += len;
-           __tls_stats()->io.submit_q.alloc_avg.block += block;
-           __tls_stats()->io.submit_q.alloc_avg.cnt   += 1;
-           enable_interrupts( __cfaabi_dbg_ctx );
-        #endif
+        __STATS__( false,
+           io.submit_q.alloc_avg.val   += len;
+           io.submit_q.alloc_avg.block += block;
+           io.submit_q.alloc_avg.cnt   += 1;
+        )

…

         block++;
-        yield();
+        if( try_lock(ring.submit_q.lock __cfaabi_dbg_ctx2) ) {
+           __release_consumed_submission( ring );
+           unlock( ring.submit_q.lock );
+        }
+        else {
+           yield();
+        }
      }

      // update statistics
-     #if !defined(__CFA_NO_STATISTICS__)
-        disable_interrupts();
-        __tls_stats()->io.submit_q.look_avg.val   += len;
-        __tls_stats()->io.submit_q.look_avg.block += block;
-        __tls_stats()->io.submit_q.look_avg.cnt   += 1;
-        enable_interrupts( __cfaabi_dbg_ctx );
-     #endif
+     __STATS__( false,
+        io.submit_q.look_avg.val   += len;
+        io.submit_q.look_avg.block += block;
+        io.submit_q.look_avg.cnt   += 1;
+     )

      return picked;
…
      if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
         // If the poller thread submits, then we just need to add this to the ready array
-
         __submit_to_ready_array( ring, idx, mask );

         __cfadbg_print_safe( io, "Kernel I/O : Added %u to ready for %p\n", idx, active_thread() );
      }
+     else if( ring.cltr_flags & CFA_CLUSTER_IO_EAGER_SUBMITS ) {
+        uint32_t picked = __submit_to_ready_array( ring, idx, mask );
+
+        for() {
+           yield();
+
+           // If some one else collected our index, we are done
+           #warning ABA problem
+           if( ring.submit_q.ready[picked] != idx ) {
+              __STATS__( false,
+                 io.submit_q.helped += 1;
+              )
+              return;
+           }
+
+           if( try_lock(ring.submit_q.lock __cfaabi_dbg_ctx2) ) {
+              __STATS__( false,
+                 io.submit_q.leader += 1;
+              )
+              break;
+           }
+
+           __STATS__( false,
+              io.submit_q.busy += 1;
+           )
+        }
+
+        // We got the lock
+        unsigned to_submit = __collect_submitions( ring );
+        int ret = __io_uring_enter( ring, to_submit, false, 0p );
+        if( ret < 0 ) {
+           unlock(ring.submit_q.lock);
+           return;
+        }
+
+        /* paranoid */ verify( ret > 0 || (ring.ring_flags & IORING_SETUP_SQPOLL) );
+
+        // Release the consumed SQEs
+        __release_consumed_submission( ring );
+
+        // update statistics
+        __STATS__( true,
+           io.submit_q.submit_avg.rdy += to_submit;
+           io.submit_q.submit_avg.csm += ret;
+           io.submit_q.submit_avg.cnt += 1;
+        )
+
+        unlock(ring.submit_q.lock);
+     }
      else {
         // get mutual exclusion
         lock(ring.submit_q.lock __cfaabi_dbg_ctx2);

+        /* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 0,
+        /* paranoid */          "index %u already reclaimed\n"
+        /* paranoid */          "head %u, prev %u, tail %u\n"
+        /* paranoid */          "[-0: %u,-1: %u,-2: %u,-3: %u]\n",
+        /* paranoid */          idx,
+        /* paranoid */          *ring.submit_q.head, ring.submit_q.prev_head, *tail
+        /* paranoid */          ,ring.submit_q.array[ ((*ring.submit_q.head) - 0) & (*ring.submit_q.mask) ]
+        /* paranoid */          ,ring.submit_q.array[ ((*ring.submit_q.head) - 1) & (*ring.submit_q.mask) ]
+        /* paranoid */          ,ring.submit_q.array[ ((*ring.submit_q.head) - 2) & (*ring.submit_q.mask) ]
+        /* paranoid */          ,ring.submit_q.array[ ((*ring.submit_q.head) - 3) & (*ring.submit_q.mask) ]
+        /* paranoid */ );
+
         // Append to the list of ready entries

         /* paranoid */ verify( idx <= mask );
-
-        ring.submit_q.array[ (*tail) & mask ] = idx & mask;
+        ring.submit_q.array[ (*tail) & mask ] = idx;
         __atomic_fetch_add(tail, 1ul32, __ATOMIC_SEQ_CST);

         // Submit however, many entries need to be submitted
-        int ret = syscall( __NR_io_uring_enter, ring.fd, 1, 0, 0, 0p, 0);
+        int ret = __io_uring_enter( ring, 1, false, 0p );
         if( ret < 0 ) {
            switch((int)errno) {
…

         // update statistics
-        #if !defined(__CFA_NO_STATISTICS__)
-           __tls_stats()->io.submit_q.submit_avg.csm += 1;
-           __tls_stats()->io.submit_q.submit_avg.cnt += 1;
-        #endif
-
-        ring.submit_q.sqes[ idx & mask ].user_data = 0;
+        __STATS__( false,
+           io.submit_q.submit_avg.csm += 1;
+           io.submit_q.submit_avg.cnt += 1;
+        )
+
+        // Release the consumed SQEs
+        __release_consumed_submission( ring );

         unlock(ring.submit_q.lock);
…
      }
   }
+
+  static unsigned __collect_submitions( struct __io_data & ring ) {
+     /* paranoid */ verify( ring.submit_q.ready != 0p );
+     /* paranoid */ verify( ring.submit_q.ready_cnt > 0 );
+
+     unsigned to_submit = 0;
+     uint32_t tail = *ring.submit_q.tail;
+     const uint32_t mask = *ring.submit_q.mask;
+
+     // Go through the list of ready submissions
+     for( i; ring.submit_q.ready_cnt ) {
+        // replace any submission with the sentinel, to consume it.
+        uint32_t idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED);
+
+        // If it was already the sentinel, then we are done
+        if( idx == -1ul32 ) continue;
+
+        // If we got a real submission, append it to the list
+        ring.submit_q.array[ (tail + to_submit) & mask ] = idx & mask;
+        to_submit++;
+     }
+
+     // Increment the tail based on how many we are ready to submit
+     __atomic_fetch_add(ring.submit_q.tail, to_submit, __ATOMIC_SEQ_CST);
+
+     return to_submit;
+  }
+
+  static uint32_t __release_consumed_submission( struct __io_data & ring ) {
+     const uint32_t smask = *ring.submit_q.mask;
+
+     if( !try_lock(ring.submit_q.release_lock __cfaabi_dbg_ctx2) ) return 0;
+     uint32_t chead = *ring.submit_q.head;
+     uint32_t phead = ring.submit_q.prev_head;
+     ring.submit_q.prev_head = chead;
+     unlock(ring.submit_q.release_lock);
+
+     uint32_t count = chead - phead;
+     for( i; count ) {
+        uint32_t idx = ring.submit_q.array[ (phead + i) & smask ];
+        ring.submit_q.sqes[ idx ].user_data = 0;
+     }
+     return count;
+  }
+
+  //=============================================================================================
+  // I/O Submissions
+  //=============================================================================================
+
+  void register_fixed_files( cluster & cl, int * files, unsigned count ) {
+     int ret = syscall( __NR_io_uring_register, cl.io->fd, IORING_REGISTER_FILES, files, count );
+     if( ret < 0 ) {
+        abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
+     }
+
+     __cfadbg_print_safe( io_core, "Kernel I/O : Performed io_register for %p, returned %d\n", active_thread(), ret );
+  }
   #endif
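
The heart of this change is deciding when a call to io_uring_enter(2) is actually needed, which the new __io_uring_enter() encodes. A plain-C sketch of that decision; the parameter names here are illustrative stand-ins for the fields the CFA runtime keeps, not the runtime's interface:

    #include <stdbool.h>
    #include <linux/io_uring.h>

    struct enter_plan { bool syscall_needed; unsigned min_complete; unsigned flags; };

    static struct enter_plan plan_enter(unsigned ring_flags, unsigned sq_flags,
                                        unsigned to_submit, bool want_completions, bool can_block) {
        struct enter_plan p = { false, 0, 0 };
        if (to_submit > 0) {
            if (!(ring_flags & IORING_SETUP_SQPOLL)) {
                p.syscall_needed = true;          // no kernel poller: must tell it about new SQEs
            } else if (sq_flags & IORING_SQ_NEED_WAKEUP) {
                p.syscall_needed = true;          // kernel poller went to sleep: wake it up
                p.flags |= IORING_ENTER_SQ_WAKEUP;
            }
        }
        if (want_completions && !(ring_flags & IORING_SETUP_SQPOLL)) {
            p.flags |= IORING_ENTER_GETEVENTS;
            if (can_block) {                      // blocking wait: ask for at least one CQE
                p.syscall_needed = true;
                p.min_complete = 1;
            } else if (ring_flags & IORING_SETUP_IOPOLL) {
                p.syscall_needed = true;          // IOPOLL completions are only reaped inside the syscall
            }
        }
        return p;
    }

In the fast paths (SQPOLL with an awake kernel thread, or non-blocking completion checks) no syscall is issued at all, which is precisely what the eager-submit and kernel-poll modes added by this changeset are trying to exploit.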
libcfa/src/concurrency/iocall.cfa

   extern ssize_t read (int fd, void *buf, size_t count);
+
+  extern ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags);
+  extern ssize_t tee(int fd_in, int fd_out, size_t len, unsigned int flags);
   }

…
   #endif
   }
+
+  ssize_t cfa_preadv2_fixed(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
+  #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_READV)
+     return preadv2(fd, iov, iovcnt, offset, flags);
+  #else
+     __submit_prelude
+
+     (*sqe){ IORING_OP_READV, fd, iov, iovcnt, offset };
+     sqe->flags |= IOSQE_FIXED_FILE;
+
+     __submit_wait
+  #endif
+  }
   #endif

…
   }

-
   ssize_t cfa_read(int fd, void *buf, size_t count) {
…

      (*sqe){ IORING_OP_WRITE, fd, buf, count, 0 };
+
+     __submit_wait
+  #endif
+  }
+
+  ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags) {
+  #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_SPLICE)
+     return splice( fd_in, off_in, fd_out, off_out, len, flags );
+  #else
+     __submit_prelude
+
+     (*sqe){ IORING_OP_SPLICE, fd_out, 0p, len, off_out };
+     sqe->splice_fd_in  = fd_in;
+     sqe->splice_off_in = off_in;
+     sqe->splice_flags  = flags;
+
+     __submit_wait
+  #endif
+  }
+
+  ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags) {
+  #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_TEE)
+     return tee( fd_in, fd_out, len, flags );
+  #else
+     __submit_prelude
+
+     (*sqe){ IORING_OP_TEE, fd_out, 0p, len, 0 };
+     sqe->splice_fd_in = fd_in;
+     sqe->splice_flags = flags;

      __submit_wait
…
   #define _CFA_IO_FEATURE_IORING_OP_WRITE ,
      return IS_DEFINED(IORING_OP_WRITE);
+
+  if( /*func == (fptr_t)splice || */
+      func == (fptr_t)cfa_splice )
+     #define _CFA_IO_FEATURE_IORING_OP_SPLICE ,
+     return IS_DEFINED(IORING_OP_SPLICE);
+
+  if( /*func == (fptr_t)tee || */
+      func == (fptr_t)cfa_tee )
+     #define _CFA_IO_FEATURE_IORING_OP_TEE ,
+     return IS_DEFINED(IORING_OP_TEE);
   #endif
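
The new cfa_splice/cfa_tee wrappers map onto the splice-specific fields of the SQE. For reference, a plain-C sketch of how such a request is encoded at the io_uring level (requires Linux 5.7+ headers for IORING_OP_SPLICE and the splice_* fields); this is an illustration of the kernel interface, not the library code:

    #include <string.h>
    #include <linux/io_uring.h>

    // Fill one SQE describing splice(fd_in, off_in, fd_out, off_out, len, flags).
    // An offset of -1 means "use the file's current offset", mirroring a NULL offset
    // pointer in the splice(2) syscall.
    static void prep_splice(struct io_uring_sqe *sqe,
                            int fd_in,  long long off_in,
                            int fd_out, long long off_out,
                            unsigned len, unsigned flags) {
        memset(sqe, 0, sizeof(*sqe));
        sqe->opcode        = IORING_OP_SPLICE;
        sqe->fd            = fd_out;                      // destination fd
        sqe->off           = (unsigned long long)off_out; // destination offset
        sqe->splice_fd_in  = fd_in;                       // source fd
        sqe->splice_off_in = (unsigned long long)off_in;  // source offset
        sqe->len           = len;
        sqe->splice_flags  = flags;                       // e.g. SPLICE_F_MOVE
    }

IORING_OP_TEE is encoded the same way, minus the two offsets, which matches the shape of the cfa_tee wrapper above.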
libcfa/src/concurrency/kernel.hfa
   struct __io_data;

-  #define CFA_CLUSTER_IO_POLLER_USER_THREAD    1 << 0 // 0x1
-  #define CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS 1 << 1 // 0x2
-  // #define CFA_CLUSTER_IO_POLLER_KERNEL_SIDE 1 << 2 // 0x4
+  #define CFA_CLUSTER_IO_POLLER_USER_THREAD    (1 << 0) // 0x01
+  #define CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS (1 << 1) // 0x02
+  #define CFA_CLUSTER_IO_EAGER_SUBMITS         (1 << 2) // 0x04
+  #define CFA_CLUSTER_IO_KERNEL_POLL_SUBMITS   (1 << 3) // 0x08
+  #define CFA_CLUSTER_IO_KERNEL_POLL_COMPLETES (1 << 4) // 0x10
   #define CFA_CLUSTER_IO_BUFFLEN_OFFSET        16
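
These flags are OR-ed together into the io_flags word given to a cluster, and the bits at CFA_CLUSTER_IO_BUFFLEN_OFFSET and above carry the ready-buffer length that io.cfa unpacks with a right shift. A small C sketch of the packing; the shift-left packing step is an assumption inferred from that unpacking, and make_io_flags is a hypothetical helper, not part of the library:

    #define CFA_CLUSTER_IO_POLLER_USER_THREAD    (1 << 0) // 0x01
    #define CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS (1 << 1) // 0x02
    #define CFA_CLUSTER_IO_EAGER_SUBMITS         (1 << 2) // 0x04
    #define CFA_CLUSTER_IO_KERNEL_POLL_SUBMITS   (1 << 3) // 0x08
    #define CFA_CLUSTER_IO_KERNEL_POLL_COMPLETES (1 << 4) // 0x10
    #define CFA_CLUSTER_IO_BUFFLEN_OFFSET        16

    // strategy: one or more of the low bits above; ready_buf_len: power of two (or < 8),
    // as verified in __kernel_io_startup. Note that POLLER_THREAD_SUBMITS and EAGER_SUBMITS
    // cannot be combined (io.cfa aborts on that combination).
    static unsigned make_io_flags(unsigned strategy, unsigned ready_buf_len) {
        return strategy | (ready_buf_len << CFA_CLUSTER_IO_BUFFLEN_OFFSET);
    }

    // e.g. eager submission with a 32-entry ready buffer:
    // unsigned io_flags = make_io_flags(CFA_CLUSTER_IO_EAGER_SUBMITS, 32);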
libcfa/src/concurrency/kernel_private.hfa
   // Statics call at the end of each thread to register statistics
   #if !defined(__CFA_NO_STATISTICS__)
-  static inline struct __stats_t * __tls_stats() {
-     /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-     /* paranoid */ verify( kernelTLS.this_stats );
-     return kernelTLS.this_stats;
-  }
+     static inline struct __stats_t * __tls_stats() {
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( kernelTLS.this_stats );
+        return kernelTLS.this_stats;
+     }
+
+     #define __STATS__(in_kernel, ...) { \
+        if( !(in_kernel) ) disable_interrupts(); \
+        with( *__tls_stats() ) { \
+           __VA_ARGS__ \
+        } \
+        if( !(in_kernel) ) enable_interrupts( __cfaabi_dbg_ctx ); \
+     }
+  #else
+     #define __STATS__(in_kernel, ...)
   #endif
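
A plain-C analogue of the new __STATS__ macro, to show the intended usage pattern: statistics updates pay for disabling interrupts only when the caller is not already inside the runtime. The real macro is Cforall and uses with(*__tls_stats()); the stand-in names and stubs below are not the runtime's actual C API:

    #include <stdint.h>

    struct io_stats { uint64_t alloc_cnt, helped, leader, busy; };
    static _Thread_local struct io_stats tls_stats;           // stand-in for __tls_stats()

    static void disable_interrupts(void) { /* runtime hook elided in this sketch */ }
    static void enable_interrupts(void)  { /* runtime hook elided in this sketch */ }

    #define __STATS__(in_kernel, ...)               \
        do {                                        \
            if (!(in_kernel)) disable_interrupts(); \
            struct io_stats *io = &tls_stats;       \
            __VA_ARGS__                             \
            if (!(in_kernel)) enable_interrupts();  \
        } while (0)

    // usage, mirroring io.cfa:
    //   __STATS__(false, io->alloc_cnt += 1; io->busy += 1; );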
libcfa/src/concurrency/preemption.cfa
   void enable_interrupts( __cfaabi_dbg_ctx_param ) {
      processor * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
+     /* paranoid */ verify( proc );

      with( kernelTLS.preemption_state ){
libcfa/src/concurrency/stats.cfa
   stats->io.submit_q.submit_avg.rdy = 0;
   stats->io.submit_q.submit_avg.csm = 0;
-  stats->io.submit_q.submit_avg.avl = 0;
   stats->io.submit_q.submit_avg.cnt = 0;
   stats->io.submit_q.look_avg.val   = 0;
…
   stats->io.submit_q.alloc_avg.cnt   = 0;
   stats->io.submit_q.alloc_avg.block = 0;
+  stats->io.submit_q.helped = 0;
+  stats->io.submit_q.leader = 0;
+  stats->io.submit_q.busy   = 0;
   stats->io.complete_q.completed_avg.val = 0;
   stats->io.complete_q.completed_avg.slow_cnt = 0;
…
   __atomic_fetch_add( &cltr->io.submit_q.alloc_avg.cnt  , proc->io.submit_q.alloc_avg.cnt  , __ATOMIC_SEQ_CST );
   __atomic_fetch_add( &cltr->io.submit_q.alloc_avg.block, proc->io.submit_q.alloc_avg.block, __ATOMIC_SEQ_CST );
+  __atomic_fetch_add( &cltr->io.submit_q.helped, proc->io.submit_q.helped, __ATOMIC_SEQ_CST );
+  __atomic_fetch_add( &cltr->io.submit_q.leader, proc->io.submit_q.leader, __ATOMIC_SEQ_CST );
+  __atomic_fetch_add( &cltr->io.submit_q.busy  , proc->io.submit_q.busy  , __ATOMIC_SEQ_CST );
   __atomic_fetch_add( &cltr->io.complete_q.completed_avg.val     , proc->io.complete_q.completed_avg.val     , __ATOMIC_SEQ_CST );
   __atomic_fetch_add( &cltr->io.complete_q.completed_avg.slow_cnt, proc->io.complete_q.completed_avg.slow_cnt, __ATOMIC_SEQ_CST );
…
   double avgrdy = ((double)io.submit_q.submit_avg.rdy) / io.submit_q.submit_avg.cnt;
   double avgcsm = ((double)io.submit_q.submit_avg.csm) / io.submit_q.submit_avg.cnt;
-  double avgavl = ((double)io.submit_q.submit_avg.avl) / io.submit_q.submit_avg.cnt;

   double lavgv = 0;
…
   "- avg ready entries        : %'18.2lf\n"
   "- avg submitted entries    : %'18.2lf\n"
-  "- avg available entries    : %'18.2lf\n"
+  "- total helped entries     : %'15" PRIu64 "\n"
+  "- total leader entries     : %'15" PRIu64 "\n"
+  "- total busy submit        : %'15" PRIu64 "\n"
   "- total ready search       : %'15" PRIu64 "\n"
   "- avg ready search len     : %'18.2lf\n"
…
   , cluster ? "Cluster" : "Processor", name, id
   , io.submit_q.submit_avg.cnt
-  , avgrdy, avgcsm, avgavl
+  , avgrdy, avgcsm
+  , io.submit_q.helped, io.submit_q.leader, io.submit_q.busy
   , io.submit_q.look_avg.cnt
   , lavgv, lavgb
libcfa/src/concurrency/stats.hfa
         volatile uint64_t block;
      } alloc_avg;
+     volatile uint64_t helped;
+     volatile uint64_t leader;
+     volatile uint64_t busy;
   } submit_q;
   struct {
src/Makefile.in

   BUILT_SOURCES = Parser/parser.hh
-  AM_YFLAGS = -d -t -v
+  AM_YFLAGS = -d -t -v -Wno-yacc
   SRC_RESOLVEXPR = \
      ResolvExpr/AdjustExprType.cc \
src/Parser/module.mk
   BUILT_SOURCES = Parser/parser.hh

-  AM_YFLAGS = -d -t -v
+  AM_YFLAGS = -d -t -v -Wno-yacc

   SRC += \
tests/pybin/test_run.py
      else : text = "FAILED with code %d" % retcode

-     text += " C%s - R%s" % (cls.fmtDur(duration[0]), cls.fmtDur(duration[1]))
+     text += " C%s - R%s" % (fmtDur(duration[0]), fmtDur(duration[1]))
      return text
-
-  @staticmethod
-  def fmtDur( duration ):
-     if duration :
-        hours, rem = divmod(duration, 3600)
-        minutes, rem = divmod(rem, 60)
-        seconds, millis = divmod(rem, 1)
-        return "%2d:%02d.%03d" % (minutes, seconds, millis * 1000)
-     return " n/a"
tests/pybin/tools.py
   while True:
      yield i.next(max(expire - time.time(), 0))
+
+  def fmtDur( duration ):
+     if duration :
+        hours, rem = divmod(duration, 3600)
+        minutes, rem = divmod(rem, 60)
+        seconds, millis = divmod(rem, 1)
+        return "%2d:%02d.%03d" % (minutes, seconds, millis * 1000)
+     return " n/a"
tests/test.py

   # for each build configurations, run the test
-  for arch, debug, install in itertools.product(settings.all_arch, settings.all_debug, settings.all_install):
-     settings.arch = arch
-     settings.debug = debug
-     settings.install = install
-
-     # filter out the tests for a different architecture
-     # tests are the same across debug/install
-     local_tests = settings.arch.filter( tests )
-     options.jobs, forceJobs = job_count( options, local_tests )
-     settings.update_make_cmd(forceJobs, options.jobs)
-
-     # check the build configuration works
-     settings.validate()
-
-     # print configuration
-     print('%s %i tests on %i cores (%s:%s)' % (
-        'Regenerating' if settings.generating else 'Running',
-        len(local_tests),
-        options.jobs,
-        settings.arch.string,
-        settings.debug.string
-     ))
-
-     # otherwise run all tests and make sure to return the correct error code
-     failed = run_tests(local_tests, options.jobs)
-     if failed:
-        result = 1
-        if not settings.continue_:
-           break
-
-
+  with Timed() as total_dur:
+     for arch, debug, install in itertools.product(settings.all_arch, settings.all_debug, settings.all_install):
+        settings.arch = arch
+        settings.debug = debug
+        settings.install = install
+
+        # filter out the tests for a different architecture
+        # tests are the same across debug/install
+        local_tests = settings.arch.filter( tests )
+        options.jobs, forceJobs = job_count( options, local_tests )
+        settings.update_make_cmd(forceJobs, options.jobs)
+
+        # check the build configuration works
+        settings.validate()
+
+        # print configuration
+        print('%s %i tests on %i cores (%s:%s)' % (
+           'Regenerating' if settings.generating else 'Running',
+           len(local_tests),
+           options.jobs,
+           settings.arch.string,
+           settings.debug.string
+        ))
+
+        # otherwise run all tests and make sure to return the correct error code
+        failed = run_tests(local_tests, options.jobs)
+        if failed:
+           result = 1
+           if not settings.continue_:
+              break
+
+  print('Tests took %s' % fmtDur( total_dur.duration ))
   sys.exit( failed )