Changes in libcfa/src/concurrency/io.cfa [08a994e:05cfa4d]
Files:
- libcfa/src/concurrency/io.cfa (1 edited)
Legend:
- ' ' Unmodified
- '+' Added
- '-' Removed
- '…' Elided unmodified lines
libcfa/src/concurrency/io.cfa
--- r08a994e
+++ r05cfa4d

 #if !defined(HAVE_LINUX_IO_URING_H)
-    void __kernel_io_startup( cluster &, bool ) {
+    void __kernel_io_startup( cluster &, int, bool ) {
        // Nothing to do without io_uring
    }
…
 #endif

-#if defined(__CFA_IO_POLLING_USER__)
-    void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
-        this.ring = &cltr.io;
-        (this.thrd){ "Fast I/O Poller", cltr };
-    }
-    void ^?{}( __io_poller_fast & mutex this );
-    void main( __io_poller_fast & this );
-    static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
-    void ^?{}( __io_poller_fast & mutex this ) {}
-#endif
+// Fast poller user-thread
+// Not using the "thread" keyword because we want to control
+// more carefully when to start/stop it
+struct __io_poller_fast {
+    struct __io_data * ring;
+    bool waiting;
+    $thread thrd;
+};
+
+void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
+    this.ring = cltr.io;
+    this.waiting = true;
+    (this.thrd){ "Fast I/O Poller", cltr };
+}
+void ^?{}( __io_poller_fast & mutex this );
+void main( __io_poller_fast & this );
+static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
+void ^?{}( __io_poller_fast & mutex this ) {}
+
+struct __submition_data {
+    // Head and tail of the ring (associated with array)
+    volatile uint32_t * head;
+    volatile uint32_t * tail;
+
+    // The actual kernel ring which uses head/tail
+    // indexes into the sqes arrays
+    uint32_t * array;
+
+    // number of entries and mask to go with it
+    const uint32_t * num;
+    const uint32_t * mask;
+
+    // Submission flags (Not sure what for)
+    uint32_t * flags;
+
+    // number of sqes not submitted (whatever that means)
+    uint32_t * dropped;
+
+    // Like head/tail but not seen by the kernel
+    volatile uint32_t alloc;
+    volatile uint32_t ready;
+
+    __spinlock_t lock;
+
+    // A buffer of sqes (not the actual ring)
+    struct io_uring_sqe * sqes;
+
+    // The location and size of the mmaped area
+    void * ring_ptr;
+    size_t ring_sz;
+
+    // Statistics
+    #if !defined(__CFA_NO_STATISTICS__)
+        struct {
+            struct {
+                volatile unsigned long long int val;
+                volatile unsigned long long int cnt;
+                volatile unsigned long long int block;
+            } submit_avg;
+        } stats;
+    #endif
+};
+
+struct __completion_data {
+    // Head and tail of the ring
+    volatile uint32_t * head;
+    volatile uint32_t * tail;
+
+    // number of entries and mask to go with it
+    const uint32_t * mask;
+    const uint32_t * num;
+
+    // number of cqes not submitted (whatever that means)
+    uint32_t * overflow;
+
+    // the kernel ring
+    struct io_uring_cqe * cqes;
+
+    // The location and size of the mmaped area
+    void * ring_ptr;
+    size_t ring_sz;
+
+    // Statistics
+    #if !defined(__CFA_NO_STATISTICS__)
+        struct {
+            struct {
+                unsigned long long int val;
+                unsigned long long int slow_cnt;
+                unsigned long long int fast_cnt;
+            } completed_avg;
+        } stats;
+    #endif
+};
+
+struct __io_data {
+    struct __submition_data submit_q;
+    struct __completion_data completion_q;
+    uint32_t ring_flags;
+    int cltr_flags;
+    int fd;
+    semaphore submit;
+    volatile bool done;
+    struct {
+        struct {
+            void * stack;
+            pthread_t kthrd;
+        } slow;
+        __io_poller_fast fast;
+        __bin_sem_t sem;
+    } poller;
+};

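Note: the head/tail/mask pointer triplets above mirror the kernel's io_uring ring layout: one side bumps tail, the other bumps head, and both index the backing array modulo a power-of-two size via the mask. The C sketch below (illustrative only, not part of the changeset; pop_cqe is a hypothetical helper) shows how a completion would be consumed through exactly these fields:

    #include <stdint.h>
    #include <stdatomic.h>
    #include <linux/io_uring.h>

    // Drain one completion from a ring shaped like __completion_data above.
    static int pop_cqe( _Atomic uint32_t * head, _Atomic uint32_t * tail,
                        const uint32_t * mask, const struct io_uring_cqe * cqes,
                        uint64_t * user_data, int32_t * res ) {
        uint32_t t = atomic_load_explicit( tail, memory_order_acquire ); // written by the kernel
        uint32_t h = atomic_load_explicit( head, memory_order_relaxed ); // written by user space
        if( h == t ) return 0;                                // ring is empty
        const struct io_uring_cqe * cqe = &cqes[ h & *mask ]; // the mask wraps the index
        *user_data = cqe->user_data;
        *res       = cqe->res;
        atomic_store_explicit( head, h + 1, memory_order_release ); // hand the slot back
        return 1;
    }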
 //=============================================================================================
 // I/O Startup / Shutdown logic
 //=============================================================================================
-void __kernel_io_startup( cluster & this, bool main_cluster ) {
+void __kernel_io_startup( cluster & this, int io_flags, bool main_cluster ) {
+    this.io = malloc();
+
     // Step 1 : call to setup
     struct io_uring_params params;
…
     // Step 2 : mmap result
-    memset( &this.io, 0, sizeof(struct io_ring) );
-    struct io_uring_sq & sq = this.io.submit_q;
-    struct io_uring_cq & cq = this.io.completion_q;
+    memset( this.io, 0, sizeof(struct __io_data) );
+    struct __submition_data & sq = this.io->submit_q;
+    struct __completion_data & cq = this.io->completion_q;

     // calculate the right ring size
…
     // Update the global ring info
-    this.io.flags = params.flags;
-    this.io.fd = fd;
-    this.io.done = false;
-    (this.io.submit){ min(*sq.num, *cq.num) };
+    this.io->ring_flags = params.flags;
+    this.io->cltr_flags = io_flags;
+    this.io->fd = fd;
+    this.io->done = false;
+    (this.io->submit){ min(*sq.num, *cq.num) };

     // Initialize statistics
     #if !defined(__CFA_NO_STATISTICS__)
-        this.io.submit_q.stats.submit_avg.val = 0;
-        this.io.submit_q.stats.submit_avg.cnt = 0;
-        this.io.completion_q.stats.completed_avg.val = 0;
-        this.io.completion_q.stats.completed_avg.cnt = 0;
+        this.io->submit_q.stats.submit_avg.val = 0;
+        this.io->submit_q.stats.submit_avg.cnt = 0;
+        this.io->submit_q.stats.submit_avg.block = 0;
+        this.io->completion_q.stats.completed_avg.val = 0;
+        this.io->completion_q.stats.completed_avg.slow_cnt = 0;
+        this.io->completion_q.stats.completed_avg.fast_cnt = 0;
     #endif
…

 void __kernel_io_finish_start( cluster & this ) {
-    #if defined(__CFA_IO_POLLING_USER__)
+    if( this.io->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
         __cfadbg_print_safe(io_core, "Kernel I/O : Creating fast poller for cluter %p\n", &this);
-        (this.io.poller.fast){ this };
-        __thrd_start( this.io.poller.fast, main );
-    #endif
+        (this.io->poller.fast){ this };
+        __thrd_start( this.io->poller.fast, main );
+    }

     // Create the poller thread
     __cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluter %p\n", &this);
-    this.io.poller.slow.stack = __create_pthread( &this.io.poller.slow.kthrd, __io_poller_slow, &this );
+    this.io->poller.slow.stack = __create_pthread( &this.io->poller.slow.kthrd, __io_poller_slow, &this );
 }
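Note: the elided "Step 2" hunks follow the standard io_uring bring-up recipe, whose results the new heap-allocated __io_data now records. For orientation, a hedged C sketch of that recipe (ring_setup is a hypothetical helper; the syscall number and IORING_OFF_* offsets are the standard ones from <sys/syscall.h> and <linux/io_uring.h>; error handling elided):

    #define _GNU_SOURCE
    #include <linux/io_uring.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    // Set up a ring and map its three shared regions, as "Step 1"/"Step 2" do.
    static int ring_setup( unsigned entries, struct io_uring_params * p,
                           void ** sq_ring, void ** cq_ring, void ** sqes ) {
        memset( p, 0, sizeof(*p) );
        int fd = syscall( __NR_io_uring_setup, entries, p );
        if( fd < 0 ) return fd;

        size_t sq_sz  = p->sq_off.array + p->sq_entries * sizeof(unsigned);
        size_t cq_sz  = p->cq_off.cqes  + p->cq_entries * sizeof(struct io_uring_cqe);
        size_t sqe_sz = p->sq_entries * sizeof(struct io_uring_sqe);

        *sq_ring = mmap( 0, sq_sz,  PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, fd, IORING_OFF_SQ_RING );
        *cq_ring = mmap( 0, cq_sz,  PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, fd, IORING_OFF_CQ_RING );
        *sqes    = mmap( 0, sqe_sz, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, fd, IORING_OFF_SQES );
        return fd;
    }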
…
     __cfadbg_print_safe(io_core, "Kernel I/O : Stopping pollers for cluster\n", &this);
     // Notify the poller thread of the shutdown
-    __atomic_store_n(&this.io.done, true, __ATOMIC_SEQ_CST);
+    __atomic_store_n(&this.io->done, true, __ATOMIC_SEQ_CST);

     // Stop the IO Poller
     sigval val = { 1 };
-    pthread_sigqueue( this.io.poller.slow.kthrd, SIGUSR1, val );
-    #if defined(__CFA_IO_POLLING_USER__)
-        post( this.io.poller.sem );
-    #endif
+    pthread_sigqueue( this.io->poller.slow.kthrd, SIGUSR1, val );
+    post( this.io->poller.sem );

     // Wait for the poller thread to finish
-    pthread_join( this.io.poller.slow.kthrd, 0p );
-    free( this.io.poller.slow.stack );
+    pthread_join( this.io->poller.slow.kthrd, 0p );
+    free( this.io->poller.slow.stack );

     __cfadbg_print_safe(io_core, "Kernel I/O : Slow poller stopped for cluster\n", &this);

-    #if defined(__CFA_IO_POLLING_USER__)
+    if( this.io->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
+        with( this.io->poller.fast ) {
+            /* paranoid */ verify( waiting ); // The thread shouldn't be in a system call
+            /* paranoid */ verify( this.procs.head == 0p || &this == mainCluster );
+            /* paranoid */ verify( this.idles.head == 0p || &this == mainCluster );
+
+            // We need to adjust the clean-up based on where the thread is
+            if( thrd.preempted != __NO_PREEMPTION ) {
+
+                // This is the tricky case
+                // The thread was preempted and now it is on the ready queue
+                /* paranoid */ verify( thrd.state == Active );           // The thread better be in this state
+                /* paranoid */ verify( thrd.next == 1p );                // The thread should be the last on the list
+                /* paranoid */ verify( this.ready_queue.head == &thrd ); // The thread should be the only thing on the list
+
+                // Remove the thread from the ready queue of this cluster
+                this.ready_queue.head = 1p;
+                thrd.next = 0p;
+
+                // Fixup the thread state
+                thrd.state = Blocked;
+                thrd.preempted = __NO_PREEMPTION;
+
+                // Pretend like the thread was blocked all along
+            }
+            // !!! This is not an else if !!!
+            if( thrd.state == Blocked ) {
+
+                // This is the "easy case"
+                // The thread is parked and can easily be moved to active cluster
+                verify( thrd.curr_cluster != active_cluster() || thrd.curr_cluster == mainCluster );
+                thrd.curr_cluster = active_cluster();

                 // unpark the fast io_poller
-    unpark( &this.io.poller.fast.thrd __cfaabi_dbg_ctx2 );
-
-    ^(this.io.poller.fast){};
+                unpark( &thrd __cfaabi_dbg_ctx2 );
+            }
+            else {
+
+                // The thread is in a weird state
+                // I don't know what to do here
+                abort("Fast poller thread is in unexpected state, cannot clean-up correctly\n");
+            }
+        }
+
+        ^(this.io->poller.fast){};

         __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller stopped for cluster\n", &this);
-    #endif
+    }
 }

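Note: the shutdown sequence above relies on a directed signal forcing the slow poller's blocked io_uring_enter to fail with EINTR, after which the poller observes the done flag and exits. A minimal C sketch of that pattern (stop_poller and noop are hypothetical names, not the CFA code):

    #define _GNU_SOURCE
    #include <pthread.h>
    #include <signal.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <string.h>

    static void noop( int sig ) { (void)sig; } // a handler must exist, or SIGUSR1 kills the process

    // Kick a pthread out of a blocking syscall, then reap it.
    static void stop_poller( pthread_t kthrd, atomic_bool * done ) {
        struct sigaction sa;
        memset( &sa, 0, sizeof(sa) );
        sa.sa_handler = noop;                    // no SA_RESTART, so the wait fails with EINTR
        sigaction( SIGUSR1, &sa, 0 );

        atomic_store( done, true );              // the poller re-checks this after EINTR
        union sigval val = { .sival_int = 1 };
        pthread_sigqueue( kthrd, SIGUSR1, val ); // directed, queued signal
        pthread_join( kthrd, 0 );
    }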
…
     #if !defined(__CFA_NO_STATISTICS__)
         if(this.print_stats) {
-            __cfaabi_bits_print_safe( STDERR_FILENO,
-                "----- I/O uRing Stats -----\n"
-                "- total submit calls : %llu\n"
-                "- avg submit : %lf\n"
-                "- total wait calls : %llu\n"
-                "- avg completion/wait : %lf\n",
-                this.io.submit_q.stats.submit_avg.cnt,
-                ((double)this.io.submit_q.stats.submit_avg.val) / this.io.submit_q.stats.submit_avg.cnt,
-                this.io.completion_q.stats.completed_avg.cnt,
-                ((double)this.io.completion_q.stats.completed_avg.val) / this.io.completion_q.stats.completed_avg.cnt
-            );
+            with(this.io->submit_q.stats, this.io->completion_q.stats) {
+                __cfaabi_bits_print_safe( STDERR_FILENO,
+                    "----- I/O uRing Stats -----\n"
+                    "- total submit calls : %'llu\n"
+                    "- avg submit : %'.2lf\n"
+                    "- pre-submit block %% : %'.2lf\n"
+                    "- total wait calls : %'llu (%'llu slow, %'llu fast)\n"
+                    "- avg completion/wait : %'.2lf\n",
+                    submit_avg.cnt,
+                    ((double)submit_avg.val) / submit_avg.cnt,
+                    (100.0 * submit_avg.block) / submit_avg.cnt,
+                    completed_avg.slow_cnt + completed_avg.fast_cnt,
+                    completed_avg.slow_cnt, completed_avg.fast_cnt,
+                    ((double)completed_avg.val) / (completed_avg.slow_cnt + completed_avg.fast_cnt)
+                );
+            }
         }
     #endif

     // Shutdown the io rings
-    struct io_uring_sq & sq = this.io.submit_q;
-    struct io_uring_cq & cq = this.io.completion_q;
+    struct __submition_data & sq = this.io->submit_q;
+    struct __completion_data & cq = this.io->completion_q;

     // unmap the submit queue entries
…
     // close the file descriptor
-    close(this.io.fd);
+    close(this.io->fd);
+
+    free( this.io );
 }
…
 // Process a single completion message from the io_uring
 // This is NOT thread-safe
-static int __drain_io( struct io_ring & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
+static int __drain_io( struct __io_data & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
     int ret = syscall( __NR_io_uring_enter, ring.fd, 0, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
     if( ret < 0 ) {
…
     // Nothing was new return 0
     if (head == tail) {
-        #if !defined(__CFA_NO_STATISTICS__)
-            ring.completion_q.stats.completed_avg.cnt += 1;
-        #endif
         return 0;
     }
…
     __atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED );

-    // Update statistics
-    #if !defined(__CFA_NO_STATISTICS__)
-        ring.completion_q.stats.completed_avg.val += count;
-        ring.completion_q.stats.completed_avg.cnt += 1;
-    #endif
-
     return count;
 }
…
 static void * __io_poller_slow( void * arg ) {
     cluster * cltr = (cluster *)arg;
-    struct io_ring & ring = cltr->io;
+    struct __io_data & ring = *cltr->io;

     sigset_t mask;
…
     verify( (*ring.completion_q.head) == (*ring.completion_q.tail) );

-    while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
-        #if defined(__CFA_IO_POLLING_USER__)
-
+    __cfadbg_print_safe(io_core, "Kernel I/O : Slow poller for ring %p ready\n", &ring);
+
+    if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
+        while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
             // In the user-thread approach drain and if anything was drained,
             // batton pass to the user-thread
             int count = __drain_io( ring, &mask, 1, true );
+
+            // Update statistics
+            #if !defined(__CFA_NO_STATISTICS__)
+                ring.completion_q.stats.completed_avg.val += count;
+                ring.completion_q.stats.completed_avg.slow_cnt += 1;
+            #endif
+
             if(count > 0) {
                 __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to fast poller\n", &ring);
…
                 wait( ring.poller.sem );
             }
-
-        #else
-
+        }
+    }
+    else {
+        while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
             //In the naive approach, just poll the io completion queue directly
-            __drain_io( ring, &mask, 1, true );
-
+            int count = __drain_io( ring, &mask, 1, true );
+
+            // Update statistics
+            #if !defined(__CFA_NO_STATISTICS__)
+                ring.completion_q.stats.completed_avg.val += count;
+                ring.completion_q.stats.completed_avg.slow_cnt += 1;
+            #endif
+        }
+    }
+
+    __cfadbg_print_safe(io_core, "Kernel I/O : Slow poller for ring %p stopping\n", &ring);
+
+    return 0p;
+}
+
+void main( __io_poller_fast & this ) {
+    verify( this.ring->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD );
+
+    // Start parked
+    park( __cfaabi_dbg_ctx );
+
+    __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p ready\n", &this.ring);
+
+    int reset = 0;
+
+    // Then loop until we need to start
+    while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
+        // Drain the io
+        this.waiting = false;
+        int count = __drain_io( *this.ring, 0p, 0, false );
+        reset += count > 0 ? 1 : 0;
+
+        // Update statistics
+        #if !defined(__CFA_NO_STATISTICS__)
+            this.ring->completion_q.stats.completed_avg.val += count;
+            this.ring->completion_q.stats.completed_avg.fast_cnt += 1;
         #endif
-    }
-
-    return 0p;
-}
-
-#if defined(__CFA_IO_POLLING_USER__)
-    void main( __io_poller_fast & this ) {
-        // Start parked
-        park( __cfaabi_dbg_ctx );
-
-        // Then loop until we need to start
-        while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
-            // Drain the io
-            if(0 > __drain_io( *this.ring, 0p, 0, false )) {
-                // If we got something, just yield and check again
-                yield();
-            }
-            else {
-                // We didn't get anything baton pass to the slow poller
-                __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
-                post( this.ring->poller.sem );
-                park( __cfaabi_dbg_ctx );
-            }
+
+        this.waiting = true;
+        if(reset < 5) {
+            // If we got something, just yield and check again
+            yield();
         }
-    }
-#endif
+        else {
+            // We didn't get anything baton pass to the slow poller
+            __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
+            post( this.ring->poller.sem );
+            park( __cfaabi_dbg_ctx );
+            reset = 0;
+        }
+    }
+
+    __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring);
+}
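Note: together the two loops above form a baton-pass protocol: the slow (kernel-thread) poller blocks in the kernel, and once a blocking wait finds completions it wakes the fast (user-thread) poller and parks on poller.sem; the fast poller polls and yields, and returns the baton when it runs dry. The schematic below restates the protocol in plain C; drain_blocking, drain_nonblocking, wake_fast, and park_fast are hypothetical stand-ins for __drain_io, unpark, and park:

    #include <semaphore.h>
    #include <stdatomic.h>

    extern int  drain_blocking( void );     // stand-in for __drain_io(ring, &mask, 1, true)
    extern int  drain_nonblocking( void );  // stand-in for __drain_io(ring, 0p, 0, false)
    extern void wake_fast( void );          // stand-in for unpark(&poller.fast.thrd)
    extern void park_fast( void );          // stand-in for park()

    void slow_loop( sem_t * baton, atomic_bool * done ) {
        while( !atomic_load( done ) ) {
            if( drain_blocking() > 0 ) {    // found work while blocked in the kernel
                wake_fast();                // hand the ring to the user thread ...
                sem_wait( baton );          // ... and sleep until it hands it back
            }
        }
    }

    void fast_loop( sem_t * baton, atomic_bool * done ) {
        while( !atomic_load( done ) ) {
            if( drain_nonblocking() > 0 ) continue; // productive: keep polling
            sem_post( baton );              // ran dry: return the baton ...
            park_fast();                    // ... and park until woken again
        }
    }

The real fast poller is less eager to give up the baton: it keeps yielding until the reset counter reaches its threshold, trading some idle spinning for fewer kernel-thread wakeups.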

 //=============================================================================================
…
 //

-static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct io_ring & ring ) {
+static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring ) {
     // Wait for a spot to be available
-    P(ring.submit);
+    __attribute__((unused)) bool blocked = P(ring.submit);
+    #if !defined(__CFA_NO_STATISTICS__)
+        __atomic_fetch_add( &ring.submit_q.stats.submit_avg.block, blocked ? 1ul64 : 0ul64, __ATOMIC_RELAXED );
+    #endif

     // Allocate the sqe
…
 }

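Note: the new statistics line assumes a semaphore whose P reports whether the caller actually had to block, which is what feeds the new "pre-submit block %" figure. A hedged C sketch of such a P (struct sem and P_blocking are illustrative; the CFA semaphore is a runtime primitive with its own implementation):

    #include <pthread.h>
    #include <stdbool.h>

    struct sem { pthread_mutex_t lock; pthread_cond_t cond; long count; };

    // P() that additionally reports whether the caller had to block.
    static bool P_blocking( struct sem * s ) {
        bool blocked = false;
        pthread_mutex_lock( &s->lock );
        while( s->count == 0 ) {
            blocked = true;                        // at least one wait happened
            pthread_cond_wait( &s->cond, &s->lock );
        }
        s->count -= 1;
        pthread_mutex_unlock( &s->lock );
        return blocked;
    }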
…
-static inline void __submit( struct io_ring & ring, uint32_t idx ) {
+static inline void __submit( struct __io_data & ring, uint32_t idx ) {
     // get mutual exclusion
     lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
…
 #define __submit_prelude \
-    struct io_ring & ring = active_cluster()->io; \
+    struct __io_data & ring = *active_cluster()->io; \
     struct io_uring_sqe * sqe; \
     uint32_t idx; \
…
 #include <sys/socket.h>
 #include <sys/syscall.h>
-
-#if defined(HAVE_PREADV2)
     struct iovec;
     extern ssize_t preadv2 (int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
-#endif
-#if defined(HAVE_PWRITEV2)
-    struct iovec;
     extern ssize_t pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
-#endif

 extern int fsync(int fd);
…
 //-----------------------------------------------------------------------------
 // Asynchronous operations
-#if defined(HAVE_PREADV2)
-    ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
-        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_READV)
-            return preadv2(fd, iov, iovcnt, offset, flags);
-        #else
-            __submit_prelude
-
-            (*sqe){ IORING_OP_READV, fd, iov, iovcnt, offset };
-
-            __submit_wait
-        #endif
-    }
-#endif
-
-#if defined(HAVE_PWRITEV2)
-    ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
-        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_WRITEV)
-            return pwritev2(fd, iov, iovcnt, offset, flags);
-        #else
-            __submit_prelude
-
-            (*sqe){ IORING_OP_WRITEV, fd, iov, iovcnt, offset };
-
-            __submit_wait
-        #endif
-    }
-#endif
+ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
+    #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_READV)
+        return preadv2(fd, iov, iovcnt, offset, flags);
+    #else
+        __submit_prelude
+
+        (*sqe){ IORING_OP_READV, fd, iov, iovcnt, offset };
+
+        __submit_wait
+    #endif
+}
+
+ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
+    #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_WRITEV)
+        return pwritev2(fd, iov, iovcnt, offset, flags);
+    #else
+        __submit_prelude
+
+        (*sqe){ IORING_OP_WRITEV, fd, iov, iovcnt, offset };
+
+        __submit_wait
+    #endif
+}

 int cfa_fsync(int fd) {
…
         return IS_DEFINED(IORING_OP_READV);

-    #if defined(HAVE_PREADV2)
-        if( /*func == (fptr_t)pwritev2 || */
-            func == (fptr_t)cfa_pwritev2 )
-            #define _CFA_IO_FEATURE_IORING_OP_WRITEV ,
-            return IS_DEFINED(IORING_OP_WRITEV);
-    #endif
-
-    #if defined(HAVE_PWRITEV2)
-        if( /*func == (fptr_t)fsync || */
-            func == (fptr_t)cfa_fsync )
-            #define _CFA_IO_FEATURE_IORING_OP_FSYNC ,
-            return IS_DEFINED(IORING_OP_FSYNC);
-    #endif
+    if( /*func == (fptr_t)pwritev2 || */
+        func == (fptr_t)cfa_pwritev2 )
+        #define _CFA_IO_FEATURE_IORING_OP_WRITEV ,
+        return IS_DEFINED(IORING_OP_WRITEV);
+
+    if( /*func == (fptr_t)fsync || */
+        func == (fptr_t)cfa_fsync )
+        #define _CFA_IO_FEATURE_IORING_OP_FSYNC ,
+        return IS_DEFINED(IORING_OP_FSYNC);

     if( /*func == (fptr_t)ync_file_range || */
…
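Note: the trailing hunks depend on an IS_DEFINED macro that selects true or false at compile time based on whether a matching _CFA_IO_FEATURE_* macro was defined to a bare comma, in the style of the Linux kernel's IS_ENABLED. A plausible reconstruction of that comma trick (assumed; the actual CFA macro may differ in detail):

    #include <stdbool.h>

    #define SECOND( junk, val, ... ) val
    #define EXPAND( ... ) SECOND( __VA_ARGS__ )
    #define IS_DEFINED( x ) EXPAND( _CFA_IO_FEATURE_##x true, false )

    #define _CFA_IO_FEATURE_IORING_OP_READV ,   // feature macro expands to a comma

    // IS_DEFINED(IORING_OP_READV) -> EXPAND(, true, false) -> true
    // IS_DEFINED(IORING_OP_FSYNC) -> EXPAND(_CFA_IO_FEATURE_IORING_OP_FSYNC true, false) -> false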