Context Navigation

Reverse Diff

io.cfa [dd4e2d7:cb870e0]

File:

: 1 edited

libcfa/src/concurrency/io.cfa (modified) (21 diffs)

Legend:

: Unmodified
: Added
: Removed

libcfa/src/concurrency/io.cfa

-              rdd4e2d7
+              rcb870e0
 #if !defined(HAVE_LINUX_IO_URING_H)
         void __kernel_io_startup( cluster &, unsigned, bool ) {
+        void __kernel_io_startup( cluster &, int, bool ) {
                 // Nothing to do without io_uring
+        }
 …
         struct __io_poller_fast {
                 struct __io_data * ring;
+                bool waiting;
                 $thread thrd;
         };
 …
         void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
                 this.ring = cltr.io;
+                this.waiting = true;
                 (this.thrd){ "Fast I/O Poller", cltr };
+        }
 …
                 // Like head/tail but not seen by the kernel
                 volatile uint32_t alloc;
+                volatile uint32_t * ready;
+                uint32_t ready_cnt;
+                volatile uint32_t ready;
                 __spinlock_t lock;
 …
                                         volatile unsigned long long int block;
                                 } submit_avg;
-                                struct {
-                                        volatile unsigned long long int val;
-                                        volatile unsigned long long int cnt;
-                                        volatile unsigned long long int block;
-                                } look_avg;
                         } stats;
                 #endif
 …
 // I/O Startup / Shutdown logic
 //=============================================================================================
         void __kernel_io_startup( cluster & this, unsigned io_flags, bool main_cluster ) {
+        void __kernel_io_startup( cluster & this, int io_flags, bool main_cluster ) {
                 this.io = malloc();
 …
                 sq.array   = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
                 sq.alloc = *sq.tail;
+                if( io_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
+                        sq.ready_cnt = max(io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET, 8);
+                        sq.ready = alloc_align( 64, sq.ready_cnt );
+                        for(i; sq.ready_cnt) {
+                                sq.ready[i] = -1ul32;
+                        }
+                }
+                else {
+                        sq.ready_cnt = 0;
+                        sq.ready = 0p;
+                }
+                sq.ready = *sq.tail;
                 // completion queue
 …
                         this.io->submit_q.stats.submit_avg.cnt   = 0;
                         this.io->submit_q.stats.submit_avg.block = 0;
-                        this.io->submit_q.stats.look_avg.val   = 0;
-                        this.io->submit_q.stats.look_avg.cnt   = 0;
-                        this.io->submit_q.stats.look_avg.block = 0;
                         this.io->completion_q.stats.completed_avg.val = 0;
                         this.io->completion_q.stats.completed_avg.slow_cnt = 0;
 …
                 if( this.io->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
                         with( this.io->poller.fast ) {
+                                /* paranoid */ verify( waiting ); // The thread shouldn't be in a system call
                                 /* paranoid */ verify( this.procs.head == 0p || &this == mainCluster );
                                 /* paranoid */ verify( this.idles.head == 0p || &this == mainCluster );
                                 // We need to adjust the clean-up based on where the thread is
                                 if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) {
+                                if( thrd.preempted != __NO_PREEMPTION ) {
                                         // This is the tricky case
                                         // The thread was preempted and now it is on the ready queue
+                                        /* paranoid */ verify( thrd.state == Active );           // The thread better be in this state
                                         /* paranoid */ verify( thrd.next == 1p );                // The thread should be the last on the list
                                         /* paranoid */ verify( this.ready_queue.head == &thrd ); // The thread should be the only thing on the list
 …
                         if(this.print_stats) {
                                 with(this.io->submit_q.stats, this.io->completion_q.stats) {
-                                        double lavgv = 0;
-                                        double lavgb = 0;
-                                        if(look_avg.cnt != 0) {
-                                                lavgv = ((double)look_avg.val  ) / look_avg.cnt;
-                                                lavgb = ((double)look_avg.block) / look_avg.cnt;
+                                        }
                                         __cfaabi_bits_print_safe( STDERR_FILENO,
                                                 "----- I/O uRing Stats -----\n"
+                                                "- total submit calls     : %'15llu\n"
+                                                "- avg submit             : %'18.2lf\n"
+                                                "- pre-submit block %%     : %'18.2lf\n"
+                                                "- total ready search     : %'15llu\n"
+                                                "- avg ready search len   : %'18.2lf\n"
+                                                "- avg ready search block : %'18.2lf\n"
+                                                "- total wait calls       : %'15llu   (%'llu slow, %'llu fast)\n"
+                                                "- avg completion/wait    : %'18.2lf\n",
+                                                "- total submit calls  : %'15llu\n"
+                                                "- avg submit          : %'18.2lf\n"
+                                                "- pre-submit block %%  : %'18.2lf\n"
+                                                "- total wait calls    : %'15llu   (%'llu slow, %'llu fast)\n"
+                                                "- avg completion/wait : %'18.2lf\n",
                                                 submit_avg.cnt,
                                                 ((double)submit_avg.val) / submit_avg.cnt,
                                                 (100.0 * submit_avg.block) / submit_avg.cnt,
-                                                look_avg.cnt,
-                                                lavgv,
-                                                lavgb,
                                                 completed_avg.slow_cnt + completed_avg.fast_cnt,
                                                 completed_avg.slow_cnt,  completed_avg.fast_cnt,
 …
                 close(this.io->fd);
-                free( this.io->submit_q.ready ); // Maybe null, doesn't matter
                 free( this.io );
+        }
 …
         // Process a single completion message from the io_uring
         // This is NOT thread-safe
+        static [int, bool] __drain_io( & struct __io_data ring, * sigset_t mask, int waitcnt, bool in_kernel ) {
+                unsigned to_submit = 0;
+                if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
+                        // If the poller thread also submits, then we need to aggregate the submissions which are ready
+                        uint32_t * tail = ring.submit_q.tail;
+                        const uint32_t mask = *ring.submit_q.mask;
+                        // Go through the list of ready submissions
+                        for( i; ring.submit_q.ready_cnt ) {
+                                // replace any submission with the sentinel, to consume it.
+                                uint32_t idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED);
+                                // If it was already the sentinel, then we are done
+                                if( idx == -1ul32 ) continue;
+                                // If we got a real submission, append it to the list
+                                ring.submit_q.array[ ((*tail) + to_submit) & mask ] = idx & mask;
+                                to_submit++;
+                        }
+                        // Increment the tail based on how many we are ready to submit
+                        __atomic_fetch_add(tail, to_submit, __ATOMIC_SEQ_CST);
+                        // update statistics
+                        #if !defined(__CFA_NO_STATISTICS__)
+                                ring.submit_q.stats.submit_avg.val += to_submit;
+                                ring.submit_q.stats.submit_avg.cnt += 1;
+                        #endif
+                }
+                int ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
+        static int __drain_io( struct __io_data & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
+                int ret = syscall( __NR_io_uring_enter, ring.fd, 0, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
                 if( ret < 0 ) {
                         switch((int)errno) {
 …
                 __atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED );
                 return [count, count > 0 || to_submit > 0];
+                return count;
+        }
 …
                 if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
                         while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
                                 // In the user-thread approach drain and if anything was drained,
                                 // baton pass to the user-thread
+                                int count;
+                                bool again;
+                                [count, again] = __drain_io( ring, &mask, 0, true );
+                                int count = __drain_io( ring, &mask, 1, true );
                                 // Update statistics
 …
                                 #endif
                                 if(again) {
+                                if(count > 0) {
                                         __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to fast poller\n", &ring);
                                         __unpark( &ring.poller.fast.thrd __cfaabi_dbg_ctx2 );
 …
                         while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
                                 //In the naive approach, just poll the io completion queue directly
+                                int count;
+                                bool again;
+                                [count, again] = __drain_io( ring, &mask, 1, true );
+                                int count = __drain_io( ring, &mask, 1, true );
                                 // Update statistics
 …
                 // Then loop until we need to start
                 while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
                         // Drain the io
+                        int count;
+                        bool again;
+                        [count, again] = __drain_io( *this.ring, 0p, 0, false );
+                        if(!again) reset++;
+                        this.waiting = false;
+                        int count = __drain_io( *this.ring, 0p, 0, false );
+                        reset += count > 0 ? 1 : 0;
                         // Update statistics
 …
                         #endif
                         // If we got something, just yield and check again
+                        this.waiting = true;
                         if(reset < 5) {
+                                // If we got something, just yield and check again
                                 yield();
+                        }
-                        // We didn't get anything baton pass to the slow poller
                         else {
+                                // We didn't get anything baton pass to the slow poller
                                 __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
+                                post( this.ring->poller.sem );
+                                park( __cfaabi_dbg_ctx );
                                 reset = 0;
-                                // wake up the slow poller
-                                post( this.ring->poller.sem );
-                                // park this thread
-                                park( __cfaabi_dbg_ctx );
+                        }
+                }
                 __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring);
+        }
-        static inline void __wake_poller( struct __io_data & ring ) __attribute__((artificial));
-        static inline void __wake_poller( struct __io_data & ring ) {
-                // sigval val = { 1 };
-                // pthread_sigqueue( ring.poller.slow.kthrd, SIGUSR1, val );
+        }
 …
                 uint32_t idx = __atomic_fetch_add(&ring.submit_q.alloc, 1ul32, __ATOMIC_SEQ_CST);
+                // Mask the idx now to make everything easier to check
+                idx &= *ring.submit_q.mask;
+                // Validate that we didn't overflow anything
+                // Check that nothing overflowed
+                /* paranoid */ verify( true );
+                // Check that it goes head -> tail -> alloc and never head -> alloc -> tail
+                /* paranoid */ verify( true );
                 // Return the sqe
                 return [&ring.submit_q.sqes[ idx ], idx];
+                return [&ring.submit_q.sqes[ idx & (*ring.submit_q.mask)], idx];
+        }
         static inline void __submit( struct __io_data & ring, uint32_t idx ) {
+                // Get now the data we definitely need
+                uint32_t * const tail = ring.submit_q.tail;
+                // get mutual exclusion
+                lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
+                // Append to the list of ready entries
+                uint32_t * tail = ring.submit_q.tail;
                 const uint32_t mask = *ring.submit_q.mask;
+                // There are 2 submission schemes, check which one we are using
+                if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
+                        // If the poller thread submits, then we just need to add this to the ready array
+                        /* paranoid */ verify( idx <= mask   );
+                        /* paranoid */ verify( idx != -1ul32 );
+                        // We need to find a spot in the ready array
+                        __attribute((unused)) int len   = 0;
+                        __attribute((unused)) int block = 0;
+                        uint32_t expected = -1ul32;
+                        LOOKING: for(;;) {
+                                for(i; ring.submit_q.ready_cnt) {
+                                        if( __atomic_compare_exchange_n( &ring.submit_q.ready[i], &expected, idx, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) {
+                                                break LOOKING;
+                                        }
+                                        len ++;
+                                }
+                                block++;
+                                yield();
+                ring.submit_q.array[ (*tail) & mask ] = idx & mask;
+                __atomic_fetch_add(tail, 1ul32, __ATOMIC_SEQ_CST);
+                // Submit however many entries need to be submitted
+                int ret = syscall( __NR_io_uring_enter, ring.fd, 1, 0, 0, 0p, 0);
+                if( ret < 0 ) {
+                        switch((int)errno) {
+                        default:
+                                abort( "KERNEL ERROR: IO_URING SUBMIT - %s\n", strerror(errno) );
+                        }
+                        __wake_poller( ring );
+                        // update statistics
+                        #if !defined(__CFA_NO_STATISTICS__)
+                                __atomic_fetch_add( &ring.submit_q.stats.look_avg.val,   len,   __ATOMIC_RELAXED );
+                                __atomic_fetch_add( &ring.submit_q.stats.look_avg.block, block, __ATOMIC_RELAXED );
+                                __atomic_fetch_add( &ring.submit_q.stats.look_avg.cnt,   1,     __ATOMIC_RELAXED );
+                        #endif
+                        __cfadbg_print_safe( io, "Kernel I/O : Added %u to ready for %p\n", idx, active_thread() );
+                }
+                else {
+                        // get mutual exclusion
+                        lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
+                        // Append to the list of ready entries
+                        /* paranoid */ verify( idx <= mask );
+                        ring.submit_q.array[ (*tail) & mask ] = idx & mask;
+                        __atomic_fetch_add(tail, 1ul32, __ATOMIC_SEQ_CST);
+                        // Submit however many entries need to be submitted
+                        int ret = syscall( __NR_io_uring_enter, ring.fd, 1, 0, 0, 0p, 0);
+                        if( ret < 0 ) {
+                                switch((int)errno) {
+                                default:
+                                        abort( "KERNEL ERROR: IO_URING SUBMIT - %s\n", strerror(errno) );
+                                }
+                        }
+                        // update statistics
+                        #if !defined(__CFA_NO_STATISTICS__)
+                                ring.submit_q.stats.submit_avg.val += 1;
+                                ring.submit_q.stats.submit_avg.cnt += 1;
+                        #endif
+                        unlock(ring.submit_q.lock);
+                        __cfadbg_print_safe( io, "Kernel I/O : Performed io_submit for %p, returned %d\n", active_thread(), ret );
+                }
+                }
+                // update statistics
+                #if !defined(__CFA_NO_STATISTICS__)
+                        ring.submit_q.stats.submit_avg.val += 1;
+                        ring.submit_q.stats.submit_avg.cnt += 1;
+                #endif
+                unlock(ring.submit_q.lock);
+                // Make sure that idx was submitted
+                // Be careful to not get false positive if we cycled the entire list or that someone else submitted for us
+                __cfadbg_print_safe( io, "Kernel I/O : Performed io_submit for %p, returned %d\n", active_thread(), ret );
+        }
 …
 bool has_user_level_blocking( fptr_t func ) {
         #if defined(HAVE_LINUX_IO_URING_H)
+                if( /*func == (fptr_t)preadv2 || */
+                        func == (fptr_t)cfa_preadv2 )
+                        #define _CFA_IO_FEATURE_IORING_OP_READV ,
+                        return IS_DEFINED(IORING_OP_READV);
                 #if defined(HAVE_PREADV2)
-                        if( /*func == (fptr_t)preadv2 || */
-                                func == (fptr_t)cfa_preadv2 )
-                                #define _CFA_IO_FEATURE_IORING_OP_READV ,
-                                return IS_DEFINED(IORING_OP_READV);
-                #endif
-                #if defined(HAVE_PWRITEV2)
                         if( /*func == (fptr_t)pwritev2 || */
                                 func == (fptr_t)cfa_pwritev2 )
 …
                 #endif
+                if( /*func == (fptr_t)fsync || */
+                        func == (fptr_t)cfa_fsync )
+                        #define _CFA_IO_FEATURE_IORING_OP_FSYNC ,
+                        return IS_DEFINED(IORING_OP_FSYNC);
+                #if defined(HAVE_PWRITEV2)
+                        if( /*func == (fptr_t)fsync || */
+                                func == (fptr_t)cfa_fsync )
+                                #define _CFA_IO_FEATURE_IORING_OP_FSYNC ,
+                                return IS_DEFINED(IORING_OP_FSYNC);
+                #endif
                 if( /*func == (fptr_t)ync_file_range || */

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changes in libcfa/src/concurrency/io.cfa [dd4e2d7:cb870e0]

Legend:

libcfa/src/concurrency/io.cfa

Download in other formats: