Context Navigation

← Previous Change
Next Change →

io.cfa

Timestamp:

Aug 18, 2020, 4:31:19 PM (4 years ago)

Author:

Thierry Delisle <tdelisle@…>

Branches:

ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum

Children:

8e9d567

Parents:

ef9988b (diff), f2384c9a (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.

Message:

Merge branch 'master' into new-ast

File:

: 1 edited

libcfa/src/concurrency/io.cfa (modified) (21 diffs)

Legend:

: Unmodified
: Added
: Removed

libcfa/src/concurrency/io.cfa

-                      ref9988b
+                      r13d33a75
         #include "kernel/fwd.hfa"
         #include "io/types.hfa"
+        // Returns true if acquired as leader or second (next) leader, leaving interrupts disabled; returns false, with interrupts re-enabled, when the lock word is neither 0p nor 1p (a next leader is already registered).
+        static inline bool try_lock( __leaderlock_t & this ) {
+                const uintptr_t thrd = 1z | (uintptr_t)active_thread(); // current thread pointer with the low bit set; this is the value stored when registering as next leader
+                bool block; // assigned on every path that reaches the compare-exchange below
+                disable_interrupts();
+                for() {
+                        struct $thread * expected = this.value; // snapshot of the lock word for this attempt
+                        if( 1p != expected && 0p != expected ) { // neither unlocked (0p) nor locked-without-waiter (1p)
+                                /* paranoid */ verify( thrd != (uintptr_t)expected ); // We better not already be the next leader
+                                enable_interrupts( __cfaabi_dbg_ctx );
+                                return false;
+                        }
+                        struct $thread * desired;
+                        if( 0p == expected ) {
+                                // If the lock isn't locked acquire it, no need to block
+                                desired = 1p;
+                                block = false;
+                        }
+                        else {
+                                // If the lock is already locked try becoming the next leader
+                                desired = (struct $thread *)thrd;
+                                block = true;
+                        }
+                        if( __atomic_compare_exchange_n(&this.value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break; // on failure the lock word changed underneath us: loop and re-read it
+                }
+                if( block ) { // registered as next leader: wait to be unparked (next() unparks the registered thread)
+                        enable_interrupts( __cfaabi_dbg_ctx );
+                        park( __cfaabi_dbg_ctx );
+                        disable_interrupts(); // both true-returning paths leave interrupts disabled
+                }
+                return true;
+        }
+        static inline bool next( __leaderlock_t & this ) { // Releases the leader lock, or hands it to the registered next leader; returns true if a thread was unparked, false otherwise. Interrupts are re-enabled before returning.
+                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled ); // expects to be entered with preemption disabled
+                struct $thread * nextt;
+                for() {
+                        struct $thread * expected = this.value; // snapshot of the lock word for this attempt
+                        /* paranoid */ verify( (1 & (uintptr_t)expected) == 1 ); // The lock better be locked
+                        struct $thread * desired;
+                        if( 1p == expected ) {
+                                // No next leader, just unlock
+                                desired = 0p;
+                                nextt   = 0p;
+                        }
+                        else {
+                                // There is a next leader, remove but keep locked
+                                desired = 1p;
+                                nextt   = (struct $thread *)(~1z & (uintptr_t)expected); // clear the low bit to recover the thread pointer
+                        }
+                        if( __atomic_compare_exchange_n(&this.value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break; // on failure the lock word changed underneath us: loop and re-read it
+                }
+                if(nextt) { // a next leader was registered: wake it, the lock word stays at 1p
+                        unpark( nextt __cfaabi_dbg_ctx2 );
+                        enable_interrupts( __cfaabi_dbg_ctx );
+                        return true;
+                }
+                enable_interrupts( __cfaabi_dbg_ctx );
+                return false;
+        }
 //=============================================================================================
 …
 //=============================================================================================
         static unsigned __collect_submitions( struct __io_data & ring );
         static uint32_t __release_consumed_submission( struct __io_data & ring );
+        static __u32 __release_consumed_submission( struct __io_data & ring );
         static inline void process(struct io_uring_cqe & cqe ) {
 …
                 data->result = cqe.res;
                 unpark( data->thrd __cfaabi_dbg_ctx2 );
+                post( data->sem );
+        }
 …
                 unsigned head = *ring.completion_q.head;
                 unsigned tail = *ring.completion_q.tail;
                 const uint32_t mask = *ring.completion_q.mask;
+                const __u32 mask = *ring.completion_q.mask;
                 // Nothing was new return 0
 …
+                }
                 uint32_t count = tail - head;
+                __u32 count = tail - head;
                 /* paranoid */ verify( count != 0 );
                 for(i; count) {
 …
                                 __STATS__( true,
                                         io.complete_q.completed_avg.val += count;
                                         io.complete_q.completed_avg.fast_cnt += 1;
+                                        io.complete_q.completed_avg.cnt += 1;
+                                )
                         enable_interrupts( __cfaabi_dbg_ctx );
 …
                         // We didn't get anything; baton-pass to the slow poller
                         else {
+                                __STATS__( false,
+                                        io.complete_q.blocks += 1;
+                                )
                                 __cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %p\n", &this.self);
                                 reset = 0;
 …
 //
         [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring, uint64_t data ) {
+        [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ) {
                 /* paranoid */ verify( data != 0 );
 …
                 __attribute((unused)) int len   = 0;
                 __attribute((unused)) int block = 0;
                 uint32_t cnt = *ring.submit_q.num;
                 uint32_t mask = *ring.submit_q.mask;
+                __u32 cnt = *ring.submit_q.num;
+                __u32 mask = *ring.submit_q.mask;
                 disable_interrupts();
                         uint32_t off = __tls_rand();
+                        __u32 off = __tls_rand();
                 enable_interrupts( __cfaabi_dbg_ctx );
 …
                         // Look through the list starting at some offset
                         for(i; cnt) {
                                 uint64_t expected = 0;
                                 uint32_t idx = (i + off) & mask;
+                                __u64 expected = 0;
+                                __u32 idx = (i + off) & mask;
                                 struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];
                                 volatile uint64_t * udata = (volatile uint64_t *)&sqe->user_data;
+                                volatile __u64 * udata = &sqe->user_data;
                                 if( *udata == expected &&
 …
+        }
         static inline uint32_t __submit_to_ready_array( struct __io_data & ring, uint32_t idx, const uint32_t mask ) {
+        static inline __u32 __submit_to_ready_array( struct __io_data & ring, __u32 idx, const __u32 mask ) {
                 /* paranoid */ verify( idx <= mask   );
                 /* paranoid */ verify( idx != -1ul32 );
 …
                 __attribute((unused)) int len   = 0;
                 __attribute((unused)) int block = 0;
                 uint32_t ready_mask = ring.submit_q.ready_cnt - 1;
+                __u32 ready_mask = ring.submit_q.ready_cnt - 1;
                 disable_interrupts();
                         uint32_t off = __tls_rand();
+                        __u32 off = __tls_rand();
                 enable_interrupts( __cfaabi_dbg_ctx );
                 uint32_t picked;
+                __u32 picked;
                 LOOKING: for() {
                         for(i; ring.submit_q.ready_cnt) {
                                 picked = (i + off) & ready_mask;
                                 uint32_t expected = -1ul32;
+                                __u32 expected = -1ul32;
                                 if( __atomic_compare_exchange_n( &ring.submit_q.ready[picked], &expected, idx, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) {
                                         break LOOKING;
 …
                         block++;
+                        if( try_lock(ring.submit_q.lock __cfaabi_dbg_ctx2) ) {
+                                __release_consumed_submission( ring );
+                                unlock( ring.submit_q.lock );
+                        }
+                        else {
+                        __u32 released = __release_consumed_submission( ring );
+                        if( released == 0 ) {
                                 yield();
+                        }
 …
+        }
         void __submit( struct io_context * ctx, uint32_t idx ) __attribute__((nonnull (1))) {
+        void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1))) {
                 __io_data & ring = *ctx->thrd.ring;
                 // Get the data we definitely need now
                 volatile uint32_t * const tail = ring.submit_q.tail;
                 const uint32_t mask  = *ring.submit_q.mask;
+                volatile __u32 * const tail = ring.submit_q.tail;
+                const __u32 mask  = *ring.submit_q.mask;
                 // There are 2 submission schemes, check which one we are using
 …
+                }
                 else if( ring.eager_submits ) {
+                        uint32_t picked = __submit_to_ready_array( ring, idx, mask );
+                        for() {
+                                yield();
+                                // If someone else collected our index, we are done
+                                #warning ABA problem
+                                if( ring.submit_q.ready[picked] != idx ) {
+                        __u32 picked = __submit_to_ready_array( ring, idx, mask );
+                        #if defined(LEADER_LOCK)
+                                if( !try_lock(ring.submit_q.submit_lock) ) {
                                         __STATS__( false,
                                                 io.submit_q.helped += 1;
 …
                                         return;
+                                }
+                                if( try_lock(ring.submit_q.lock __cfaabi_dbg_ctx2) ) {
+                                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                                __STATS__( true,
+                                        io.submit_q.leader += 1;
+                                )
+                        #else
+                                for() {
+                                        yield();
+                                        if( try_lock(ring.submit_q.submit_lock __cfaabi_dbg_ctx2) ) {
+                                                __STATS__( false,
+                                                        io.submit_q.leader += 1;
+                                                )
+                                                break;
+                                        }
+                                        // If someone else collected our index, we are done
+                                        #warning ABA problem
+                                        if( ring.submit_q.ready[picked] != idx ) {
+                                                __STATS__( false,
+                                                        io.submit_q.helped += 1;
+                                                )
+                                                return;
+                                        }
                                         __STATS__( false,
                                                 io.submit_q.leader += 1;
+                                                io.submit_q.busy += 1;
+                                        )
+                                        break;
+                                }
+                                __STATS__( false,
+                                        io.submit_q.busy += 1;
+                                )
+                        }
+                                }
+                        #endif
                         // We got the lock
+                        // Collect the submissions
                         unsigned to_submit = __collect_submitions( ring );
+                        // Actually submit
                         int ret = __io_uring_enter( ring, to_submit, false );
+                        if( ret < 0 ) {
+                                unlock(ring.submit_q.lock);
+                                return;
+                        }
+                        /* paranoid */ verify( ret > 0 || to_submit == 0 || (ring.ring_flags & IORING_SETUP_SQPOLL) );
+                        #if defined(LEADER_LOCK)
+                                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                                next(ring.submit_q.submit_lock);
+                        #else
+                                unlock(ring.submit_q.submit_lock);
+                        #endif
+                        if( ret < 0 ) return;
                         // Release the consumed SQEs
 …
                         // update statistics
                         __STATS__( true,
+                        __STATS__( false,
                                 io.submit_q.submit_avg.rdy += to_submit;
                                 io.submit_q.submit_avg.csm += ret;
                                 io.submit_q.submit_avg.cnt += 1;
+                        )
-                        unlock(ring.submit_q.lock);
+                }
                 else {
                         // get mutual exclusion
+                        lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
+                        #if defined(LEADER_LOCK)
+                                while(!try_lock(ring.submit_q.submit_lock));
+                        #else
+                                lock(ring.submit_q.submit_lock __cfaabi_dbg_ctx2);
+                        #endif
                         /* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 0,
 …
                         __release_consumed_submission( ring );
+                        unlock(ring.submit_q.lock);
+                        #if defined(LEADER_LOCK)
+                                next(ring.submit_q.submit_lock);
+                        #else
+                                unlock(ring.submit_q.submit_lock);
+                        #endif
                         __cfadbg_print_safe( io, "Kernel I/O : Performed io_submit for %p, returned %d\n", active_thread(), ret );
 …
+        }
+        // #define PARTIAL_SUBMIT 32
         static unsigned __collect_submitions( struct __io_data & ring ) {
                 /* paranoid */ verify( ring.submit_q.ready != 0p );
 …
                 unsigned to_submit = 0;
+                uint32_t tail = *ring.submit_q.tail;
+                const uint32_t mask = *ring.submit_q.mask;
+                __u32 tail = *ring.submit_q.tail;
+                const __u32 mask = *ring.submit_q.mask;
+                #if defined(PARTIAL_SUBMIT)
+                        #if defined(LEADER_LOCK)
+                                #error PARTIAL_SUBMIT and LEADER_LOCK cannot co-exist
+                        #endif
+                        const __u32 cnt = ring.submit_q.ready_cnt > PARTIAL_SUBMIT ? PARTIAL_SUBMIT : ring.submit_q.ready_cnt;
+                        const __u32 offset = ring.submit_q.prev_ready;
+                        ring.submit_q.prev_ready += cnt;
+                #else
+                        const __u32 cnt = ring.submit_q.ready_cnt;
+                        const __u32 offset = 0;
+                #endif
                 // Go through the list of ready submissions
+                for( i; ring.submit_q.ready_cnt ) {
+                for( c; cnt ) {
+                        __u32 i = (offset + c) % ring.submit_q.ready_cnt;
                         // replace any submission with the sentinel, to consume it.
                         uint32_t idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED);
+                        __u32 idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED);
                         // If it was already the sentinel, then we are done
 …
+        }
         static uint32_t __release_consumed_submission( struct __io_data & ring ) {
                 const uint32_t smask = *ring.submit_q.mask;
+        static __u32 __release_consumed_submission( struct __io_data & ring ) {
+                const __u32 smask = *ring.submit_q.mask;
                 if( !try_lock(ring.submit_q.release_lock __cfaabi_dbg_ctx2) ) return 0;
                 uint32_t chead = *ring.submit_q.head;
                 uint32_t phead = ring.submit_q.prev_head;
+                __u32 chead = *ring.submit_q.head;
+                __u32 phead = ring.submit_q.prev_head;
                 ring.submit_q.prev_head = chead;
                 unlock(ring.submit_q.release_lock);
                 uint32_t count = chead - phead;
+                __u32 count = chead - phead;
                 for( i; count ) {
                         uint32_t idx = ring.submit_q.array[ (phead + i) & smask ];
+                        __u32 idx = ring.submit_q.array[ (phead + i) & smask ];
                         ring.submit_q.sqes[ idx ].user_data = 0;
+                }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 13d33a75 for libcfa/src/concurrency/io.cfa

Legend:

libcfa/src/concurrency/io.cfa

Download in other formats: