Changes in / [d3ab183:f90d10f]
Files (5 edited):
- benchmark/io/readv.cfa (modified) (1 diff)
- libcfa/prelude/defines.hfa.in (modified) (1 diff)
- libcfa/src/concurrency/io.cfa (modified) (18 diffs)
- libcfa/src/concurrency/kernel.hfa (modified) (2 diffs)
- libcfa/src/concurrency/kernel_private.hfa (modified) (1 diff)
Legend (markers used in the diffs below):
- ' ' Unmodified
- '+' Added
- '-' Removed
benchmark/io/readv.cfa
  #include <thread.hfa>
  #include <time.hfa>

- #if !defined(HAVE_LINUX_IO_URING_H)
- 	#warning no io uring
- #endif
-
  extern bool traceHeapOn();
libcfa/prelude/defines.hfa.in
  #undef HAVE_LINUX_IO_URING_H

- // #define __CFA_IO_POLLING_USER__
- // #define __CFA_IO_POLLING_KERNEL__
+ #undef __CFA_NO_STATISTICS__
libcfa/src/concurrency/io.cfa
  	#endif

- #if defined(__CFA_IO_POLLING_USER__)
- 	void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
- 		this.ring = &cltr.io;
- 		(this.thrd){ "Fast I/O Poller", cltr };
- 	}
- 	void ^?{}( __io_poller_fast & mutex this );
- 	void main( __io_poller_fast & this );
- 	static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
- 	void ^?{}( __io_poller_fast & mutex this ) {}
- #endif
+ // Fast poller user-thread
+ // Not using the "thread" keyword because we want to control
+ // more carefully when to start/stop it
+ struct __io_poller_fast {
+ 	struct __io_data * ring;
+ 	bool waiting;
+ 	$thread thrd;
+ };
+
+ void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
+ 	this.ring = cltr.io;
+ 	this.waiting = true;
+ 	(this.thrd){ "Fast I/O Poller", cltr };
+ }
+ void ^?{}( __io_poller_fast & mutex this );
+ void main( __io_poller_fast & this );
+ static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
+ void ^?{}( __io_poller_fast & mutex this ) {}
+
+ struct __submition_data {
+ 	// Head and tail of the ring (associated with array)
+ 	volatile uint32_t * head;
+ 	volatile uint32_t * tail;
+
+ 	// The actual kernel ring which uses head/tail
+ 	// indexes into the sqes arrays
+ 	uint32_t * array;
+
+ 	// number of entries and mask to go with it
+ 	const uint32_t * num;
+ 	const uint32_t * mask;
+
+ 	// Submission flags (Not sure what for)
+ 	uint32_t * flags;
+
+ 	// number of sqes not submitted (whatever that means)
+ 	uint32_t * dropped;
+
+ 	// Like head/tail but not seen by the kernel
+ 	volatile uint32_t alloc;
+ 	volatile uint32_t ready;
+
+ 	__spinlock_t lock;
+
+ 	// A buffer of sqes (not the actual ring)
+ 	struct io_uring_sqe * sqes;
+
+ 	// The location and size of the mmaped area
+ 	void * ring_ptr;
+ 	size_t ring_sz;
+
+ 	// Statistics
+ 	#if !defined(__CFA_NO_STATISTICS__)
+ 		struct {
+ 			struct {
+ 				unsigned long long int val;
+ 				unsigned long long int cnt;
+ 			} submit_avg;
+ 		} stats;
+ 	#endif
+ };
+
+ struct __completion_data {
+ 	// Head and tail of the ring
+ 	volatile uint32_t * head;
+ 	volatile uint32_t * tail;
+
+ 	// number of entries and mask to go with it
+ 	const uint32_t * mask;
+ 	const uint32_t * num;
+
+ 	// number of cqes not submitted (whatever that means)
+ 	uint32_t * overflow;
+
+ 	// the kernel ring
+ 	struct io_uring_cqe * cqes;
+
+ 	// The location and size of the mmaped area
+ 	void * ring_ptr;
+ 	size_t ring_sz;
+
+ 	// Statistics
+ 	#if !defined(__CFA_NO_STATISTICS__)
+ 		struct {
+ 			struct {
+ 				unsigned long long int val;
+ 				unsigned long long int slow_cnt;
+ 				unsigned long long int fast_cnt;
+ 			} completed_avg;
+ 		} stats;
+ 	#endif
+ };
+
+ struct __io_data {
+ 	struct __submition_data submit_q;
+ 	struct __completion_data completion_q;
+ 	uint32_t flags;
+ 	int fd;
+ 	semaphore submit;
+ 	volatile bool done;
+ 	struct {
+ 		struct {
+ 			void * stack;
+ 			pthread_t kthrd;
+ 		} slow;
+ 		__io_poller_fast fast;
+ 		__bin_sem_t sem;
+ 	} poller;
+ };

  //=============================================================================================
…
  //=============================================================================================
  void __kernel_io_startup( cluster & this, bool main_cluster ) {
+ 	this.io = malloc();
+
  	// Step 1 : call to setup
  	struct io_uring_params params;
…
  	// Step 2 : mmap result
- 	memset( &this.io, 0, sizeof(struct io_ring) );
- 	struct io_uring_sq & sq = this.io.submit_q;
- 	struct io_uring_cq & cq = this.io.completion_q;
+ 	memset( this.io, 0, sizeof(struct __io_data) );
+ 	struct __submition_data & sq = this.io->submit_q;
+ 	struct __completion_data & cq = this.io->completion_q;

  	// calculate the right ring size
…
  	// Update the global ring info
- 	this.io.flags = params.flags;
- 	this.io.fd    = fd;
- 	this.io.done  = false;
- 	(this.io.submit){ min(*sq.num, *cq.num) };
+ 	this.io->flags = params.flags;
+ 	this.io->fd    = fd;
+ 	this.io->done  = false;
+ 	(this.io->submit){ min(*sq.num, *cq.num) };

  	// Initialize statistics
  	#if !defined(__CFA_NO_STATISTICS__)
- 		this.io.submit_q.stats.submit_avg.val = 0;
- 		this.io.submit_q.stats.submit_avg.cnt = 0;
- 		this.io.completion_q.stats.completed_avg.val = 0;
- 		this.io.completion_q.stats.completed_avg.cnt = 0;
+ 		this.io->submit_q.stats.submit_avg.val = 0;
+ 		this.io->submit_q.stats.submit_avg.cnt = 0;
+ 		this.io->completion_q.stats.completed_avg.val = 0;
+ 		this.io->completion_q.stats.completed_avg.slow_cnt = 0;
+ 		this.io->completion_q.stats.completed_avg.fast_cnt = 0;
  	#endif
…
  void __kernel_io_finish_start( cluster & this ) {
- 	#if defined(__CFA_IO_POLLING_USER__)
- 		__cfadbg_print_safe(io_core, "Kernel I/O : Creating fast poller for cluter %p\n", &this);
- 		(this.io.poller.fast){ this };
- 		__thrd_start( this.io.poller.fast, main );
- 	#endif
+ 	__cfadbg_print_safe(io_core, "Kernel I/O : Creating fast poller for cluter %p\n", &this);
+ 	(this.io->poller.fast){ this };
+ 	__thrd_start( this.io->poller.fast, main );

  	// Create the poller thread
  	__cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluter %p\n", &this);
- 	this.io.poller.slow.stack = __create_pthread( &this.io.poller.slow.kthrd, __io_poller_slow, &this );
+ 	this.io->poller.slow.stack = __create_pthread( &this.io->poller.slow.kthrd, __io_poller_slow, &this );
  }
…
  	__cfadbg_print_safe(io_core, "Kernel I/O : Stopping pollers for cluster\n", &this);
  	// Notify the poller thread of the shutdown
- 	__atomic_store_n(&this.io.done, true, __ATOMIC_SEQ_CST);
+ 	__atomic_store_n(&this.io->done, true, __ATOMIC_SEQ_CST);

  	// Stop the IO Poller
  	sigval val = { 1 };
- 	pthread_sigqueue( this.io.poller.slow.kthrd, SIGUSR1, val );
- 	#if defined(__CFA_IO_POLLING_USER__)
- 		post( this.io.poller.sem );
- 	#endif
-
- 	// Wait for the poller thread to finish
- 	pthread_join( this.io.poller.slow.kthrd, 0p );
- 	free( this.io.poller.slow.stack );
-
- 	__cfadbg_print_safe(io_core, "Kernel I/O : Slow poller stopped for cluster\n", &this);
-
- 	#if defined(__CFA_IO_POLLING_USER__)
- 		// unpark the fast io_poller
- 		unpark( &this.io.poller.fast.thrd __cfaabi_dbg_ctx2 );
-
- 		^(this.io.poller.fast){};
+ 	pthread_sigqueue( this.io->poller.slow.kthrd, SIGUSR1, val );
+ 	post( this.io->poller.sem );
+
+ 	// Wait for the poller thread to finish
+ 	pthread_join( this.io->poller.slow.kthrd, 0p );
+ 	free( this.io->poller.slow.stack );
+
+ 	__cfadbg_print_safe(io_core, "Kernel I/O : Slow poller stopped for cluster\n", &this);
+
+ 	verify( this.io->poller.fast.waiting );
+ 	verify( this.io->poller.fast.thrd.state == Blocked );
+
+ 	this.io->poller.fast.thrd.curr_cluster = mainCluster;
+
+ 	// unpark the fast io_poller
+ 	unpark( &this.io->poller.fast.thrd __cfaabi_dbg_ctx2 );
+
+ 	^(this.io->poller.fast){};

  	__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller stopped for cluster\n", &this);
…
  	#if !defined(__CFA_NO_STATISTICS__)
  		if(this.print_stats) {
- 			__cfaabi_bits_print_safe( STDERR_FILENO,
- 				"----- I/O uRing Stats -----\n"
- 				"- total submit calls : %llu\n"
- 				"- avg submit : %lf\n"
- 				"- total wait calls : %llu\n"
- 				"- avg completion/wait : %lf\n",
- 				this.io.submit_q.stats.submit_avg.cnt,
- 				((double)this.io.submit_q.stats.submit_avg.val) / this.io.submit_q.stats.submit_avg.cnt,
- 				this.io.completion_q.stats.completed_avg.cnt,
- 				((double)this.io.completion_q.stats.completed_avg.val) / this.io.completion_q.stats.completed_avg.cnt
- 			);
+ 			with(this.io->submit_q.stats, this.io->completion_q.stats) {
+ 				__cfaabi_bits_print_safe( STDERR_FILENO,
+ 					"----- I/O uRing Stats -----\n"
+ 					"- total submit calls : %llu\n"
+ 					"- avg submit : %lf\n"
+ 					"- total wait calls : %llu (%llu slow, %llu fast)\n"
+ 					"- avg completion/wait : %lf\n",
+ 					submit_avg.cnt,
+ 					((double)submit_avg.val) / submit_avg.cnt,
+ 					completed_avg.slow_cnt + completed_avg.fast_cnt,
+ 					completed_avg.slow_cnt, completed_avg.fast_cnt,
+ 					((double)completed_avg.val) / (completed_avg.slow_cnt + completed_avg.fast_cnt)
+ 				);
+ 			}
  		}
  	#endif

  	// Shutdown the io rings
- 	struct io_uring_sq & sq = this.io.submit_q;
- 	struct io_uring_cq & cq = this.io.completion_q;
+ 	struct __submition_data & sq = this.io->submit_q;
+ 	struct __completion_data & cq = this.io->completion_q;

  	// unmap the submit queue entries
…
  	// close the file descriptor
- 	close(this.io.fd);
+ 	close(this.io->fd);
+
+ 	free( this.io );
  }
…
  // Process a single completion message from the io_uring
  // This is NOT thread-safe
- static int __drain_io( struct io_ring & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
+ static int __drain_io( struct __io_data & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
  	int ret = syscall( __NR_io_uring_enter, ring.fd, 0, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
  	if( ret < 0 ) {
…
  	// Nothing was new return 0
  	if (head == tail) {
- 		#if !defined(__CFA_NO_STATISTICS__)
- 			ring.completion_q.stats.completed_avg.cnt += 1;
- 		#endif
  		return 0;
  	}
…
  	__atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED );

- 	// Update statistics
- 	#if !defined(__CFA_NO_STATISTICS__)
- 		ring.completion_q.stats.completed_avg.val += count;
- 		ring.completion_q.stats.completed_avg.cnt += 1;
- 	#endif
-
  	return count;
  }
…
  static void * __io_poller_slow( void * arg ) {
  	cluster * cltr = (cluster *)arg;
- 	struct io_ring & ring = cltr->io;
+ 	struct __io_data & ring = *cltr->io;

  	sigset_t mask;
…
  	verify( (*ring.completion_q.head) == (*ring.completion_q.tail) );

+ 	__cfadbg_print_safe(io_core, "Kernel I/O : Slow poller for ring %p ready\n", &ring);
+
  	while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
  		#if defined(__CFA_IO_POLLING_USER__)
…
  			// batton pass to the user-thread
  			int count = __drain_io( ring, &mask, 1, true );
+
+ 			// Update statistics
+ 			#if !defined(__CFA_NO_STATISTICS__)
+ 				ring.completion_q.stats.completed_avg.val += count;
+ 				ring.completion_q.stats.completed_avg.slow_cnt += 1;
+ 			#endif
+
  			if(count > 0) {
  				__cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to fast poller\n", &ring);
…
  			//In the naive approach, just poll the io completion queue directly
- 			__drain_io( ring, &mask, 1, true );
+ 			int count = __drain_io( ring, &mask, 1, true );
+
+ 			// Update statistics
+ 			#if !defined(__CFA_NO_STATISTICS__)
+ 				ring.completion_q.stats.completed_avg.val += count;
+ 				ring.completion_q.stats.completed_avg.slow_cnt += 1;
+ 			#endif

  		#endif
  	}

+ 	__cfadbg_print_safe(io_core, "Kernel I/O : Slow poller for ring %p stopping\n", &ring);
+
  	return 0p;
  }

- #if defined(__CFA_IO_POLLING_USER__)
- 	void main( __io_poller_fast & this ) {
- 		// Start parked
- 		park( __cfaabi_dbg_ctx );
-
- 		// Then loop until we need to start
- 		while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
- 			// Drain the io
- 			if(0 > __drain_io( *this.ring, 0p, 0, false )) {
- 				// If we got something, just yield and check again
- 				yield();
- 			}
- 			else {
- 				// We didn't get anything baton pass to the slow poller
- 				__cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
- 				post( this.ring->poller.sem );
- 				park( __cfaabi_dbg_ctx );
- 			}
- 		}
- 	}
- #endif
+ void main( __io_poller_fast & this ) {
+ 	// Start parked
+ 	park( __cfaabi_dbg_ctx );
+
+ 	__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p ready\n", &this.ring);
+
+ 	// Then loop until we need to start
+ 	while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
+ 		// Drain the io
+ 		this.waiting = false;
+ 		int count = __drain_io( *this.ring, 0p, 0, false );
+
+ 		// Update statistics
+ 		#if !defined(__CFA_NO_STATISTICS__)
+ 			this.ring->completion_q.stats.completed_avg.val += count;
+ 			this.ring->completion_q.stats.completed_avg.fast_cnt += 1;
+ 		#endif
+
+ 		this.waiting = true;
+ 		if(0 > count) {
+ 			// If we got something, just yield and check again
+ 			yield();
+ 		}
+ 		else {
+ 			// We didn't get anything baton pass to the slow poller
+ 			__cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
+ 			post( this.ring->poller.sem );
+ 			park( __cfaabi_dbg_ctx );
+ 		}
+ 	}
+
+ 	__cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring);
+ }

  //=============================================================================================
…
  //

- static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct io_ring & ring ) {
+ static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring ) {
  	// Wait for a spot to be available
  	P(ring.submit);
…
  }

- static inline void __submit( struct io_ring & ring, uint32_t idx ) {
+ static inline void __submit( struct __io_data & ring, uint32_t idx ) {
  	// get mutual exclusion
  	lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
…
  #define __submit_prelude \
- 	struct io_ring & ring = active_cluster()->io; \
+ 	struct __io_data & ring = *active_cluster()->io; \
  	struct io_uring_sqe * sqe; \
  	uint32_t idx; \
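Read together, the poller hunks above implement a baton-passing scheme: the fast poller is a user-level thread that drains the completion queue without blocking, and when it finds nothing it posts poller.sem and parks, letting the slow poller (a kernel pthread blocked in io_uring_enter) take over until completions arrive and hand the ring back. The following stand-alone sketch, in plain C rather than Cforall, illustrates only that hand-off pattern; drain, pending, baton, and both poller bodies are invented for the sketch and are not the CFA runtime's API.

	#include <pthread.h>
	#include <semaphore.h>
	#include <stdatomic.h>
	#include <stdio.h>
	#include <unistd.h>

	static atomic_bool done    = false;
	static atomic_int  pending = 5;   // stand-in for completions sitting in the ring
	static sem_t baton;               // plays the role of poller.sem

	// Stand-in for __drain_io: report one completion if any are pending.
	static int drain(void) {
		if (atomic_load(&pending) > 0) { atomic_fetch_sub(&pending, 1); return 1; }
		return 0;
	}

	// Slow poller: parks on the semaphore; the real runtime blocks
	// inside io_uring_enter instead of sleeping in a loop.
	static void * slow_poller(void * arg) {
		(void)arg;
		while (!atomic_load(&done)) {
			sem_wait(&baton);                      // wait for the baton
			while (!atomic_load(&done) && drain() == 0)
				usleep(1000);                      // "block" until work or shutdown
			// work arrived (or shutdown): the runtime would unpark the fast poller here
		}
		return NULL;
	}

	int main(void) {
		sem_init(&baton, 0, 0);
		pthread_t slow;
		pthread_create(&slow, NULL, slow_poller, NULL);

		// Fast poller: drain without blocking; baton-pass when the ring is idle.
		for (int idle = 0; idle < 3; ) {
			if (drain() > 0) continue;             // got something: poll again
			sem_post(&baton);                      // idle: hand the ring to the slow poller
			usleep(2000);                          // the real fast poller parks here
			idle++;
		}

		atomic_store(&done, true);
		sem_post(&baton);                          // release the slow poller for shutdown
		pthread_join(slow, NULL);
		sem_destroy(&baton);
		puts("pollers stopped");
		return 0;
	}

The point of the split is that the common case, completions arriving while the fast poller is active, never pays for a kernel-thread wakeup; the semaphore is only touched when the ring goes idle, which is also why the shutdown path above can verify the fast poller is waiting before unparking it.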
libcfa/src/concurrency/kernel.hfa
  //-----------------------------------------------------------------------------
  // I/O
- #if defined(HAVE_LINUX_IO_URING_H)
- 	struct io_uring_sq {
- 		// Head and tail of the ring (associated with array)
- 		volatile uint32_t * head;
- 		volatile uint32_t * tail;
-
- 		// The actual kernel ring which uses head/tail
- 		// indexes into the sqes arrays
- 		uint32_t * array;
-
- 		// number of entries and mask to go with it
- 		const uint32_t * num;
- 		const uint32_t * mask;
-
- 		// Submission flags (Not sure what for)
- 		uint32_t * flags;
-
- 		// number of sqes not submitted (whatever that means)
- 		uint32_t * dropped;
-
- 		// Like head/tail but not seen by the kernel
- 		volatile uint32_t alloc;
- 		volatile uint32_t ready;
-
- 		__spinlock_t lock;
-
- 		// A buffer of sqes (not the actual ring)
- 		struct io_uring_sqe * sqes;
-
- 		// The location and size of the mmaped area
- 		void * ring_ptr;
- 		size_t ring_sz;
-
- 		// Statistics
- 		#if !defined(__CFA_NO_STATISTICS__)
- 			struct {
- 				struct {
- 					unsigned long long int val;
- 					unsigned long long int cnt;
- 				} submit_avg;
- 			} stats;
- 		#endif
- 	};
-
- 	struct io_uring_cq {
- 		// Head and tail of the ring
- 		volatile uint32_t * head;
- 		volatile uint32_t * tail;
-
- 		// number of entries and mask to go with it
- 		const uint32_t * mask;
- 		const uint32_t * num;
-
- 		// number of cqes not submitted (whatever that means)
- 		uint32_t * overflow;
-
- 		// the kernel ring
- 		struct io_uring_cqe * cqes;
-
- 		// The location and size of the mmaped area
- 		void * ring_ptr;
- 		size_t ring_sz;
-
- 		// Statistics
- 		#if !defined(__CFA_NO_STATISTICS__)
- 			struct {
- 				struct {
- 					unsigned long long int val;
- 					unsigned long long int cnt;
- 				} completed_avg;
- 			} stats;
- 		#endif
- 	};
-
- 	#if defined(__CFA_IO_POLLING_USER__)
- 		struct __io_poller_fast {
- 			struct io_ring * ring;
- 			$thread thrd;
- 		};
- 	#endif
-
- 	struct io_ring {
- 		struct io_uring_sq submit_q;
- 		struct io_uring_cq completion_q;
- 		uint32_t flags;
- 		int fd;
- 		semaphore submit;
- 		volatile bool done;
- 		struct {
- 			struct {
- 				void * stack;
- 				pthread_t kthrd;
- 			} slow;
- 			#if defined(__CFA_IO_POLLING_USER__)
- 				__io_poller_fast fast;
- 				__bin_sem_t sem;
- 			#endif
- 		} poller;
- 	};
- #endif
+ struct __io_data;

  //-----------------------------------------------------------------------------
…
  	} node;

- 	#if defined(HAVE_LINUX_IO_URING_H)
- 		struct io_ring io;
- 	#endif
+ 	struct __io_data * io;

  	#if !defined(__CFA_NO_STATISTICS__)
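This header change is an instance of the opaque-handle ("pimpl") pattern: kernel.hfa now only forward-declares struct __io_data and the cluster stores a pointer, so the io_uring structs and the HAVE_LINUX_IO_URING_H guard no longer leak into every file that includes the header (which is also why readv.cfa can drop its #warning check above). A minimal sketch of the pattern in plain C, with hypothetical names (io_data, io_startup, io_shutdown):

	#include <stdio.h>
	#include <stdlib.h>

	/* --- what the public header exposes --- */
	struct io_data;                          /* forward declaration only */
	struct cluster { struct io_data * io; }; /* opaque handle, fixed size */

	/* --- what stays private to the implementation file --- */
	struct io_data {
		int fd;                              /* ... rings, stats, pollers ... */
	};

	static void io_startup(struct cluster * this) {
		this->io = malloc(sizeof(struct io_data)); /* mirrors this.io = malloc() */
		this->io->fd = -1;
	}

	static void io_shutdown(struct cluster * this) {
		free(this->io);                            /* mirrors free( this.io ) */
		this->io = NULL;
	}

	int main(void) {
		struct cluster c;
		io_startup(&c);
		printf("io handle at %p\n", (void *)c.io);
		io_shutdown(&c);
		return 0;
	}

The cost is one heap allocation per cluster and a pointer dereference per access, which is why the startup path in io.cfa gains this.io = malloc() and the shutdown path gains free( this.io ).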
libcfa/src/concurrency/kernel_private.hfa
  extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));

+ extern cluster * mainCluster;
+
  //-----------------------------------------------------------------------------
  // Threads