| 1 | //
 | 
|---|
| 2 | // Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
 | 
|---|
| 3 | //
 | 
|---|
| 4 | // The contents of this file are covered under the licence agreement in the
 | 
|---|
| 5 | // file "LICENCE" distributed with Cforall.
 | 
|---|
| 6 | //
 | 
|---|
| 7 | // io/setup.cfa --
 | 
|---|
| 8 | //
 | 
|---|
| 9 | // Author           : Thierry Delisle
 | 
|---|
| 10 | // Created On       : Fri Jul 31 16:25:51 2020
 | 
|---|
| 11 | // Last Modified By :
 | 
|---|
| 12 | // Last Modified On :
 | 
|---|
| 13 | // Update Count     :
 | 
|---|
| 14 | //
 | 
|---|
| 15 | 
 | 
|---|
| 16 | #define __cforall_thread__
 | 
|---|
| 17 | #define _GNU_SOURCE         /* See feature_test_macros(7) */
 | 
|---|
| 18 | 
 | 
|---|
| 19 | #include "io/types.hfa"
 | 
|---|
| 20 | #include "kernel.hfa"
 | 
|---|
| 21 | 
 | 
|---|
| 22 | #if !defined(CFA_HAVE_LINUX_IO_URING_H)
 | 
|---|
// Kernel I/O start-up hook for builds where CFA_HAVE_LINUX_IO_URING_H is not defined.
void __kernel_io_startup() {
    // Intentionally empty: without io_uring there is nothing to start
}
 | 
|---|
| 26 | 
 | 
|---|
// Kernel I/O shutdown hook for builds where CFA_HAVE_LINUX_IO_URING_H is not defined.
void __kernel_io_shutdown() {
    // Intentionally empty: without io_uring there is nothing to stop
}
 | 
|---|
| 30 | 
 | 
|---|
// Empty constructors/destructors used when io_uring support is compiled out.
// They keep the same signatures as the io_uring versions further down in this
// file, but perform no work and leave their arguments untouched.

// io_context_params: default construction does nothing.
void ?{}(io_context_params & this) {
}

// io_context: construction on a cluster, with or without explicit parameters.
void ?{}(io_context & this, struct cluster & cl) {
}

void ?{}(io_context & this, struct cluster & cl, const io_context_params & params) {
}

// io_context: destruction, with or without the cluster_context flag.
void ^?{}(io_context & this) {
}

void ^?{}(io_context & this, bool cluster_context) {
}
 | 
|---|
| 38 | 
 | 
|---|
| 39 | #else
 | 
|---|
| 40 |         #include <errno.h>
 | 
|---|
| 41 |         #include <stdint.h>
 | 
|---|
| 42 |         #include <string.h>
 | 
|---|
| 43 |         #include <signal.h>
 | 
|---|
| 44 |         #include <unistd.h>
 | 
|---|
| 45 | 
 | 
|---|
| 46 |         extern "C" {
 | 
|---|
| 47 |                 #include <pthread.h>
 | 
|---|
| 48 |                 #include <sys/epoll.h>
 | 
|---|
| 49 |                 #include <sys/mman.h>
 | 
|---|
| 50 |                 #include <sys/syscall.h>
 | 
|---|
| 51 | 
 | 
|---|
| 52 |                 #include <linux/io_uring.h>
 | 
|---|
| 53 |         }
 | 
|---|
| 54 | 
 | 
|---|
| 55 |         #include "bitmanip.hfa"
 | 
|---|
| 56 |         #include "kernel_private.hfa"
 | 
|---|
| 57 |         #include "thread.hfa"
 | 
|---|
| 58 | 
 | 
|---|
// Default constructor: fill every field of an io_context_params with its default value.
void ?{}(io_context_params & this) {
    // Queue sizing: both counts default to 256
    this.num_ready   = 256;
    this.num_entries = 256;

    // NOTE(review): -1 presumably means "no submit affinity" -- confirm against the users of submit_aff
    this.submit_aff = -1;

    // Every optional submission / polling mode starts disabled
    this.poll_complete  = false;
    this.poll_submit    = false;
    this.poller_submits = false;
    this.eager_submits  = false;
}
 | 
|---|
| 68 | 
 | 
|---|
// Forward declaration only; the signature is that of a pthread start routine.
// NOTE(review): no definition or call of __io_poller_slow is visible in this file --
// confirm whether this declaration is still needed.
static void * __io_poller_slow( void * arg );
 | 
|---|
| 70 | 
 | 
|---|
// Fallback io_uring system call numbers.
// Some systems that support io_uring do not define these in their headers, so
// each one is supplied here only if it is still undefined. Alpha is the single
// exception to the common numbering used by all other architectures.
#ifndef __NR_io_uring_setup
    #ifdef __alpha__
        #define __NR_io_uring_setup           535
    #else /* !__alpha__ */
        #define __NR_io_uring_setup           425
    #endif
#endif

#ifndef __NR_io_uring_enter
    #ifdef __alpha__
        #define __NR_io_uring_enter           536
    #else /* !__alpha__ */
        #define __NR_io_uring_enter           426
    #endif
#endif

#ifndef __NR_io_uring_register
    #ifdef __alpha__
        #define __NR_io_uring_register        537
    #else /* !__alpha__ */
        #define __NR_io_uring_register        427
    #endif
#endif
 | 
|---|
| 97 | 
 | 
|---|
| 98 | //=============================================================================================
 | 
|---|
| 99 | // I/O Startup / Shutdown logic + Master Poller
 | 
|---|
| 100 | //=============================================================================================
 | 
|---|
| 101 | 
 | 
|---|
| 102 |         // IO Master poller loop forward
 | 
|---|
| 103 |         static void * iopoll_loop( __attribute__((unused)) void * args );
 | 
|---|
| 104 | 
 | 
|---|
// State of the master I/O poller.
// Filled in by __kernel_io_startup, read by iopoll_loop and by the epoll
// registration helpers, and torn down by __kernel_io_shutdown.
static struct {
    pthread_t     thrd;    // pthread handle to io poller thread
    void *        stack;   // pthread stack for io poller thread (value returned by __create_pthread, freed at shutdown)
    int           epollfd; // file descriptor to the epoll instance
    volatile bool run;     // Whether or not to continue; cleared by __kernel_io_shutdown to end iopoll_loop
} iopoll;
 | 
|---|
| 111 | 
 | 
|---|
// Start the kernel-level I/O machinery: create the shared epoll instance and
// launch the master poller pthread running iopoll_loop.
// Aborts if the epoll instance cannot be created.
void __kernel_io_startup(void) {
    __cfaabi_dbg_print_safe( "Kernel : Creating EPOLL instance\n" );

    // One epoll instance is shared by every io_context registered later on
    iopoll.epollfd = epoll_create1(0);
    if( -1 == iopoll.epollfd ) abort( "internal error, epoll_create1\n");

    __cfaabi_dbg_print_safe( "Kernel : Starting io poller thread\n" );

    // The run flag must be raised before the thread exists, since the loop tests it immediately
    iopoll.run = true;
    iopoll.stack = __create_pthread( &iopoll.thrd, iopoll_loop, 0p );
}
 | 
|---|
| 125 | 
 | 
|---|
// Stop the kernel-level I/O machinery started by __kernel_io_startup:
// stop and join the master poller pthread, release its stack and close the
// epoll instance. Aborts if the epoll file descriptor cannot be closed.
void __kernel_io_shutdown(void) {
    // Lower the run flag, then queue SIGUSR1 to the poller so that a pending
    // epoll_pwait is interrupted and the loop gets to re-test the flag
    iopoll.run = false;
    sigval wakeup = { 1 };
    pthread_sigqueue( iopoll.thrd, SIGUSR1, wakeup );

    // Join the poller before releasing the stack it was running on
    pthread_join( iopoll.thrd, 0p );
    free( iopoll.stack );

    // Nothing polls any more, the epoll instance can go
    if( close(iopoll.epollfd) == -1 ) {
        abort( "internal error, close epoll\n");
    }

    __cfaabi_dbg_print_safe( "Kernel : IO poller stopped\n" );
}
 | 
|---|
| 146 | 
 | 
|---|
// Body of the master I/O poller pthread.
//
// The thread registers a (non-full) processor id for itself, blocks every
// signal, and then repeatedly waits on the shared epoll instance. While it is
// inside epoll_pwait, SIGUSR1 -- the signal queued by __kernel_io_shutdown --
// is the only signal left unblocked, so it can interrupt the wait. Every ready
// event carries in data.u64 the address of the $io_ctx_thread that registered
// it; the loop posts that thread's semaphore.
//
// args : unused.
// Returns 0p once iopoll.run has been cleared.
static void * iopoll_loop( __attribute__((unused)) void * args ) {
    __processor_id_t id;
    id.full_proc = false;
    id.id = doregister(&id);
    __cfaabi_tls.this_proc_id = &id;
    __cfaabi_dbg_print_safe( "Kernel : IO poller thread starting\n" );

    // Block signals to control when they arrive
    sigset_t mask;
    sigfillset(&mask);
    // pthread_sigmask reports failure by returning an error number (never -1),
    // so any non-zero result is an error
    if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) != 0 ) {
        abort( "internal error, pthread_sigmask" );
    }

    // From here on 'mask' is the signal mask used while waiting: everything
    // blocked except SIGUSR1
    sigdelset( &mask, SIGUSR1 );

    // Create sufficient events
    struct epoll_event events[10];
    // Main loop
    while( iopoll.run ) {
        // Wait for events (at most 10 per call, matching the size of 'events')
        int nfds = epoll_pwait( iopoll.epollfd, events, 10, -1, &mask );

        // Check if an error occurred
        if (nfds == -1) {
            // Interrupted by a signal: go back and re-test the run flag
            if( errno == EINTR ) continue;
            abort( "internal error, epoll_pwait" );
        }

        for(i; nfds) {
            // data.u64 holds the address of the io context thread to wake up
            $io_ctx_thread * io_ctx = ($io_ctx_thread *)(uintptr_t)events[i].data.u64;
            /* paranoid */ verify( io_ctx );
            __cfadbg_print_safe(io_core, "Kernel I/O : Unparking io poller %p\n", io_ctx);
            #if !defined( __CFA_NO_STATISTICS__ )
                __cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats;
            #endif
            post( io_ctx->sem );
        }
    }

    __cfaabi_dbg_print_safe( "Kernel : IO poller thread stopping\n" );
    unregister(&id);
    return 0p;
}
 | 
|---|
| 191 | 
 | 
|---|
| 192 | //=============================================================================================
 | 
|---|
| 193 | // I/O Context Construction/Destruction
 | 
|---|
| 194 | //=============================================================================================
 | 
|---|
| 195 | 
 | 
|---|
// Constructor: builds the embedded thread descriptor with the name "IO Poller" on cluster cl.
void ?{}($io_ctx_thread & this, struct cluster & cl) {
    (this.self){ "IO Poller", cl };
}

// Thread body of the io context poller; only declared here, no definition appears in this file.
void main( $io_ctx_thread & this );

// Accessor returning the thread descriptor embedded in the poller.
static inline $thread * get_thread( $io_ctx_thread & this ) {
    return &this.self;
}

// Destructor: empty body, takes its argument as 'mutex'.
// NOTE(review): presumably the mutex parameter is what makes destruction wait for the thread to finish -- confirm.
void ^?{}( $io_ctx_thread & mutex this ) {
}
 | 
|---|
| 200 | 
 | 
|---|
| 201 |         static void __io_create ( __io_data & this, const io_context_params & params_in );
 | 
|---|
| 202 |         static void __io_destroy( __io_data & this );
 | 
|---|
| 203 | 
 | 
|---|
// Construct an io_context on cluster cl using the given parameters:
// builds the poller thread object, allocates and sets up its ring through
// __io_create, then starts the poller thread.
void ?{}(io_context & this, struct cluster & cl, const io_context_params & params) {
    // The poller thread object comes first; it is not running yet
    (this.thrd){ cl };
    this.thrd.done = false;

    // Allocate the ring bookkeeping and let __io_create fill it in
    this.thrd.ring = malloc();
    __cfadbg_print_safe(io_core, "Kernel I/O : Creating ring for io_context %p\n", &this);
    __io_create( *this.thrd.ring, params );

    // Everything is in place, the poller can run
    __cfadbg_print_safe(io_core, "Kernel I/O : Starting poller thread for io_context %p\n", &this);
    __thrd_start( this.thrd, main );

    __cfadbg_print_safe(io_core, "Kernel I/O : io_context %p ready\n", &this);
}
 | 
|---|
| 216 | 
 | 
|---|
// Construct an io_context on cluster cl with default-constructed parameters.
void ?{}(io_context & this, struct cluster & cl) {
    // Default-constructed parameters, forwarded to the three-argument constructor
    io_context_params defaults;
    (this){ cl, defaults };
}
 | 
|---|
| 221 | 
 | 
|---|
// Tear down an io_context: tell its poller thread to stop, wake it up, destroy
// the thread object, then destroy and free the ring.
//
// cluster_context : when true, the poller thread's scheduling state is fixed
//                   up by hand (taken off the ready queue if needed, moved to
//                   the active cluster) before it is unparked; when false the
//                   poller is simply woken by posting its semaphore.
// Aborts if, in the cluster_context case, the thread is neither ready,
// preempted nor blocked.
void ^?{}(io_context & this, bool cluster_context) {
    __cfadbg_print_safe(io_core, "Kernel I/O : tearing down io_context %p\n", &this);

    // Notify the thread of the shutdown
    __atomic_store_n(&this.thrd.done, true, __ATOMIC_SEQ_CST);

    // If this is an io_context within a cluster, things get trickier
    $thread & thrd = this.thrd.self;
    if( cluster_context ) {
        cluster & cltr = *thrd.curr_cluster;
        /* paranoid */ verify( cltr.idles.total == 0 || &cltr == mainCluster );
        /* paranoid */ verify( !ready_mutate_islocked() );

        // We need to adjust the clean-up based on where the thread is
        if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) {

            ready_schedule_lock();

                // This is the tricky case
                // The thread was preempted and now it is on the ready queue
                // The thread should be the last on the list
                /* paranoid */ verify( thrd.link.next != 0p );

                // Remove the thread from the ready queue of this cluster
                __attribute__((unused)) bool removed = remove_head( &cltr, &thrd );
                /* paranoid */ verify( removed );
                thrd.link.next = 0p;
                thrd.link.prev = 0p;

                // Fixup the thread state
                thrd.state = Blocked;
                thrd.ticket = TICKET_BLOCKED;
                thrd.preempted = __NO_PREEMPTION;

            ready_schedule_unlock();

            // Pretend like the thread was blocked all along
        }
        // !!! This is not an else if !!!
        // The branch above rewrites thrd.state to Blocked, so it must fall through to this test
        if( thrd.state == Blocked ) {

            // This is the "easy case"
            // The thread is parked and can easily be moved to active cluster
            verify( thrd.curr_cluster != active_cluster() || thrd.curr_cluster == mainCluster );
            thrd.curr_cluster = active_cluster();

            // unpark the fast io_poller
            unpark( &thrd );
        }
        else {

            // The thread is in a weird state
            // I don't know what to do here
            abort("io_context poller thread is in unexpected state, cannot clean-up correctly\n");
        }
    } else {
        // Not a cluster context: waking the poller through its semaphore is enough
        post( this.thrd.sem );
    }

    // Destroy the poller thread object before touching the ring it uses
    ^(this.thrd){};
    __cfadbg_print_safe(io_core, "Kernel I/O : Stopped poller thread for io_context %p\n", &this);

    __io_destroy( *this.thrd.ring );
    __cfadbg_print_safe(io_core, "Kernel I/O : Destroyed ring for io_context %p\n", &this);

    // The ring storage was allocated by the constructor
    free(this.thrd.ring);
}
 | 
|---|
| 289 | 
 | 
|---|
// Destructor: delegates to the two-argument destructor with cluster_context == false.
void ^?{}(io_context & this) {
    ^(this){ false };
}
 | 
|---|
| 293 | 
 | 
|---|
// Create an io_uring instance and describe it in 'this'.
//
// this      : ring bookkeeping to fill in; it is zeroed before being populated.
// params_in : user-level settings. num_entries == 0 selects 256 entries;
//             poll_submit / poll_complete select IORING_SETUP_SQPOLL /
//             IORING_SETUP_IOPOLL; poller_submits or eager_submits enable the
//             'ready' array of at least 8 slots.
//
// Aborts when num_entries is not a power of 2, when poller_submits and
// eager_submits are both set, or when io_uring_setup or any mmap fails.
static void __io_create( __io_data & this, const io_context_params & params_in ) {
    // Step 1 : call to setup
    struct io_uring_params params;
    memset(&params, 0, sizeof(params));
    if( params_in.poll_submit   ) params.flags |= IORING_SETUP_SQPOLL;
    if( params_in.poll_complete ) params.flags |= IORING_SETUP_IOPOLL;

    __u32 nentries = params_in.num_entries != 0 ? params_in.num_entries : 256;
    if( !is_pow2(nentries) ) {
        abort("ERROR: I/O setup 'num_entries' must be a power of 2\n");
    }
    if( params_in.poller_submits && params_in.eager_submits ) {
        abort("ERROR: I/O setup 'poller_submits' and 'eager_submits' cannot be used together\n");
    }

    int fd = syscall(__NR_io_uring_setup, nentries, &params );
    if(fd < 0) {
        abort("KERNEL ERROR: IO_URING SETUP - %s\n", strerror(errno));
    }

    // Step 2 : mmap result
    memset( &this, 0, sizeof(struct __io_data) );
    struct __submition_data  & sq = this.submit_q;
    struct __completion_data & cq = this.completion_q;

    // calculate the right ring size
    sq.ring_sz = params.sq_off.array + (params.sq_entries * sizeof(unsigned)           );
    cq.ring_sz = params.cq_off.cqes  + (params.cq_entries * sizeof(struct io_uring_cqe));

    // Requires features
    #if defined(IORING_FEAT_SINGLE_MMAP)
        // adjust the size according to the parameters
        if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
            cq.ring_sz = sq.ring_sz = max(cq.ring_sz, sq.ring_sz);
        }
    #endif

    // mmap the Submit Queue into existence
    sq.ring_ptr = mmap(0, sq.ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
    if (sq.ring_ptr == (void*)MAP_FAILED) {
        abort("KERNEL ERROR: IO_URING MMAP1 - %s\n", strerror(errno));
    }

    // Requires features
    #if defined(IORING_FEAT_SINGLE_MMAP)
        // mmap the Completion Queue into existence (may or may not be needed)
        if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
            cq.ring_ptr = sq.ring_ptr;
        }
        else
    #endif
    {
        // We need multiple call to MMAP
        cq.ring_ptr = mmap(0, cq.ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
        if (cq.ring_ptr == (void*)MAP_FAILED) {
            // Capture the mmap error first: the munmap below may overwrite errno
            const int mmap_errno = errno;
            munmap(sq.ring_ptr, sq.ring_sz);
            abort("KERNEL ERROR: IO_URING MMAP2 - %s\n", strerror(mmap_errno));
        }
    }

    // mmap the submit queue entries
    size_t size = params.sq_entries * sizeof(struct io_uring_sqe);
    sq.sqes = (struct io_uring_sqe *)mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
    if (sq.sqes == (struct io_uring_sqe *)MAP_FAILED) {
        // Capture the mmap error first: the munmap calls below may overwrite errno
        const int mmap_errno = errno;
        munmap(sq.ring_ptr, sq.ring_sz);
        if (cq.ring_ptr != sq.ring_ptr) munmap(cq.ring_ptr, cq.ring_sz);
        abort("KERNEL ERROR: IO_URING MMAP3 - %s\n", strerror(mmap_errno));
    }

    // Get the pointers from the kernel to fill the structure
    // submit queue
    sq.head    = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.head);
    sq.tail    = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);
    sq.mask    = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);
    sq.num     = (   const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);
    sq.flags   = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);
    sq.dropped = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
    sq.array   = (         __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
    sq.prev_head = *sq.head;

    // Clear the user_data of every submission entry
    {
        const __u32 num = *sq.num;
        for( i; num ) {
            sq.sqes[i].user_data = 0ul64;
        }
    }

    (sq.submit_lock){};
    (sq.release_lock){};

    // The 'ready' array only exists when one of the two submit modes is enabled
    if( params_in.poller_submits || params_in.eager_submits ) {
        /* paranoid */ verify( is_pow2( params_in.num_ready ) || (params_in.num_ready < 8) );
        sq.ready_cnt = max( params_in.num_ready, 8 );
        sq.ready = alloc( sq.ready_cnt, 64`align );
        for(i; sq.ready_cnt) {
            sq.ready[i] = -1ul32;
        }
        sq.prev_ready = 0;
    }
    else {
        sq.ready_cnt = 0;
        sq.ready = 0p;
        sq.prev_ready = 0;
    }

    // completion queue
    cq.head      = (volatile __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.head);
    cq.tail      = (volatile __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.tail);
    cq.mask      = (   const __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_mask);
    cq.num       = (   const __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_entries);
    cq.overflow  = (         __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.overflow);
    cq.cqes = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes);

    // some paranoid checks
    /* paranoid */ verifyf( (*cq.mask) == ((*cq.num) - 1ul32), "IO_URING Expected mask to be %u (%u entries), was %u", (*cq.num) - 1ul32, *cq.num, *cq.mask  );
    /* paranoid */ verifyf( (*cq.num)  >= nentries, "IO_URING Expected %u entries, got %u", nentries, *cq.num );
    /* paranoid */ verifyf( (*cq.head) == 0, "IO_URING Expected head to be 0, got %u", *cq.head );
    /* paranoid */ verifyf( (*cq.tail) == 0, "IO_URING Expected tail to be 0, got %u", *cq.tail );

    /* paranoid */ verifyf( (*sq.mask) == ((*sq.num) - 1ul32), "IO_URING Expected mask to be %u (%u entries), was %u", (*sq.num) - 1ul32, *sq.num, *sq.mask );
    /* paranoid */ verifyf( (*sq.num) >= nentries, "IO_URING Expected %u entries, got %u", nentries, *sq.num );
    /* paranoid */ verifyf( (*sq.head) == 0, "IO_URING Expected head to be 0, got %u", *sq.head );
    /* paranoid */ verifyf( (*sq.tail) == 0, "IO_URING Expected tail to be 0, got %u", *sq.tail );

    // Update the global ring info
    this.ring_flags = params.flags;
    this.fd         = fd;
    this.eager_submits  = params_in.eager_submits;
    this.poller_submits = params_in.poller_submits;
}
 | 
|---|
| 424 | 
 | 
|---|
// Release everything __io_create acquired for this ring: the 'ready' array,
// the three memory mappings and the io_uring file descriptor.
static void __io_destroy( __io_data & this ) {
    struct __submition_data  & sq = this.submit_q;
    struct __completion_data & cq = this.completion_q;

    // The ready array may be null, free accepts that
    free( sq.ready );

    // The entry count lives inside the ring mapping, so read it before that mapping goes away
    const size_t sqes_sz = (*sq.num) * sizeof(struct io_uring_sqe);
    munmap(sq.sqes, sqes_sz);

    // Submit Queue ring
    munmap(sq.ring_ptr, sq.ring_sz);

    // Completion Queue ring, unless it shares the Submit Queue mapping
    if (cq.ring_ptr != sq.ring_ptr) munmap(cq.ring_ptr, cq.ring_sz);

    // Finally, the ring file descriptor itself
    close(this.fd);
}
 | 
|---|
| 446 | 
 | 
|---|
| 447 | //=============================================================================================
 | 
|---|
| 448 | // I/O Context Sleep
 | 
|---|
| 449 | //=============================================================================================
 | 
|---|
| 450 | 
 | 
|---|
// Add the ring file descriptor of ctx to the master poller's epoll instance.
//
// ctx : io context thread to wake when its ring becomes readable.
// ev  : event storage owned by the caller; it is filled in here with
//       EPOLLIN | EPOLLONESHOT and with the address of ctx as user data.
// Aborts if epoll_ctl fails.
void __ioctx_register($io_ctx_thread & ctx, struct epoll_event & ev) {
    ev.events = EPOLLIN | EPOLLONESHOT;
    // Encode the address through uintptr_t, mirroring the decode in iopoll_loop,
    // so no pointer is cast directly to a wider integer type
    ev.data.u64 = (__u64)(uintptr_t)&ctx;
    int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_ADD, ctx.ring->fd, &ev);
    if (ret < 0) {
        abort( "KERNEL ERROR: EPOLL ADD - (%d) %s\n", (int)errno, strerror(errno) );
    }
}
 | 
|---|
| 459 | 
 | 
|---|
// Re-submit the event description ev for the ring of ctx with EPOLL_CTL_MOD.
// ev is passed through unchanged. Aborts if epoll_ctl fails.
void __ioctx_prepare_block($io_ctx_thread & ctx, struct epoll_event & ev) {
    if( epoll_ctl(iopoll.epollfd, EPOLL_CTL_MOD, ctx.ring->fd, &ev) < 0 ) {
        abort( "KERNEL ERROR: EPOLL REARM - (%d) %s\n", (int)errno, strerror(errno) );
    }
}
 | 
|---|
| 466 | 
 | 
|---|
| 467 | //=============================================================================================
 | 
|---|
| 468 | // I/O Context Misc Setup
 | 
|---|
| 469 | //=============================================================================================
 | 
|---|
// Register 'count' file descriptors from 'files' with the ring of a single
// io_context, using the io_uring_register system call (IORING_REGISTER_FILES).
// Aborts if the system call fails.
void register_fixed_files( io_context & ctx, int * files, unsigned count ) {
    int rc = syscall( __NR_io_uring_register, ctx.thrd.ring->fd, IORING_REGISTER_FILES, files, count );
    if( rc < 0 ) abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );

    __cfadbg_print_safe( io_core, "Kernel I/O : Performed io_register for %p, returned %d\n", active_thread(), rc );
}
 | 
|---|
| 478 | 
 | 
|---|
// Register the same set of files with every io_context owned by the cluster.
void register_fixed_files( cluster & cltr, int * files, unsigned count ) {
    // One registration per context, in index order
    for( idx; cltr.io.cnt ) {
        register_fixed_files( cltr.io.ctxs[idx], files, count );
    }
}
 | 
|---|
| 484 | #endif
 | 
|---|