File: 1 edited

libcfa/src/concurrency/io/setup.cfa (modified) (12 diffs)
Legend:
    ' '            Unmodified
    '+'            Added
    '-'            Removed
    '@@ -a +b @@'  Hunk starting at line a of r2fab24e3 and line b of r11054eb
libcfa/src/concurrency/io/setup.cfa
--- libcfa/src/concurrency/io/setup.cfa  (r2fab24e3)
+++ libcfa/src/concurrency/io/setup.cfa  (r11054eb)
@@ -26 +26 @@
 
 #if !defined(CFA_HAVE_LINUX_IO_URING_H)
-	void __kernel_io_startup() {
-		// Nothing to do without io_uring
-	}
-
-	void __kernel_io_shutdown() {
-		// Nothing to do without io_uring
-	}
-
 	void ?{}(io_context_params & this) {}
 
-	void ?{}(io_context & this, struct cluster & cl) {}
-	void ?{}(io_context & this, struct cluster & cl, const io_context_params & params) {}
-
-	void ^?{}(io_context & this) {}
-	void ^?{}(io_context & this, bool cluster_context) {}
-
-	void register_fixed_files( io_context &, int *, unsigned ) {}
-	void register_fixed_files( cluster &, int *, unsigned ) {}
+	void ?{}($io_context & this, struct cluster & cl) {}
+	void ^?{}($io_context & this) {}
+
+	void __cfa_io_start( processor * proc ) {}
+	void __cfa_io_flush( processor * proc ) {}
+	void __cfa_io_stop ( processor * proc ) {}
+
+	$io_arbiter * create(void) { return 0p; }
+	void destroy($io_arbiter *) {}
 
 #else
@@ -68 +61 @@
 void ?{}(io_context_params & this) {
 	this.num_entries = 256;
-	this.num_ready = 256;
-	this.submit_aff = -1;
-	this.eager_submits = false;
-	this.poller_submits = false;
-	this.poll_submit = false;
-	this.poll_complete = false;
 }
 
@@ -106 +93 @@
 
 //=============================================================================================
-// I/O Startup / Shutdown logic + Master Poller
-//=============================================================================================
-
-// IO Master poller loop forward
-static void * iopoll_loop( __attribute__((unused)) void * args );
-
-static struct {
-	pthread_t thrd;          // pthread handle to io poller thread
-	void * stack;            // pthread stack for io poller thread
-	int epollfd;             // file descriptor to the epoll instance
-	volatile bool run;       // Whether or not to continue
-	volatile bool stopped;   // Whether the poller has finished running
-	volatile uint64_t epoch; // Epoch used for memory reclamation
-} iopoll;
-
-void __kernel_io_startup(void) {
-	__cfadbg_print_safe(io_core, "Kernel : Creating EPOLL instance\n" );
-
-	iopoll.epollfd = epoll_create1(0);
-	if (iopoll.epollfd == -1) {
-		abort( "internal error, epoll_create1\n");
-	}
-
-	__cfadbg_print_safe(io_core, "Kernel : Starting io poller thread\n" );
-
-	iopoll.stack = __create_pthread( &iopoll.thrd, iopoll_loop, 0p );
-	iopoll.run = true;
-	iopoll.stopped = false;
-	iopoll.epoch = 0;
-}
-
-void __kernel_io_shutdown(void) {
-	// Notify the io poller thread of the shutdown
-	iopoll.run = false;
-	sigval val = { 1 };
-	pthread_sigqueue( iopoll.thrd, SIGUSR1, val );
-
-	// Wait for the io poller thread to finish
-
-	__destroy_pthread( iopoll.thrd, iopoll.stack, 0p );
-
-	int ret = close(iopoll.epollfd);
-	if (ret == -1) {
-		abort( "internal error, close epoll\n");
-	}
-
-	// Io polling is now fully stopped
-
-	__cfadbg_print_safe(io_core, "Kernel : IO poller stopped\n" );
-}
-
-static void * iopoll_loop( __attribute__((unused)) void * args ) {
-	__processor_id_t id;
-	id.full_proc = false;
-	id.id = doregister(&id);
-	__cfaabi_tls.this_proc_id = &id;
-	__cfadbg_print_safe(io_core, "Kernel : IO poller thread starting\n" );
-
-	// Block signals to control when they arrive
-	sigset_t mask;
-	sigfillset(&mask);
-	if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
-		abort( "internal error, pthread_sigmask" );
-	}
-
-	sigdelset( &mask, SIGUSR1 );
-
-	// Create sufficient events
-	struct epoll_event events[10];
-	// Main loop
-	while( iopoll.run ) {
-		__cfadbg_print_safe(io_core, "Kernel I/O - epoll : waiting on io_uring contexts\n");
-
-		// increment the epoch to notify any deleters we are starting a new cycle
-		__atomic_fetch_add(&iopoll.epoch, 1, __ATOMIC_SEQ_CST);
-
-		// Wait for events
-		int nfds = epoll_pwait( iopoll.epollfd, events, 10, -1, &mask );
-
-		__cfadbg_print_safe(io_core, "Kernel I/O - epoll : %d io contexts events, waking up\n", nfds);
-
-		// Check if an error occured
-		if (nfds == -1) {
-			if( errno == EINTR ) continue;
-			abort( "internal error, pthread_sigmask" );
-		}
-
-		for(i; nfds) {
-			$io_ctx_thread * io_ctx = ($io_ctx_thread *)(uintptr_t)events[i].data.u64;
-			/* paranoid */ verify( io_ctx );
-			__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Unparking io poller %d (%p)\n", io_ctx->ring->fd, io_ctx);
-			#if !defined( __CFA_NO_STATISTICS__ )
-				__cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats;
-			#endif
-
-			eventfd_t v;
-			eventfd_read(io_ctx->ring->efd, &v);
-
-			post( io_ctx->sem );
-		}
-	}
-
-	__atomic_store_n(&iopoll.stopped, true, __ATOMIC_SEQ_CST);
-
-	__cfadbg_print_safe(io_core, "Kernel : IO poller thread stopping\n" );
-	unregister(&id);
-	return 0p;
-}
-
-//=============================================================================================
 // I/O Context Constrution/Destruction
 //=============================================================================================
 
-void ?{}($io_ctx_thread & this, struct cluster & cl) { (this.self){ "IO Poller", cl }; }
-void main( $io_ctx_thread & this );
-static inline $thread * get_thread( $io_ctx_thread & this ) { return &this.self; }
-void ^?{}( $io_ctx_thread & mutex this ) {}
-
-static void __io_create ( __io_data & this, const io_context_params & params_in );
-static void __io_destroy( __io_data & this );
-
-void ?{}(io_context & this, struct cluster & cl, const io_context_params & params) {
-	(this.thrd){ cl };
-	this.thrd.ring = malloc();
-	__cfadbg_print_safe(io_core, "Kernel I/O : Creating ring for io_context %p\n", &this);
-	__io_create( *this.thrd.ring, params );
-
-	__cfadbg_print_safe(io_core, "Kernel I/O : Starting poller thread for io_context %p\n", &this);
-	this.thrd.done = false;
-	__thrd_start( this.thrd, main );
-
-	__cfadbg_print_safe(io_core, "Kernel I/O : io_context %p ready\n", &this);
-}
-
-void ?{}(io_context & this, struct cluster & cl) {
-	io_context_params params;
-	(this){ cl, params };
-}
-
-void ^?{}(io_context & this, bool cluster_context) {
-	__cfadbg_print_safe(io_core, "Kernel I/O : tearing down io_context %p\n", &this);
-
-	// Notify the thread of the shutdown
-	__atomic_store_n(&this.thrd.done, true, __ATOMIC_SEQ_CST);
-
-	// If this is an io_context within a cluster, things get trickier
-	$thread & thrd = this.thrd.self;
-	if( cluster_context ) {
-		// We are about to do weird things with the threads
-		// we don't need interrupts to complicate everything
-		disable_interrupts();
-
-		// Get cluster info
-		cluster & cltr = *thrd.curr_cluster;
-		/* paranoid */ verify( cltr.idles.total == 0 || &cltr == mainCluster );
-		/* paranoid */ verify( !ready_mutate_islocked() );
-
-		// We need to adjust the clean-up based on where the thread is
-		if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) {
-			// This is the tricky case
-			// The thread was preempted or ready to run and now it is on the ready queue
-			// but the cluster is shutting down, so there aren't any processors to run the ready queue
-			// the solution is to steal the thread from the ready-queue and pretend it was blocked all along
-
-			ready_schedule_lock();
-			// The thread should on the list
-			/* paranoid */ verify( thrd.link.next != 0p );
-
-			// Remove the thread from the ready queue of this cluster
-			// The thread should be the last on the list
-			__attribute__((unused)) bool removed = remove_head( &cltr, &thrd );
-			/* paranoid */ verify( removed );
-			thrd.link.next = 0p;
-			thrd.link.prev = 0p;
-
-			// Fixup the thread state
-			thrd.state = Blocked;
-			thrd.ticket = TICKET_BLOCKED;
-			thrd.preempted = __NO_PREEMPTION;
-
-			ready_schedule_unlock();
-
-			// Pretend like the thread was blocked all along
-		}
-		// !!! This is not an else if !!!
-		// Ok, now the thread is blocked (whether we cheated to get here or not)
-		if( thrd.state == Blocked ) {
-			// This is the "easy case"
-			// The thread is parked and can easily be moved to active cluster
-			verify( thrd.curr_cluster != active_cluster() || thrd.curr_cluster == mainCluster );
-			thrd.curr_cluster = active_cluster();
-
-			// unpark the fast io_poller
-			unpark( &thrd );
-		}
-		else {
-			// The thread is in a weird state
-			// I don't know what to do here
-			abort("io_context poller thread is in unexpected state, cannot clean-up correctly\n");
-		}
-
-		// The weird thread kidnapping stuff is over, restore interrupts.
-		enable_interrupts( __cfaabi_dbg_ctx );
-	} else {
-		post( this.thrd.sem );
-	}
-
-	^(this.thrd){};
-	__cfadbg_print_safe(io_core, "Kernel I/O : Stopped poller thread for io_context %p\n", &this);
-
-	__io_destroy( *this.thrd.ring );
-	__cfadbg_print_safe(io_core, "Kernel I/O : Destroyed ring for io_context %p\n", &this);
-
-	free(this.thrd.ring);
-}
-
-void ^?{}(io_context & this) {
-	^(this){ false };
+
+
+static void __io_uring_setup ( $io_context & this, const io_context_params & params_in, int procfd );
+static void __io_uring_teardown( $io_context & this );
+static void __epoll_register($io_context & ctx);
+static void __epoll_unregister($io_context & ctx);
+void __ioarbiter_register( $io_arbiter & mutex, $io_context & ctx );
+void __ioarbiter_unregister( $io_arbiter & mutex, $io_context & ctx );
+
+void ?{}($io_context & this, processor * proc, struct cluster & cl) {
+	/* paranoid */ verify( cl.io.arbiter );
+	this.proc = proc;
+	this.arbiter = cl.io.arbiter;
+	this.ext_sq.empty = true;
+	(this.ext_sq.queue){};
+	__io_uring_setup( this, cl.io.params, proc->idle );
+	__cfadbg_print_safe(io_core, "Kernel I/O : Created ring for io_context %u (%p)\n", this.fd, &this);
+}
+
+void ^?{}($io_context & this) {
+	__cfadbg_print_safe(io_core, "Kernel I/O : tearing down io_context %u\n", this.fd);
+
+	__io_uring_teardown( this );
+	__cfadbg_print_safe(io_core, "Kernel I/O : Destroyed ring for io_context %u\n", this.fd);
 }
 
@@ -329 +125 @@
 extern void __enable_interrupts_hard();
 
-static void __io_create( __io_data & this, const io_context_params & params_in ) {
+static void __io_uring_setup( $io_context & this, const io_context_params & params_in, int procfd ) {
 	// Step 1 : call to setup
 	struct io_uring_params params;
 	memset(&params, 0, sizeof(params));
-	if( params_in.poll_submit ) params.flags |= IORING_SETUP_SQPOLL;
-	if( params_in.poll_complete ) params.flags |= IORING_SETUP_IOPOLL;
+	// if( params_in.poll_submit ) params.flags |= IORING_SETUP_SQPOLL;
+	// if( params_in.poll_complete ) params.flags |= IORING_SETUP_IOPOLL;
 
 	__u32 nentries = params_in.num_entries != 0 ? params_in.num_entries : 256;
@@ -340 +136 @@
 		abort("ERROR: I/O setup 'num_entries' must be a power of 2\n");
 	}
-	if( params_in.poller_submits && params_in.eager_submits ) {
-		abort("ERROR: I/O setup 'poller_submits' and 'eager_submits' cannot be used together\n");
-	}
 
 	int fd = syscall(__NR_io_uring_setup, nentries, &params );
@@ -350 +143 @@
 
 	// Step 2 : mmap result
-	memset( &this, 0, sizeof(struct __io_data) );
-	struct __submition_data & sq = this.submit_q;
-	struct __completion_data & cq = this.completion_q;
+	struct __sub_ring_t & sq = this.sq;
+	struct __cmp_ring_t & cq = this.cq;
 
 	// calculate the right ring size
@@ -401 +193 @@
 	// Get the pointers from the kernel to fill the structure
 	// submit queue
-	sq.head = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.head);
-	sq.tail = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);
-	sq.mask = ( const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);
-	sq.num = ( const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);
-	sq.flags = ( __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);
-	sq.dropped = ( __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
-	sq.array = ( __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
-	sq.prev_head = *sq.head;
-
-	{
-		const __u32 num = *sq.num;
-		for( i; num ) {
-			__sqe_clean( &sq.sqes[i] );
-		}
-	}
-
-	(sq.submit_lock){};
-	(sq.release_lock){};
-
-	if( params_in.poller_submits || params_in.eager_submits ) {
-		/* paranoid */ verify( is_pow2( params_in.num_ready ) || (params_in.num_ready < 8) );
-		sq.ready_cnt = max( params_in.num_ready, 8 );
-		sq.ready = alloc( sq.ready_cnt, 64`align );
-		for(i; sq.ready_cnt) {
-			sq.ready[i] = -1ul32;
-		}
-		sq.prev_ready = 0;
-	}
-	else {
-		sq.ready_cnt = 0;
-		sq.ready = 0p;
-		sq.prev_ready = 0;
-	}
+	sq.kring.head = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.head);
+	sq.kring.tail = (volatile __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);
+	sq.kring.array = ( __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
+	sq.mask = ( const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);
+	sq.num = ( const __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);
+	sq.flags = ( __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);
+	sq.dropped = ( __u32 *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
+
+	sq.kring.released = 0;
+
+	sq.free_ring.head = 0;
+	sq.free_ring.tail = *sq.num;
+	sq.free_ring.array = alloc( *sq.num, 128`align );
+	for(i; (__u32)*sq.num) {
+		sq.free_ring.array[i] = i;
+	}
+
+	sq.to_submit = 0;
 
 	// completion queue
@@ -446 +223 @@
 	// io_uring_register is so f*cking slow on some machine that it
 	// will never succeed if preemption isn't hard blocked
+	__cfadbg_print_safe(io_core, "Kernel I/O : registering %d for completion with ring %d\n", procfd, fd);
+
 	__disable_interrupts_hard();
 
-	int efd = eventfd(0, 0);
-	if (efd < 0) {
-		abort("KERNEL ERROR: IO_URING EVENTFD - %s\n", strerror(errno));
-	}
-
-	int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &efd, 1);
+	int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &procfd, 1);
 	if (ret < 0) {
 		abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno));
@@ -459 +233 @@
 
 	__enable_interrupts_hard();
+
+	__cfadbg_print_safe(io_core, "Kernel I/O : registered %d for completion with ring %d\n", procfd, fd);
 
 	// some paranoid checks
@@ -468 +244 @@
 	/* paranoid */ verifyf( (*sq.mask) == ((*sq.num) - 1ul32), "IO_URING Expected mask to be %u (%u entries), was %u", (*sq.num) - 1ul32, *sq.num, *sq.mask );
 	/* paranoid */ verifyf( (*sq.num) >= nentries, "IO_URING Expected %u entries, got %u", nentries, *sq.num );
-	/* paranoid */ verifyf( (*sq.head) == 0, "IO_URING Expected head to be 0, got %u", *sq.head );
-	/* paranoid */ verifyf( (*sq.tail) == 0, "IO_URING Expected tail to be 0, got %u", *sq.tail );
+	/* paranoid */ verifyf( (*sq.kring.head) == 0, "IO_URING Expected head to be 0, got %u", *sq.kring.head );
+	/* paranoid */ verifyf( (*sq.kring.tail) == 0, "IO_URING Expected tail to be 0, got %u", *sq.kring.tail );
 
 	// Update the global ring info
-	this.ring_flags = params.flags;
+	this.ring_flags = 0;
 	this.fd = fd;
-	this.efd = efd;
-	this.eager_submits = params_in.eager_submits;
-	this.poller_submits = params_in.poller_submits;
-}
-
-static void __io_destroy( __io_data & this ) {
+}
+
+static void __io_uring_teardown( $io_context & this ) {
 	// Shutdown the io rings
-	struct __submition_data & sq = this.submit_q;
-	struct __completion_data & cq = this.completion_q;
+	struct __sub_ring_t & sq = this.sq;
+	struct __cmp_ring_t & cq = this.cq;
 
 	// unmap the submit queue entries
@@ -497 +270 @@
 	// close the file descriptor
 	close(this.fd);
-	close(this.efd);
-
-	free( this.submit_q.ready ); // Maybe null, doesn't matter
+
+	free( this.sq.free_ring.array ); // Maybe null, doesn't matter
+}
+
+void __cfa_io_start( processor * proc ) {
+	proc->io.ctx = alloc();
+	(*proc->io.ctx){proc, *proc->cltr};
+}
+void __cfa_io_stop ( processor * proc ) {
+	^(*proc->io.ctx){};
+	free(proc->io.ctx);
 }
 
@@ -505 +286 @@
 // I/O Context Sleep
 //=============================================================================================
-static inline void __ioctx_epoll_ctl($io_ctx_thread & ctx, int op, const char * error) {
-	struct epoll_event ev;
-	ev.events = EPOLLIN | EPOLLONESHOT;
-	ev.data.u64 = (__u64)&ctx;
-	int ret = epoll_ctl(iopoll.epollfd, op, ctx.ring->efd, &ev);
-	if (ret < 0) {
-		abort( "KERNEL ERROR: EPOLL %s - (%d) %s\n", error, (int)errno, strerror(errno) );
-	}
-}
-
-void __ioctx_register($io_ctx_thread & ctx) {
-	__ioctx_epoll_ctl(ctx, EPOLL_CTL_ADD, "ADD");
-}
-
-void __ioctx_prepare_block($io_ctx_thread & ctx) {
-	__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %d (%p)\n", ctx.ring->fd, &ctx);
-	__ioctx_epoll_ctl(ctx, EPOLL_CTL_MOD, "REARM");
-}
-
-void __ioctx_unregister($io_ctx_thread & ctx) {
-	// Read the current epoch so we know when to stop
-	size_t curr = __atomic_load_n(&iopoll.epoch, __ATOMIC_SEQ_CST);
-
-	// Remove the fd from the iopoller
-	__ioctx_epoll_ctl(ctx, EPOLL_CTL_DEL, "REMOVE");
-
-	// Notify the io poller thread of the shutdown
-	iopoll.run = false;
-	sigval val = { 1 };
-	pthread_sigqueue( iopoll.thrd, SIGUSR1, val );
-
-	// Make sure all this is done
-	__atomic_thread_fence(__ATOMIC_SEQ_CST);
-
-	// Wait for the next epoch
-	while(curr == iopoll.epoch && !iopoll.stopped) Pause();
-}
+// static inline void __epoll_ctl($io_context & ctx, int op, const char * error) {
+// 	struct epoll_event ev;
+// 	ev.events = EPOLLIN | EPOLLONESHOT;
+// 	ev.data.u64 = (__u64)&ctx;
+// 	int ret = epoll_ctl(iopoll.epollfd, op, ctx.efd, &ev);
+// 	if (ret < 0) {
+// 		abort( "KERNEL ERROR: EPOLL %s - (%d) %s\n", error, (int)errno, strerror(errno) );
+// 	}
+// }
+
+// static void __epoll_register($io_context & ctx) {
+// 	__epoll_ctl(ctx, EPOLL_CTL_ADD, "ADD");
+// }
+
+// static void __epoll_unregister($io_context & ctx) {
+// 	// Read the current epoch so we know when to stop
+// 	size_t curr = __atomic_load_n(&iopoll.epoch, __ATOMIC_SEQ_CST);
+
+// 	// Remove the fd from the iopoller
+// 	__epoll_ctl(ctx, EPOLL_CTL_DEL, "REMOVE");
+
+// 	// Notify the io poller thread of the shutdown
+// 	iopoll.run = false;
+// 	sigval val = { 1 };
+// 	pthread_sigqueue( iopoll.thrd, SIGUSR1, val );
+
+// 	// Make sure all this is done
+// 	__atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+// 	// Wait for the next epoch
+// 	while(curr == iopoll.epoch && !iopoll.stopped) Pause();
+// }
+
+// void __ioctx_prepare_block($io_context & ctx) {
+// 	__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %d (%p)\n", ctx.fd, &ctx);
+// 	__epoll_ctl(ctx, EPOLL_CTL_MOD, "REARM");
+// }
+
 
 //=============================================================================================
 // I/O Context Misc Setup
 //=============================================================================================
-void register_fixed_files( io_context & ctx, int * files, unsigned count ) {
-	int ret = syscall( __NR_io_uring_register, ctx.thrd.ring->fd, IORING_REGISTER_FILES, files, count );
-	if( ret < 0 ) {
-		abort( "KERNEL ERROR: IO_URING REGISTER - (%d) %s\n", (int)errno, strerror(errno) );
-	}
-
-	__cfadbg_print_safe( io_core, "Kernel I/O : Performed io_register for %p, returned %d\n", active_thread(), ret );
-}
-
-void register_fixed_files( cluster & cltr, int * files, unsigned count ) {
-	for(i; cltr.io.cnt) {
-		register_fixed_files( cltr.io.ctxs[i], files, count );
-	}
-}
+void ?{}( $io_arbiter & this ) {
+	this.pending.empty = true;
+}
+
+void ^?{}( $io_arbiter & this ) {}
+
+$io_arbiter * create(void) {
+	return new();
+}
+void destroy($io_arbiter * arbiter) {
+	delete(arbiter);
+}
+
+//=============================================================================================
+// I/O Context Misc Setup
+//=============================================================================================
+
 #endif