Changes in libcfa/src/concurrency/io.cfa [08a994e:05cfa4d]
Files:
- libcfa/src/concurrency/io.cfa (1 edited)
Legend:
- ' ' Unmodified
- '+' Added
- '-' Removed
- '…' Elided unmodified lines
libcfa/src/concurrency/io.cfa
--- r08a994e
+++ r05cfa4d

 #if !defined(HAVE_LINUX_IO_URING_H)
-    void __kernel_io_startup( cluster &, bool ) {
+    void __kernel_io_startup( cluster &, int, bool ) {
        // Nothing to do without io_uring
    }
…
 #endif

-#if defined(__CFA_IO_POLLING_USER__)
-    void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
-        this.ring = &cltr.io;
-        (this.thrd){ "Fast I/O Poller", cltr };
-    }
-    void ^?{}( __io_poller_fast & mutex this );
-    void main( __io_poller_fast & this );
-    static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
-    void ^?{}( __io_poller_fast & mutex this ) {}
-#endif
+// Fast poller user-thread
+// Not using the "thread" keyword because we want to control
+// more carefully when to start/stop it
+struct __io_poller_fast {
+    struct __io_data * ring;
+    bool waiting;
+    $thread thrd;
+};
+
+void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
+    this.ring = cltr.io;
+    this.waiting = true;
+    (this.thrd){ "Fast I/O Poller", cltr };
+}
+void ^?{}( __io_poller_fast & mutex this );
+void main( __io_poller_fast & this );
+static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
+void ^?{}( __io_poller_fast & mutex this ) {}
+
+struct __submition_data {
+    // Head and tail of the ring (associated with array)
+    volatile uint32_t * head;
+    volatile uint32_t * tail;
+
+    // The actual kernel ring which uses head/tail
+    // indexes into the sqes arrays
+    uint32_t * array;
+
+    // number of entries and mask to go with it
+    const uint32_t * num;
+    const uint32_t * mask;
+
+    // Submission flags (Not sure what for)
+    uint32_t * flags;
+
+    // number of sqes not submitted (whatever that means)
+    uint32_t * dropped;
+
+    // Like head/tail but not seen by the kernel
+    volatile uint32_t alloc;
+    volatile uint32_t ready;
+
+    __spinlock_t lock;
+
+    // A buffer of sqes (not the actual ring)
+    struct io_uring_sqe * sqes;
+
+    // The location and size of the mmaped area
+    void * ring_ptr;
+    size_t ring_sz;
+
+    // Statistics
+    #if !defined(__CFA_NO_STATISTICS__)
+        struct {
+            struct {
+                volatile unsigned long long int val;
+                volatile unsigned long long int cnt;
+                volatile unsigned long long int block;
+            } submit_avg;
+        } stats;
+    #endif
+};
+
+struct __completion_data {
+    // Head and tail of the ring
+    volatile uint32_t * head;
+    volatile uint32_t * tail;
+
+    // number of entries and mask to go with it
+    const uint32_t * mask;
+    const uint32_t * num;
+
+    // number of cqes not submitted (whatever that means)
+    uint32_t * overflow;
+
+    // the kernel ring
+    struct io_uring_cqe * cqes;
+
+    // The location and size of the mmaped area
+    void * ring_ptr;
+    size_t ring_sz;
+
+    // Statistics
+    #if !defined(__CFA_NO_STATISTICS__)
+        struct {
+            struct {
+                unsigned long long int val;
+                unsigned long long int slow_cnt;
+                unsigned long long int fast_cnt;
+            } completed_avg;
+        } stats;
+    #endif
+};
+
+struct __io_data {
+    struct __submition_data submit_q;
+    struct __completion_data completion_q;
+    uint32_t ring_flags;
+    int cltr_flags;
+    int fd;
+    semaphore submit;
+    volatile bool done;
+    struct {
+        struct {
+            void * stack;
+            pthread_t kthrd;
+        } slow;
+        __io_poller_fast fast;
+        __bin_sem_t sem;
+    } poller;
+};

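Note: the head/tail/mask pointer triplets above mirror the kernel's io_uring ring layout: one side bumps tail, the other bumps head, and both index the backing array modulo a power-of-two size via the mask. The C sketch below (illustrative only, not part of the changeset; pop_cqe is a hypothetical helper) shows how a completion would be consumed through exactly these fields:

    #include <stdint.h>
    #include <stdatomic.h>
    #include <linux/io_uring.h>

    // Drain one completion from a ring shaped like __completion_data above.
    static int pop_cqe( _Atomic uint32_t * head, _Atomic uint32_t * tail,
                        const uint32_t * mask, const struct io_uring_cqe * cqes,
                        uint64_t * user_data, int32_t * res ) {
        uint32_t t = atomic_load_explicit( tail, memory_order_acquire ); // written by the kernel
        uint32_t h = atomic_load_explicit( head, memory_order_relaxed ); // written by user space
        if( h == t ) return 0;                                // ring is empty
        const struct io_uring_cqe * cqe = &cqes[ h & *mask ]; // the mask wraps the index
        *user_data = cqe->user_data;
        *res       = cqe->res;
        atomic_store_explicit( head, h + 1, memory_order_release ); // hand the slot back
        return 1;
    }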
 //=============================================================================================
 // I/O Startup / Shutdown logic
 //=============================================================================================
-void __kernel_io_startup( cluster & this, bool main_cluster ) {
+void __kernel_io_startup( cluster & this, int io_flags, bool main_cluster ) {
+    this.io = malloc();
+
     // Step 1 : call to setup
     struct io_uring_params params;
…
     // Step 2 : mmap result
-    memset( &this.io, 0, sizeof(struct io_ring) );
-    struct io_uring_sq & sq = this.io.submit_q;
-    struct io_uring_cq & cq = this.io.completion_q;
+    memset( this.io, 0, sizeof(struct __io_data) );
+    struct __submition_data & sq = this.io->submit_q;
+    struct __completion_data & cq = this.io->completion_q;

     // calculate the right ring size
…
     // Update the global ring info
-    this.io.flags = params.flags;
-    this.io.fd = fd;
-    this.io.done = false;
-    (this.io.submit){ min(*sq.num, *cq.num) };
+    this.io->ring_flags = params.flags;
+    this.io->cltr_flags = io_flags;
+    this.io->fd = fd;
+    this.io->done = false;
+    (this.io->submit){ min(*sq.num, *cq.num) };

     // Initialize statistics
     #if !defined(__CFA_NO_STATISTICS__)
-        this.io.submit_q.stats.submit_avg.val = 0;
-        this.io.submit_q.stats.submit_avg.cnt = 0;
-        this.io.completion_q.stats.completed_avg.val = 0;
-        this.io.completion_q.stats.completed_avg.cnt = 0;
+        this.io->submit_q.stats.submit_avg.val = 0;
+        this.io->submit_q.stats.submit_avg.cnt = 0;
+        this.io->submit_q.stats.submit_avg.block = 0;
+        this.io->completion_q.stats.completed_avg.val = 0;
+        this.io->completion_q.stats.completed_avg.slow_cnt = 0;
+        this.io->completion_q.stats.completed_avg.fast_cnt = 0;
     #endif
…

 void __kernel_io_finish_start( cluster & this ) {
-    #if defined(__CFA_IO_POLLING_USER__)
+    if( this.io->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
         __cfadbg_print_safe(io_core, "Kernel I/O : Creating fast poller for cluter %p\n", &this);
-        (this.io.poller.fast){ this };
-        __thrd_start( this.io.poller.fast, main );
-    #endif
+        (this.io->poller.fast){ this };
+        __thrd_start( this.io->poller.fast, main );
+    }

     // Create the poller thread
     __cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluter %p\n", &this);
-    this.io.poller.slow.stack = __create_pthread( &this.io.poller.slow.kthrd, __io_poller_slow, &this );
+    this.io->poller.slow.stack = __create_pthread( &this.io->poller.slow.kthrd, __io_poller_slow, &this );
 }
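Note: the elided "Step 2" hunks follow the standard io_uring bring-up recipe, whose results the new heap-allocated __io_data now records. For orientation, a hedged C sketch of that recipe (ring_setup is a hypothetical helper; the syscall number and IORING_OFF_* offsets are the standard ones from <sys/syscall.h> and <linux/io_uring.h>; error handling elided):

    #define _GNU_SOURCE
    #include <linux/io_uring.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    // Set up a ring and map its three shared regions, as "Step 1"/"Step 2" do.
    static int ring_setup( unsigned entries, struct io_uring_params * p,
                           void ** sq_ring, void ** cq_ring, void ** sqes ) {
        memset( p, 0, sizeof(*p) );
        int fd = syscall( __NR_io_uring_setup, entries, p );
        if( fd < 0 ) return fd;

        size_t sq_sz  = p->sq_off.array + p->sq_entries * sizeof(unsigned);
        size_t cq_sz  = p->cq_off.cqes  + p->cq_entries * sizeof(struct io_uring_cqe);
        size_t sqe_sz = p->sq_entries * sizeof(struct io_uring_sqe);

        *sq_ring = mmap( 0, sq_sz,  PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, fd, IORING_OFF_SQ_RING );
        *cq_ring = mmap( 0, cq_sz,  PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, fd, IORING_OFF_CQ_RING );
        *sqes    = mmap( 0, sqe_sz, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, fd, IORING_OFF_SQES );
        return fd;
    }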
…
     __cfadbg_print_safe(io_core, "Kernel I/O : Stopping pollers for cluster\n", &this);
     // Notify the poller thread of the shutdown
-    __atomic_store_n(&this.io.done, true, __ATOMIC_SEQ_CST);
+    __atomic_store_n(&this.io->done, true, __ATOMIC_SEQ_CST);

     // Stop the IO Poller
     sigval val = { 1 };
-    pthread_sigqueue( this.io.poller.slow.kthrd, SIGUSR1, val );
-    #if defined(__CFA_IO_POLLING_USER__)
-        post( this.io.poller.sem );
-    #endif
+    pthread_sigqueue( this.io->poller.slow.kthrd, SIGUSR1, val );
+    post( this.io->poller.sem );

     // Wait for the poller thread to finish
-    pthread_join( this.io.poller.slow.kthrd, 0p );
-    free( this.io.poller.slow.stack );
+    pthread_join( this.io->poller.slow.kthrd, 0p );
+    free( this.io->poller.slow.stack );

     __cfadbg_print_safe(io_core, "Kernel I/O : Slow poller stopped for cluster\n", &this);

-    #if defined(__CFA_IO_POLLING_USER__)
+    if( this.io->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
+        with( this.io->poller.fast ) {
+            /* paranoid */ verify( waiting ); // The thread shouldn't be in a system call
+            /* paranoid */ verify( this.procs.head == 0p || &this == mainCluster );
+            /* paranoid */ verify( this.idles.head == 0p || &this == mainCluster );
+
+            // We need to adjust the clean-up based on where the thread is
+            if( thrd.preempted != __NO_PREEMPTION ) {
+
+                // This is the tricky case
+                // The thread was preempted and now it is on the ready queue
+                /* paranoid */ verify( thrd.state == Active );           // The thread better be in this state
+                /* paranoid */ verify( thrd.next == 1p );                // The thread should be the last on the list
+                /* paranoid */ verify( this.ready_queue.head == &thrd ); // The thread should be the only thing on the list
+
+                // Remove the thread from the ready queue of this cluster
+                this.ready_queue.head = 1p;
+                thrd.next = 0p;
+
+                // Fixup the thread state
+                thrd.state = Blocked;
+                thrd.preempted = __NO_PREEMPTION;
+
+                // Pretend like the thread was blocked all along
+            }
+            // !!! This is not an else if !!!
+            if( thrd.state == Blocked ) {
+
+                // This is the "easy case"
+                // The thread is parked and can easily be moved to active cluster
+                verify( thrd.curr_cluster != active_cluster() || thrd.curr_cluster == mainCluster );
+                thrd.curr_cluster = active_cluster();

                 // unpark the fast io_poller
-    unpark( &this.io.poller.fast.thrd __cfaabi_dbg_ctx2 );
-
-    ^(this.io.poller.fast){};
+                unpark( &thrd __cfaabi_dbg_ctx2 );
+            }
+            else {
+
+                // The thread is in a weird state
+                // I don't know what to do here
+                abort("Fast poller thread is in unexpected state, cannot clean-up correctly\n");
+            }
+        }
+
+        ^(this.io->poller.fast){};

         __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller stopped for cluster\n", &this);
-    #endif
+    }
 }

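Note: the shutdown sequence above relies on a directed signal forcing the slow poller's blocked io_uring_enter to fail with EINTR, after which the poller observes the done flag and exits. A minimal C sketch of that pattern (stop_poller and noop are hypothetical names, not the CFA code):

    #define _GNU_SOURCE
    #include <pthread.h>
    #include <signal.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <string.h>

    static void noop( int sig ) { (void)sig; } // a handler must exist, or SIGUSR1 kills the process

    // Kick a pthread out of a blocking syscall, then reap it.
    static void stop_poller( pthread_t kthrd, atomic_bool * done ) {
        struct sigaction sa;
        memset( &sa, 0, sizeof(sa) );
        sa.sa_handler = noop;                    // no SA_RESTART, so the wait fails with EINTR
        sigaction( SIGUSR1, &sa, 0 );

        atomic_store( done, true );              // the poller re-checks this after EINTR
        union sigval val = { .sival_int = 1 };
        pthread_sigqueue( kthrd, SIGUSR1, val ); // directed, queued signal
        pthread_join( kthrd, 0 );
    }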
…
     #if !defined(__CFA_NO_STATISTICS__)
         if(this.print_stats) {
-            __cfaabi_bits_print_safe( STDERR_FILENO,
-                "----- I/O uRing Stats -----\n"
-                "- total submit calls : %llu\n"
-                "- avg submit : %lf\n"
-                "- total wait calls : %llu\n"
-                "- avg completion/wait : %lf\n",
-                this.io.submit_q.stats.submit_avg.cnt,
-                ((double)this.io.submit_q.stats.submit_avg.val) / this.io.submit_q.stats.submit_avg.cnt,
-                this.io.completion_q.stats.completed_avg.cnt,
-                ((double)this.io.completion_q.stats.completed_avg.val) / this.io.completion_q.stats.completed_avg.cnt
-            );
+            with(this.io->submit_q.stats, this.io->completion_q.stats) {
+                __cfaabi_bits_print_safe( STDERR_FILENO,
+                    "----- I/O uRing Stats -----\n"
+                    "- total submit calls : %'llu\n"
+                    "- avg submit : %'.2lf\n"
+                    "- pre-submit block %% : %'.2lf\n"
+                    "- total wait calls : %'llu (%'llu slow, %'llu fast)\n"
+                    "- avg completion/wait : %'.2lf\n",
+                    submit_avg.cnt,
+                    ((double)submit_avg.val) / submit_avg.cnt,
+                    (100.0 * submit_avg.block) / submit_avg.cnt,
+                    completed_avg.slow_cnt + completed_avg.fast_cnt,
+                    completed_avg.slow_cnt, completed_avg.fast_cnt,
+                    ((double)completed_avg.val) / (completed_avg.slow_cnt + completed_avg.fast_cnt)
+                );
+            }
         }
     #endif

     // Shutdown the io rings
-    struct io_uring_sq & sq = this.io.submit_q;
-    struct io_uring_cq & cq = this.io.completion_q;
+    struct __submition_data & sq = this.io->submit_q;
+    struct __completion_data & cq = this.io->completion_q;

     // unmap the submit queue entries
…
     // close the file descriptor
-    close(this.io.fd);
+    close(this.io->fd);
+
+    free( this.io );
 }
…
 // Process a single completion message from the io_uring
 // This is NOT thread-safe
-static int __drain_io( struct io_ring & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
+static int __drain_io( struct __io_data & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
     int ret = syscall( __NR_io_uring_enter, ring.fd, 0, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
     if( ret < 0 ) {
…
     // Nothing was new return 0
     if (head == tail) {
-        #if !defined(__CFA_NO_STATISTICS__)
-            ring.completion_q.stats.completed_avg.cnt += 1;
-        #endif
         return 0;
     }
…
     __atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED );

-    // Update statistics
-    #if !defined(__CFA_NO_STATISTICS__)
-        ring.completion_q.stats.completed_avg.val += count;
-        ring.completion_q.stats.completed_avg.cnt += 1;
-    #endif
-
     return count;
 }
…
 static void * __io_poller_slow( void * arg ) {
     cluster * cltr = (cluster *)arg;
-    struct io_ring & ring = cltr->io;
+    struct __io_data & ring = *cltr->io;

     sigset_t mask;
…
     verify( (*ring.completion_q.head) == (*ring.completion_q.tail) );

-    while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
-        #if defined(__CFA_IO_POLLING_USER__)
-
+    __cfadbg_print_safe(io_core, "Kernel I/O : Slow poller for ring %p ready\n", &ring);
+
+    if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
+        while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
             // In the user-thread approach drain and if anything was drained,
             // batton pass to the user-thread
             int count = __drain_io( ring, &mask, 1, true );
+
+            // Update statistics
+            #if !defined(__CFA_NO_STATISTICS__)
+                ring.completion_q.stats.completed_avg.val += count;
+                ring.completion_q.stats.completed_avg.slow_cnt += 1;
+            #endif
+
             if(count > 0) {
                 __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to fast poller\n", &ring);
…
                 wait( ring.poller.sem );
             }
-
-        #else
-
+        }
+    }
+    else {
+        while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
             //In the naive approach, just poll the io completion queue directly
-            __drain_io( ring, &mask, 1, true );
-
+            int count = __drain_io( ring, &mask, 1, true );
+
+            // Update statistics
+            #if !defined(__CFA_NO_STATISTICS__)
+                ring.completion_q.stats.completed_avg.val += count;
+                ring.completion_q.stats.completed_avg.slow_cnt += 1;
+            #endif
+        }
+    }
+
+    __cfadbg_print_safe(io_core, "Kernel I/O : Slow poller for ring %p stopping\n", &ring);
+
+    return 0p;
+}
+
+void main( __io_poller_fast & this ) {
+    verify( this.ring->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD );
+
+    // Start parked
+    park( __cfaabi_dbg_ctx );
+
+    __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p ready\n", &this.ring);
+
+    int reset = 0;
+
+    // Then loop until we need to start
+    while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
+        // Drain the io
+        this.waiting = false;
+        int count = __drain_io( *this.ring, 0p, 0, false );
+        reset += count > 0 ? 1 : 0;
+
+        // Update statistics
+        #if !defined(__CFA_NO_STATISTICS__)
+            this.ring->completion_q.stats.completed_avg.val += count;
+            this.ring->completion_q.stats.completed_avg.fast_cnt += 1;
         #endif
-    }
-
-    return 0p;
-}
-
-#if defined(__CFA_IO_POLLING_USER__)
-    void main( __io_poller_fast & this ) {
-        // Start parked
-        park( __cfaabi_dbg_ctx );
-
-        // Then loop until we need to start
-        while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
-            // Drain the io
-            if(0 > __drain_io( *this.ring, 0p, 0, false )) {
-                // If we got something, just yield and check again
-                yield();
-            }
-            else {
-                // We didn't get anything baton pass to the slow poller
-                __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
-                post( this.ring->poller.sem );
-                park( __cfaabi_dbg_ctx );
-            }
+
+        this.waiting = true;
+        if(reset < 5) {
+            // If we got something, just yield and check again
+            yield();
         }
-    }
-#endif
+        else {
+            // We didn't get anything baton pass to the slow poller
+            __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
+            post( this.ring->poller.sem );
+            park( __cfaabi_dbg_ctx );
+            reset = 0;
+        }
+    }
+
+    __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring);
+}
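Note: together the two loops above form a baton-pass protocol: the slow (kernel-thread) poller blocks in the kernel, and once a blocking wait finds completions it wakes the fast (user-thread) poller and parks on poller.sem; the fast poller polls and yields, and returns the baton when it runs dry. The schematic below restates the protocol in plain C; drain_blocking, drain_nonblocking, wake_fast, and park_fast are hypothetical stand-ins for __drain_io, unpark, and park:

    #include <semaphore.h>
    #include <stdatomic.h>

    extern int  drain_blocking( void );     // stand-in for __drain_io(ring, &mask, 1, true)
    extern int  drain_nonblocking( void );  // stand-in for __drain_io(ring, 0p, 0, false)
    extern void wake_fast( void );          // stand-in for unpark(&poller.fast.thrd)
    extern void park_fast( void );          // stand-in for park()

    void slow_loop( sem_t * baton, atomic_bool * done ) {
        while( !atomic_load( done ) ) {
            if( drain_blocking() > 0 ) {    // found work while blocked in the kernel
                wake_fast();                // hand the ring to the user thread ...
                sem_wait( baton );          // ... and sleep until it hands it back
            }
        }
    }

    void fast_loop( sem_t * baton, atomic_bool * done ) {
        while( !atomic_load( done ) ) {
            if( drain_nonblocking() > 0 ) continue; // productive: keep polling
            sem_post( baton );              // ran dry: return the baton ...
            park_fast();                    // ... and park until woken again
        }
    }

The real fast poller is less eager to give up the baton: it keeps yielding until the reset counter reaches its threshold, trading some idle spinning for fewer kernel-thread wakeups.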

 //=============================================================================================
…
 //

-static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct io_ring & ring ) {
+static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring ) {
     // Wait for a spot to be available
-    P(ring.submit);
+    __attribute__((unused)) bool blocked = P(ring.submit);
+    #if !defined(__CFA_NO_STATISTICS__)
+        __atomic_fetch_add( &ring.submit_q.stats.submit_avg.block, blocked ? 1ul64 : 0ul64, __ATOMIC_RELAXED );
+    #endif

     // Allocate the sqe
…
 }

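Note: the new statistics line assumes a semaphore whose P reports whether the caller actually had to block, which is what feeds the new "pre-submit block %" figure. A hedged C sketch of such a P (struct sem and P_blocking are illustrative; the CFA semaphore is a runtime primitive with its own implementation):

    #include <pthread.h>
    #include <stdbool.h>

    struct sem { pthread_mutex_t lock; pthread_cond_t cond; long count; };

    // P() that additionally reports whether the caller had to block.
    static bool P_blocking( struct sem * s ) {
        bool blocked = false;
        pthread_mutex_lock( &s->lock );
        while( s->count == 0 ) {
            blocked = true;                        // at least one wait happened
            pthread_cond_wait( &s->cond, &s->lock );
        }
        s->count -= 1;
        pthread_mutex_unlock( &s->lock );
        return blocked;
    }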
…
-static inline void __submit( struct io_ring & ring, uint32_t idx ) {
+static inline void __submit( struct __io_data & ring, uint32_t idx ) {
     // get mutual exclusion
     lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
…
 #define __submit_prelude \
-    struct io_ring & ring = active_cluster()->io; \
+    struct __io_data & ring = *active_cluster()->io; \
     struct io_uring_sqe * sqe; \
     uint32_t idx; \
…
 #include <sys/socket.h>
 #include <sys/syscall.h>
-
-#if defined(HAVE_PREADV2)
     struct iovec;
     extern ssize_t preadv2 (int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
-#endif
-#if defined(HAVE_PWRITEV2)
-    struct iovec;
     extern ssize_t pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
-#endif

 extern int fsync(int fd);
…
 //-----------------------------------------------------------------------------
 // Asynchronous operations
-#if defined(HAVE_PREADV2)
-    ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
-        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_READV)
-            return preadv2(fd, iov, iovcnt, offset, flags);
-        #else
-            __submit_prelude
-
-            (*sqe){ IORING_OP_READV, fd, iov, iovcnt, offset };
-
-            __submit_wait
-        #endif
-    }
-#endif
-
-#if defined(HAVE_PWRITEV2)
-    ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
-        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_WRITEV)
-            return pwritev2(fd, iov, iovcnt, offset, flags);
-        #else
-            __submit_prelude
-
-            (*sqe){ IORING_OP_WRITEV, fd, iov, iovcnt, offset };
-
-            __submit_wait
-        #endif
-    }
-#endif
+ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
+    #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_READV)
+        return preadv2(fd, iov, iovcnt, offset, flags);
+    #else
+        __submit_prelude
+
+        (*sqe){ IORING_OP_READV, fd, iov, iovcnt, offset };
+
+        __submit_wait
+    #endif
+}
+
+ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
+    #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_WRITEV)
+        return pwritev2(fd, iov, iovcnt, offset, flags);
+    #else
+        __submit_prelude
+
+        (*sqe){ IORING_OP_WRITEV, fd, iov, iovcnt, offset };
+
+        __submit_wait
+    #endif
+}

 int cfa_fsync(int fd) {
…
         return IS_DEFINED(IORING_OP_READV);

-    #if defined(HAVE_PREADV2)
-        if( /*func == (fptr_t)pwritev2 || */
-            func == (fptr_t)cfa_pwritev2 )
-            #define _CFA_IO_FEATURE_IORING_OP_WRITEV ,
-            return IS_DEFINED(IORING_OP_WRITEV);
-    #endif
-
-    #if defined(HAVE_PWRITEV2)
-        if( /*func == (fptr_t)fsync || */
-            func == (fptr_t)cfa_fsync )
-            #define _CFA_IO_FEATURE_IORING_OP_FSYNC ,
-            return IS_DEFINED(IORING_OP_FSYNC);
-    #endif
+    if( /*func == (fptr_t)pwritev2 || */
+        func == (fptr_t)cfa_pwritev2 )
+        #define _CFA_IO_FEATURE_IORING_OP_WRITEV ,
+        return IS_DEFINED(IORING_OP_WRITEV);
+
+    if( /*func == (fptr_t)fsync || */
+        func == (fptr_t)cfa_fsync )
+        #define _CFA_IO_FEATURE_IORING_OP_FSYNC ,
+        return IS_DEFINED(IORING_OP_FSYNC);

     if( /*func == (fptr_t)ync_file_range || */
…
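Note: the trailing hunks depend on an IS_DEFINED macro that selects true or false at compile time based on whether a matching _CFA_IO_FEATURE_* macro was defined to a bare comma, in the style of the Linux kernel's IS_ENABLED. A plausible reconstruction of that comma trick (assumed; the actual CFA macro may differ in detail):

    #include <stdbool.h>

    #define SECOND( junk, val, ... ) val
    #define EXPAND( ... ) SECOND( __VA_ARGS__ )
    #define IS_DEFINED( x ) EXPAND( _CFA_IO_FEATURE_##x true, false )

    #define _CFA_IO_FEATURE_IORING_OP_READV ,   // feature macro expands to a comma

    // IS_DEFINED(IORING_OP_READV) -> EXPAND(, true, false) -> true
    // IS_DEFINED(IORING_OP_FSYNC) -> EXPAND(_CFA_IO_FEATURE_IORING_OP_FSYNC true, false) -> false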