Changes in / [794db28:36de20d]
- Files: 10 edited
benchmark/io/readv-posix.c (modified) (1 diff)
-
libcfa/src/bits/locks.hfa (modified) (1 diff)
-
libcfa/src/concurrency/io.cfa (modified) (22 diffs)
-
libcfa/src/concurrency/io/setup.cfa (modified) (5 diffs)
-
libcfa/src/concurrency/io/types.hfa (modified) (5 diffs)
-
libcfa/src/concurrency/iocall.cfa (modified) (19 diffs)
-
libcfa/src/concurrency/kernel.cfa (modified) (1 diff)
-
libcfa/src/concurrency/stats.cfa (modified) (4 diffs)
-
libcfa/src/concurrency/stats.hfa (modified) (1 diff)
-
libcfa/src/parseargs.cfa (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
benchmark/io/readv-posix.c
r794db28 r36de20d 44 44 45 45 int main(int argc, char * argv[]) { 46 BENCH_DECL 46 47 unsigned flags = 0; 47 48 unsigned sublen = 16; -
libcfa/src/bits/locks.hfa
r794db28 r36de20d 219 219 } 220 220 } 221 222 // Semaphore which only supports a single thread and one post223 // Semaphore which only supports a single thread224 struct oneshot {225 struct $thread * volatile ptr;226 };227 228 static inline {229 void ?{}(oneshot & this) {230 this.ptr = 0p;231 }232 233 void ^?{}(oneshot & this) {}234 235 bool wait(oneshot & this) {236 for() {237 struct $thread * expected = this.ptr;238 if(expected == 1p) return false;239 /* paranoid */ verify( expected == 0p );240 if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {241 park( __cfaabi_dbg_ctx );242 /* paranoid */ verify( this.ptr == 1p );243 return true;244 }245 }246 }247 248 bool post(oneshot & this) {249 struct $thread * got = __atomic_exchange_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);250 if( got == 0p ) return false;251 unpark( got __cfaabi_dbg_ctx2 );252 return true;253 }254 }255 221 #endif -
libcfa/src/concurrency/io.cfa
r794db28 r36de20d 41 41 #include "kernel/fwd.hfa" 42 42 #include "io/types.hfa" 43 44 // returns true of acquired as leader or second leader45 static inline bool try_lock( __leaderlock_t & this ) {46 const uintptr_t thrd = 1z | (uintptr_t)active_thread();47 bool block;48 disable_interrupts();49 for() {50 struct $thread * expected = this.value;51 if( 1p != expected && 0p != expected ) {52 /* paranoid */ verify( thrd != (uintptr_t)expected ); // We better not already be the next leader53 enable_interrupts( __cfaabi_dbg_ctx );54 return false;55 }56 struct $thread * desired;57 if( 0p == expected ) {58 // If the lock isn't locked acquire it, no need to block59 desired = 1p;60 block = false;61 }62 else {63 // If the lock is already locked try becomming the next leader64 desired = (struct $thread *)thrd;65 block = true;66 }67 if( __atomic_compare_exchange_n(&this.value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break;68 }69 if( block ) {70 enable_interrupts( __cfaabi_dbg_ctx );71 park( __cfaabi_dbg_ctx );72 disable_interrupts();73 }74 return true;75 }76 77 static inline bool next( __leaderlock_t & this ) {78 /* paranoid */ verify( ! 
kernelTLS.preemption_state.enabled );79 struct $thread * nextt;80 for() {81 struct $thread * expected = this.value;82 /* paranoid */ verify( (1 & (uintptr_t)expected) == 1 ); // The lock better be locked83 84 struct $thread * desired;85 if( 1p == expected ) {86 // No next leader, just unlock87 desired = 0p;88 nextt = 0p;89 }90 else {91 // There is a next leader, remove but keep locked92 desired = 1p;93 nextt = (struct $thread *)(~1z & (uintptr_t)expected);94 }95 if( __atomic_compare_exchange_n(&this.value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break;96 }97 98 if(nextt) {99 unpark( nextt __cfaabi_dbg_ctx2 );100 enable_interrupts( __cfaabi_dbg_ctx );101 return true;102 }103 enable_interrupts( __cfaabi_dbg_ctx );104 return false;105 }106 43 107 44 //============================================================================================= … … 156 93 //============================================================================================= 157 94 static unsigned __collect_submitions( struct __io_data & ring ); 158 static __u32__release_consumed_submission( struct __io_data & ring );95 static uint32_t __release_consumed_submission( struct __io_data & ring ); 159 96 160 97 static inline void process(struct io_uring_cqe & cqe ) { … … 163 100 164 101 data->result = cqe.res; 165 post( data->sem);102 unpark( data->thrd __cfaabi_dbg_ctx2 ); 166 103 } 167 104 … … 199 136 unsigned head = *ring.completion_q.head; 200 137 unsigned tail = *ring.completion_q.tail; 201 const __u32mask = *ring.completion_q.mask;138 const uint32_t mask = *ring.completion_q.mask; 202 139 203 140 // Nothing was new return 0 … … 206 143 } 207 144 208 __u32count = tail - head;145 uint32_t count = tail - head; 209 146 /* paranoid */ verify( count != 0 ); 210 147 for(i; count) { … … 245 182 __STATS__( true, 246 183 io.complete_q.completed_avg.val += count; 247 io.complete_q.completed_avg. 
cnt += 1;184 io.complete_q.completed_avg.fast_cnt += 1; 248 185 ) 249 186 enable_interrupts( __cfaabi_dbg_ctx ); … … 255 192 // We didn't get anything baton pass to the slow poller 256 193 else { 257 __STATS__( true,258 io.complete_q.blocks += 1;259 )260 194 __cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %p\n", &this.self); 261 195 reset = 0; … … 290 224 // 291 225 292 [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64data ) {226 [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring, uint64_t data ) { 293 227 /* paranoid */ verify( data != 0 ); 294 228 … … 296 230 __attribute((unused)) int len = 0; 297 231 __attribute((unused)) int block = 0; 298 __u32cnt = *ring.submit_q.num;299 __u32mask = *ring.submit_q.mask;232 uint32_t cnt = *ring.submit_q.num; 233 uint32_t mask = *ring.submit_q.mask; 300 234 301 235 disable_interrupts(); 302 __u32off = __tls_rand();236 uint32_t off = __tls_rand(); 303 237 enable_interrupts( __cfaabi_dbg_ctx ); 304 238 … … 307 241 // Look through the list starting at some offset 308 242 for(i; cnt) { 309 __u64expected = 0;310 __u32idx = (i + off) & mask;243 uint64_t expected = 0; 244 uint32_t idx = (i + off) & mask; 311 245 struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx]; 312 volatile __u64 * udata =&sqe->user_data;246 volatile uint64_t * udata = (volatile uint64_t *)&sqe->user_data; 313 247 314 248 if( *udata == expected && … … 336 270 } 337 271 338 static inline __u32 __submit_to_ready_array( struct __io_data & ring, __u32 idx, const __u32mask ) {272 static inline uint32_t __submit_to_ready_array( struct __io_data & ring, uint32_t idx, const uint32_t mask ) { 339 273 /* paranoid */ verify( idx <= mask ); 340 274 /* paranoid */ verify( idx != -1ul32 ); … … 343 277 __attribute((unused)) int len = 0; 344 278 __attribute((unused)) int block = 0; 345 __u32ready_mask = ring.submit_q.ready_cnt - 1;279 uint32_t ready_mask = ring.submit_q.ready_cnt - 1; 346 280 347 281 
disable_interrupts(); 348 __u32off = __tls_rand();282 uint32_t off = __tls_rand(); 349 283 enable_interrupts( __cfaabi_dbg_ctx ); 350 284 351 __u32picked;285 uint32_t picked; 352 286 LOOKING: for() { 353 287 for(i; ring.submit_q.ready_cnt) { 354 288 picked = (i + off) & ready_mask; 355 __u32expected = -1ul32;289 uint32_t expected = -1ul32; 356 290 if( __atomic_compare_exchange_n( &ring.submit_q.ready[picked], &expected, idx, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) { 357 291 break LOOKING; … … 363 297 364 298 block++; 365 366 __u32 released = __release_consumed_submission( ring ); 367 if( released == 0 ) { 299 if( try_lock(ring.submit_q.lock __cfaabi_dbg_ctx2) ) { 300 __release_consumed_submission( ring ); 301 unlock( ring.submit_q.lock ); 302 } 303 else { 368 304 yield(); 369 305 } … … 380 316 } 381 317 382 void __submit( struct io_context * ctx, __u32idx ) __attribute__((nonnull (1))) {318 void __submit( struct io_context * ctx, uint32_t idx ) __attribute__((nonnull (1))) { 383 319 __io_data & ring = *ctx->thrd.ring; 384 320 // Get now the data we definetely need 385 volatile __u32* const tail = ring.submit_q.tail;386 const __u32mask = *ring.submit_q.mask;321 volatile uint32_t * const tail = ring.submit_q.tail; 322 const uint32_t mask = *ring.submit_q.mask; 387 323 388 324 // There are 2 submission schemes, check which one we are using … … 396 332 } 397 333 else if( ring.eager_submits ) { 398 __u32 picked = __submit_to_ready_array( ring, idx, mask ); 399 400 #if defined(LEADER_LOCK) 401 if( !try_lock(ring.submit_q.submit_lock) ) { 334 uint32_t picked = __submit_to_ready_array( ring, idx, mask ); 335 336 for() { 337 yield(); 338 339 // If some one else collected our index, we are done 340 #warning ABA problem 341 if( ring.submit_q.ready[picked] != idx ) { 402 342 __STATS__( false, 403 343 io.submit_q.helped += 1; … … 405 345 return; 406 346 } 407 /* paranoid */ verify( ! 
kernelTLS.preemption_state.enabled ); 408 __STATS__( true, 409 io.submit_q.leader += 1; 347 348 if( try_lock(ring.submit_q.lock __cfaabi_dbg_ctx2) ) { 349 __STATS__( false, 350 io.submit_q.leader += 1; 351 ) 352 break; 353 } 354 355 __STATS__( false, 356 io.submit_q.busy += 1; 410 357 ) 411 #else 412 for() { 413 yield(); 414 415 if( try_lock(ring.submit_q.submit_lock __cfaabi_dbg_ctx2) ) { 416 __STATS__( false, 417 io.submit_q.leader += 1; 418 ) 419 break; 420 } 421 422 // If some one else collected our index, we are done 423 #warning ABA problem 424 if( ring.submit_q.ready[picked] != idx ) { 425 __STATS__( false, 426 io.submit_q.helped += 1; 427 ) 428 return; 429 } 430 431 __STATS__( false, 432 io.submit_q.busy += 1; 433 ) 434 } 435 #endif 358 } 436 359 437 360 // We got the lock … … 442 365 int ret = __io_uring_enter( ring, to_submit, false ); 443 366 444 #if defined(LEADER_LOCK) 445 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled ); 446 next(ring.submit_q.submit_lock); 447 #else 448 unlock(ring.submit_q.submit_lock); 449 #endif 367 unlock(ring.submit_q.lock); 450 368 if( ret < 0 ) return; 451 369 … … 462 380 else { 463 381 // get mutual exclusion 464 #if defined(LEADER_LOCK) 465 while(!try_lock(ring.submit_q.submit_lock)); 466 #else 467 lock(ring.submit_q.submit_lock __cfaabi_dbg_ctx2); 468 #endif 382 lock(ring.submit_q.lock __cfaabi_dbg_ctx2); 469 383 470 384 /* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 0, … … 504 418 __release_consumed_submission( ring ); 505 419 506 #if defined(LEADER_LOCK) 507 next(ring.submit_q.submit_lock); 508 #else 509 unlock(ring.submit_q.submit_lock); 510 #endif 420 unlock(ring.submit_q.lock); 511 421 512 422 __cfadbg_print_safe( io, "Kernel I/O : Performed io_submit for %p, returned %d\n", active_thread(), ret ); … … 514 424 } 515 425 516 // #define PARTIAL_SUBMIT 32517 426 static unsigned __collect_submitions( struct __io_data & ring ) { 518 427 /* paranoid */ verify( ring.submit_q.ready != 0p ); … … 520 
429 521 430 unsigned to_submit = 0; 522 __u32 tail = *ring.submit_q.tail; 523 const __u32 mask = *ring.submit_q.mask; 524 #if defined(PARTIAL_SUBMIT) 525 #if defined(LEADER_LOCK) 526 #error PARTIAL_SUBMIT and LEADER_LOCK cannot co-exist 527 #endif 528 const __u32 cnt = ring.submit_q.ready_cnt > PARTIAL_SUBMIT ? PARTIAL_SUBMIT : ring.submit_q.ready_cnt; 529 const __u32 offset = ring.submit_q.prev_ready; 530 ring.submit_q.prev_ready += cnt; 531 #else 532 const __u32 cnt = ring.submit_q.ready_cnt; 533 const __u32 offset = 0; 534 #endif 431 uint32_t tail = *ring.submit_q.tail; 432 const uint32_t mask = *ring.submit_q.mask; 535 433 536 434 // Go through the list of ready submissions 537 for( c; cnt ) { 538 __u32 i = (offset + c) % ring.submit_q.ready_cnt; 539 435 for( i; ring.submit_q.ready_cnt ) { 540 436 // replace any submission with the sentinel, to consume it. 541 __u32idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED);437 uint32_t idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED); 542 438 543 439 // If it was already the sentinel, then we are done … … 555 451 } 556 452 557 static __u32__release_consumed_submission( struct __io_data & ring ) {558 const __u32smask = *ring.submit_q.mask;453 static uint32_t __release_consumed_submission( struct __io_data & ring ) { 454 const uint32_t smask = *ring.submit_q.mask; 559 455 560 456 if( !try_lock(ring.submit_q.release_lock __cfaabi_dbg_ctx2) ) return 0; 561 __u32chead = *ring.submit_q.head;562 __u32phead = ring.submit_q.prev_head;457 uint32_t chead = *ring.submit_q.head; 458 uint32_t phead = ring.submit_q.prev_head; 563 459 ring.submit_q.prev_head = chead; 564 460 unlock(ring.submit_q.release_lock); 565 461 566 __u32count = chead - phead;462 uint32_t count = chead - phead; 567 463 for( i; count ) { 568 __u32idx = ring.submit_q.array[ (phead + i) & smask ];464 uint32_t idx = ring.submit_q.array[ (phead + i) & smask ]; 569 465 ring.submit_q.sqes[ idx ].user_data = 0; 570 
466 } -
libcfa/src/concurrency/io/setup.cfa
r794db28 r36de20d 298 298 if( params_in.poll_complete ) params.flags |= IORING_SETUP_IOPOLL; 299 299 300 __u32nentries = params_in.num_entries != 0 ? params_in.num_entries : 256;300 uint32_t nentries = params_in.num_entries != 0 ? params_in.num_entries : 256; 301 301 if( !is_pow2(nentries) ) { 302 302 abort("ERROR: I/O setup 'num_entries' must be a power of 2\n"); … … 362 362 // Get the pointers from the kernel to fill the structure 363 363 // submit queue 364 sq.head = (volatile __u32*)(((intptr_t)sq.ring_ptr) + params.sq_off.head);365 sq.tail = (volatile __u32*)(((intptr_t)sq.ring_ptr) + params.sq_off.tail);366 sq.mask = ( const __u32*)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask);367 sq.num = ( const __u32*)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries);368 sq.flags = ( __u32*)(((intptr_t)sq.ring_ptr) + params.sq_off.flags);369 sq.dropped = ( __u32*)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);370 sq.array = ( __u32*)(((intptr_t)sq.ring_ptr) + params.sq_off.array);364 sq.head = (volatile uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.head); 365 sq.tail = (volatile uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail); 366 sq.mask = ( const uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask); 367 sq.num = ( const uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries); 368 sq.flags = ( uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags); 369 sq.dropped = ( uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped); 370 sq.array = ( uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.array); 371 371 sq.prev_head = *sq.head; 372 372 373 373 { 374 const __u32num = *sq.num;374 const uint32_t num = *sq.num; 375 375 for( i; num ) { 376 376 sq.sqes[i].user_data = 0ul64; … … 378 378 } 379 379 380 (sq. 
submit_lock){};380 (sq.lock){}; 381 381 (sq.release_lock){}; 382 382 … … 388 388 sq.ready[i] = -1ul32; 389 389 } 390 sq.prev_ready = 0;391 390 } 392 391 else { 393 392 sq.ready_cnt = 0; 394 393 sq.ready = 0p; 395 sq.prev_ready = 0;396 394 } 397 395 398 396 // completion queue 399 cq.head = (volatile __u32*)(((intptr_t)cq.ring_ptr) + params.cq_off.head);400 cq.tail = (volatile __u32*)(((intptr_t)cq.ring_ptr) + params.cq_off.tail);401 cq.mask = ( const __u32*)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_mask);402 cq.num = ( const __u32*)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_entries);403 cq.overflow = ( __u32*)(((intptr_t)cq.ring_ptr) + params.cq_off.overflow);404 cq.cqes = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes);397 cq.head = (volatile uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.head); 398 cq.tail = (volatile uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.tail); 399 cq.mask = ( const uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_mask); 400 cq.num = ( const uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_entries); 401 cq.overflow = ( uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.overflow); 402 cq.cqes = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes); 405 403 406 404 // some paranoid checks … … 450 448 void __ioctx_register($io_ctx_thread & ctx, struct epoll_event & ev) { 451 449 ev.events = EPOLLIN | EPOLLONESHOT; 452 ev.data.u64 = ( __u64)&ctx;450 ev.data.u64 = (uint64_t)&ctx; 453 451 int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_ADD, ctx.ring->fd, &ev); 454 452 if (ret < 0) { -
libcfa/src/concurrency/io/types.hfa
r794db28 r36de20d 17 17 18 18 #if defined(CFA_HAVE_LINUX_IO_URING_H) 19 extern "C" {20 #include <linux/types.h>21 }22 23 19 #include "bits/locks.hfa" 24 25 #define LEADER_LOCK26 struct __leaderlock_t {27 struct $thread * volatile value; // ($thread) next_leader | (bool:1) is_locked28 };29 30 static inline void ?{}( __leaderlock_t & this ) { this.value = 0p; }31 20 32 21 //----------------------------------------------------------------------- … … 34 23 struct __submition_data { 35 24 // Head and tail of the ring (associated with array) 36 volatile __u32* head;37 volatile __u32* tail;38 volatile __u32prev_head;25 volatile uint32_t * head; 26 volatile uint32_t * tail; 27 volatile uint32_t prev_head; 39 28 40 29 // The actual kernel ring which uses head/tail 41 30 // indexes into the sqes arrays 42 __u32* array;31 uint32_t * array; 43 32 44 33 // number of entries and mask to go with it 45 const __u32* num;46 const __u32* mask;34 const uint32_t * num; 35 const uint32_t * mask; 47 36 48 37 // Submission flags (Not sure what for) 49 __u32* flags;38 uint32_t * flags; 50 39 51 40 // number of sqes not submitted (whatever that means) 52 __u32* dropped;41 uint32_t * dropped; 53 42 54 43 // Like head/tail but not seen by the kernel 55 volatile __u32 * ready; 56 __u32 ready_cnt; 57 __u32 prev_ready; 44 volatile uint32_t * ready; 45 uint32_t ready_cnt; 58 46 59 #if defined(LEADER_LOCK) 60 __leaderlock_t submit_lock; 61 #else 62 __spinlock_t submit_lock; 63 #endif 64 __spinlock_t release_lock; 47 __spinlock_t lock; 48 __spinlock_t release_lock; 65 49 66 50 // A buffer of sqes (not the actual ring) … … 74 58 struct __completion_data { 75 59 // Head and tail of the ring 76 volatile __u32* head;77 volatile __u32* tail;60 volatile uint32_t * head; 61 volatile uint32_t * tail; 78 62 79 63 // number of entries and mask to go with it 80 const __u32* mask;81 const __u32* num;64 const uint32_t * mask; 65 const uint32_t * num; 82 66 83 67 // number of cqes not submitted (whatever that 
means) 84 __u32* overflow;68 uint32_t * overflow; 85 69 86 70 // the kernel ring … … 95 79 struct __submition_data submit_q; 96 80 struct __completion_data completion_q; 97 __u32ring_flags;81 uint32_t ring_flags; 98 82 int fd; 99 83 bool eager_submits:1; … … 105 89 // IO user data 106 90 struct __io_user_data_t { 107 __s32result;108 oneshot sem;91 int32_t result; 92 $thread * thrd; 109 93 }; 110 94 -
libcfa/src/concurrency/iocall.cfa
r794db28 r36de20d 32 32 #include "io/types.hfa" 33 33 34 extern [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64data );35 extern void __submit( struct io_context * ctx, __u32idx ) __attribute__((nonnull (1)));36 37 static inline void ?{}(struct io_uring_sqe & this, __u8opcode, int fd) {34 extern [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring, uint64_t data ); 35 extern void __submit( struct io_context * ctx, uint32_t idx ) __attribute__((nonnull (1))); 36 37 static inline void ?{}(struct io_uring_sqe & this, uint8_t opcode, int fd) { 38 38 this.opcode = opcode; 39 39 #if !defined(IOSQE_ASYNC) … … 51 51 } 52 52 53 static inline void ?{}(struct io_uring_sqe & this, __u8 opcode, int fd, void * addr, __u32 len, __u64off ) {53 static inline void ?{}(struct io_uring_sqe & this, uint8_t opcode, int fd, void * addr, uint32_t len, uint64_t off ) { 54 54 (this){ opcode, fd }; 55 55 this.off = off; 56 this.addr = ( __u64)(uintptr_t)addr;56 this.addr = (uint64_t)(uintptr_t)addr; 57 57 this.len = len; 58 58 } … … 105 105 (void)timeout; (void)cancellation; \ 106 106 if( !context ) context = __get_io_context(); \ 107 __io_user_data_t data = { 0 }; \107 __io_user_data_t data = { 0, active_thread() }; \ 108 108 struct __io_data & ring = *context->thrd.ring; \ 109 109 struct io_uring_sqe * sqe; \ 110 __u32idx; \111 __u8sflags = REGULAR_FLAGS & submit_flags; \112 [sqe, idx] = __submit_alloc( ring, ( __u64)(uintptr_t)&data ); \110 uint32_t idx; \ 111 uint8_t sflags = REGULAR_FLAGS & submit_flags; \ 112 [sqe, idx] = __submit_alloc( ring, (uint64_t)(uintptr_t)&data ); \ 113 113 sqe->flags = sflags; 114 114 115 115 #define __submit_wait \ 116 116 /*__cfaabi_bits_print_safe( STDERR_FILENO, "Preparing user data %p for %p\n", &data, data.thrd );*/ \ 117 verify( sqe->user_data == ( __u64)(uintptr_t)&data ); \117 verify( sqe->user_data == (uint64_t)(uintptr_t)&data ); \ 118 118 __submit( context, idx ); \ 119 wait( data.sem); \119 park( 
__cfaabi_dbg_ctx ); \ 120 120 if( data.result < 0 ) { \ 121 121 errno = -data.result; \ … … 149 149 150 150 extern int fsync(int fd); 151 152 typedef __off64_t off_t; 153 typedef __off64_t off64_t; 154 extern int sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags); 151 extern int sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags); 155 152 156 153 struct msghdr; … … 163 160 extern int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen); 164 161 165 extern int fallocate(int fd, int mode, off_t offset, off_t len);166 extern int posix_fadvise(int fd, off_t offset, off_t len, int advice);162 extern int fallocate(int fd, int mode, uint64_t offset, uint64_t len); 163 extern int posix_fadvise(int fd, uint64_t offset, uint64_t len, int advice); 167 164 extern int madvise(void *addr, size_t length, int advice); 168 165 … … 193 190 sqe->fd = fd; 194 191 sqe->off = offset; 195 sqe->addr = ( __u64)iov;192 sqe->addr = (uint64_t)(uintptr_t)iov; 196 193 sqe->len = iovcnt; 197 194 sqe->rw_flags = 0; … … 210 207 __submit_prelude 211 208 212 sqe->opcode = IORING_OP_WRITEV; 213 sqe->ioprio = 0; 214 sqe->fd = fd; 215 sqe->off = offset; 216 sqe->addr = (__u64)iov; 217 sqe->len = iovcnt; 218 sqe->rw_flags = 0; 219 sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0; 209 (*sqe){ IORING_OP_WRITEV, fd, iov, iovcnt, offset }; 220 210 221 211 __submit_wait … … 230 220 __submit_prelude 231 221 232 sqe->opcode = IORING_OP_FSYNC; 233 sqe->ioprio = 0; 234 sqe->fd = fd; 235 sqe->off = 0; 236 sqe->addr = 0; 237 sqe->len = 0; 238 sqe->rw_flags = 0; 239 sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0; 240 241 __submit_wait 242 #endif 243 } 244 245 int cfa_sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) { 222 (*sqe){ IORING_OP_FSYNC, fd }; 223 224 __submit_wait 225 #endif 226 } 227 228 int cfa_sync_file_range(int 
fd, int64_t offset, int64_t nbytes, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) { 246 229 #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_SYNC_FILE_RANGE) 247 230 return sync_file_range(fd, offset, nbytes, flags); … … 292 275 293 276 (*sqe){ IORING_OP_SEND, sockfd }; 294 sqe->addr = ( __u64)buf;277 sqe->addr = (uint64_t)buf; 295 278 sqe->len = len; 296 279 sqe->msg_flags = flags; … … 307 290 308 291 (*sqe){ IORING_OP_RECV, sockfd }; 309 sqe->addr = ( __u64)buf;292 sqe->addr = (uint64_t)buf; 310 293 sqe->len = len; 311 294 sqe->msg_flags = flags; … … 322 305 323 306 (*sqe){ IORING_OP_ACCEPT, sockfd }; 324 sqe->addr = (__u64)addr;325 sqe->addr2 = ( __u64)addrlen;307 sqe->addr = (uint64_t)(uintptr_t)addr; 308 sqe->addr2 = (uint64_t)(uintptr_t)addrlen; 326 309 sqe->accept_flags = flags; 327 310 … … 337 320 338 321 (*sqe){ IORING_OP_CONNECT, sockfd }; 339 sqe->addr = ( __u64)addr;340 sqe->off = ( __u64)addrlen;341 342 __submit_wait 343 #endif 344 } 345 346 int cfa_fallocate(int fd, int mode, off_t offset, off_t len, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {322 sqe->addr = (uint64_t)(uintptr_t)addr; 323 sqe->off = (uint64_t)(uintptr_t)addrlen; 324 325 __submit_wait 326 #endif 327 } 328 329 int cfa_fallocate(int fd, int mode, uint64_t offset, uint64_t len, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) { 347 330 #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_FALLOCATE) 348 331 return fallocate( fd, mode, offset, len ); … … 361 344 } 362 345 363 int cfa_fadvise(int fd, off_t offset, off_t len, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) {346 int cfa_fadvise(int fd, uint64_t offset, uint64_t len, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context) { 364 347 #if 
!defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_FADVISE) 365 348 return posix_fadvise( fd, offset, len, advice ); … … 368 351 369 352 (*sqe){ IORING_OP_FADVISE, fd }; 370 sqe->off = ( __u64)offset;353 sqe->off = (uint64_t)offset; 371 354 sqe->len = len; 372 355 sqe->fadvise_advice = advice; … … 383 366 384 367 (*sqe){ IORING_OP_MADVISE, 0 }; 385 sqe->addr = ( __u64)addr;368 sqe->addr = (uint64_t)addr; 386 369 sqe->len = length; 387 370 sqe->fadvise_advice = advice; … … 398 381 399 382 (*sqe){ IORING_OP_OPENAT, dirfd }; 400 sqe->addr = ( __u64)pathname;383 sqe->addr = (uint64_t)pathname; 401 384 sqe->open_flags = flags; 402 385 sqe->len = mode; … … 431 414 __submit_prelude 432 415 433 (*sqe){ IORING_OP_STATX, dirfd, pathname, mask, ( __u64)statxbuf };416 (*sqe){ IORING_OP_STATX, dirfd, pathname, mask, (uint64_t)statxbuf }; 434 417 sqe->statx_flags = flags; 435 418 … … 473 456 } 474 457 else { 475 sqe->off = ( __u64)-1;458 sqe->off = (uint64_t)-1; 476 459 } 477 460 sqe->len = len; … … 481 464 } 482 465 else { 483 sqe->splice_off_in = ( __u64)-1;466 sqe->splice_off_in = (uint64_t)-1; 484 467 } 485 468 sqe->splice_flags = flags | (SPLICE_FLAGS & submit_flags); -
libcfa/src/concurrency/kernel.cfa
r794db28 r36de20d 532 532 unsigned total = this.total; 533 533 processor * proc = &this.list`first; 534 // Compilerfence is unnecessary, but gcc-8 and older incorrectly reorder code without it535 asm volatile("": : :"memory");534 // Thread fence is unnecessary, but gcc-8 and older incorrectly reorder code without it 535 __atomic_thread_fence(__ATOMIC_SEQ_CST); 536 536 if(l != __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST)) { Pause(); continue; } 537 537 return [idle, total, proc]; -
libcfa/src/concurrency/stats.cfa
r794db28 r36de20d 38 38 stats->io.submit_q.busy = 0; 39 39 stats->io.complete_q.completed_avg.val = 0; 40 stats->io.complete_q.completed_avg. cnt = 0;41 stats->io.complete_q. blocks= 0;40 stats->io.complete_q.completed_avg.slow_cnt = 0; 41 stats->io.complete_q.completed_avg.fast_cnt = 0; 42 42 #endif 43 43 } … … 60 60 61 61 #if defined(CFA_HAVE_LINUX_IO_URING_H) 62 __atomic_fetch_add( &cltr->io.submit_q.submit_avg.rdy , proc->io.submit_q.submit_avg.rdy, __ATOMIC_SEQ_CST );63 __atomic_fetch_add( &cltr->io.submit_q.submit_avg.csm , proc->io.submit_q.submit_avg.csm, __ATOMIC_SEQ_CST );64 __atomic_fetch_add( &cltr->io.submit_q.submit_avg.avl , proc->io.submit_q.submit_avg.avl, __ATOMIC_SEQ_CST );65 __atomic_fetch_add( &cltr->io.submit_q.submit_avg.cnt , proc->io.submit_q.submit_avg.cnt, __ATOMIC_SEQ_CST );66 __atomic_fetch_add( &cltr->io.submit_q.look_avg.val , proc->io.submit_q.look_avg.val, __ATOMIC_SEQ_CST );67 __atomic_fetch_add( &cltr->io.submit_q.look_avg.cnt , proc->io.submit_q.look_avg.cnt, __ATOMIC_SEQ_CST );68 __atomic_fetch_add( &cltr->io.submit_q.look_avg.block , proc->io.submit_q.look_avg.block, __ATOMIC_SEQ_CST );69 __atomic_fetch_add( &cltr->io.submit_q.alloc_avg.val , proc->io.submit_q.alloc_avg.val, __ATOMIC_SEQ_CST );70 __atomic_fetch_add( &cltr->io.submit_q.alloc_avg.cnt , proc->io.submit_q.alloc_avg.cnt, __ATOMIC_SEQ_CST );71 __atomic_fetch_add( &cltr->io.submit_q.alloc_avg.block , proc->io.submit_q.alloc_avg.block, __ATOMIC_SEQ_CST );72 __atomic_fetch_add( &cltr->io.submit_q.helped , proc->io.submit_q.helped, __ATOMIC_SEQ_CST );73 __atomic_fetch_add( &cltr->io.submit_q.leader , proc->io.submit_q.leader, __ATOMIC_SEQ_CST );74 __atomic_fetch_add( &cltr->io.submit_q.busy , proc->io.submit_q.busy, __ATOMIC_SEQ_CST );75 __atomic_fetch_add( &cltr->io.complete_q.completed_avg.val , proc->io.complete_q.completed_avg.val, __ATOMIC_SEQ_CST );76 __atomic_fetch_add( &cltr->io.complete_q.completed_avg. 
cnt, proc->io.complete_q.completed_avg.cnt, __ATOMIC_SEQ_CST );77 __atomic_fetch_add( &cltr->io.complete_q. blocks , proc->io.complete_q.blocks, __ATOMIC_SEQ_CST );62 __atomic_fetch_add( &cltr->io.submit_q.submit_avg.rdy , proc->io.submit_q.submit_avg.rdy , __ATOMIC_SEQ_CST ); 63 __atomic_fetch_add( &cltr->io.submit_q.submit_avg.csm , proc->io.submit_q.submit_avg.csm , __ATOMIC_SEQ_CST ); 64 __atomic_fetch_add( &cltr->io.submit_q.submit_avg.avl , proc->io.submit_q.submit_avg.avl , __ATOMIC_SEQ_CST ); 65 __atomic_fetch_add( &cltr->io.submit_q.submit_avg.cnt , proc->io.submit_q.submit_avg.cnt , __ATOMIC_SEQ_CST ); 66 __atomic_fetch_add( &cltr->io.submit_q.look_avg.val , proc->io.submit_q.look_avg.val , __ATOMIC_SEQ_CST ); 67 __atomic_fetch_add( &cltr->io.submit_q.look_avg.cnt , proc->io.submit_q.look_avg.cnt , __ATOMIC_SEQ_CST ); 68 __atomic_fetch_add( &cltr->io.submit_q.look_avg.block , proc->io.submit_q.look_avg.block , __ATOMIC_SEQ_CST ); 69 __atomic_fetch_add( &cltr->io.submit_q.alloc_avg.val , proc->io.submit_q.alloc_avg.val , __ATOMIC_SEQ_CST ); 70 __atomic_fetch_add( &cltr->io.submit_q.alloc_avg.cnt , proc->io.submit_q.alloc_avg.cnt , __ATOMIC_SEQ_CST ); 71 __atomic_fetch_add( &cltr->io.submit_q.alloc_avg.block , proc->io.submit_q.alloc_avg.block , __ATOMIC_SEQ_CST ); 72 __atomic_fetch_add( &cltr->io.submit_q.helped , proc->io.submit_q.helped , __ATOMIC_SEQ_CST ); 73 __atomic_fetch_add( &cltr->io.submit_q.leader , proc->io.submit_q.leader , __ATOMIC_SEQ_CST ); 74 __atomic_fetch_add( &cltr->io.submit_q.busy , proc->io.submit_q.busy , __ATOMIC_SEQ_CST ); 75 __atomic_fetch_add( &cltr->io.complete_q.completed_avg.val , proc->io.complete_q.completed_avg.val , __ATOMIC_SEQ_CST ); 76 __atomic_fetch_add( &cltr->io.complete_q.completed_avg.slow_cnt, proc->io.complete_q.completed_avg.slow_cnt, __ATOMIC_SEQ_CST ); 77 __atomic_fetch_add( &cltr->io.complete_q.completed_avg.fast_cnt, proc->io.complete_q.completed_avg.fast_cnt, __ATOMIC_SEQ_CST ); 78 78 #endif 79 79 } … … 
154 154 "- avg alloc search len : %'18.2lf\n" 155 155 "- avg alloc search block : %'18.2lf\n" 156 "- total wait calls : %'15" PRIu64 " \n"156 "- total wait calls : %'15" PRIu64 " (%'" PRIu64 " slow, %'" PRIu64 " fast)\n" 157 157 "- avg completion/wait : %'18.2lf\n" 158 "- total completion blocks: %'15" PRIu64 "\n"159 158 "\n" 160 159 , cluster ? "Cluster" : "Processor", name, id … … 166 165 , io.submit_q.alloc_avg.cnt 167 166 , aavgv, aavgb 168 , io.complete_q.completed_avg. cnt169 , ((double)io.complete_q.completed_avg.val) / io.complete_q.completed_avg.cnt170 , io.complete_q.blocks167 , io.complete_q.completed_avg.slow_cnt + io.complete_q.completed_avg.fast_cnt 168 , io.complete_q.completed_avg.slow_cnt, io.complete_q.completed_avg.fast_cnt 169 , ((double)io.complete_q.completed_avg.val) / (io.complete_q.completed_avg.slow_cnt + io.complete_q.completed_avg.fast_cnt) 171 170 ); 172 171 } -
libcfa/src/concurrency/stats.hfa
r794db28 r36de20d 90 90 struct { 91 91 volatile uint64_t val; 92 volatile uint64_t cnt; 92 volatile uint64_t slow_cnt; 93 volatile uint64_t fast_cnt; 93 94 } completed_avg; 94 volatile uint64_t blocks;95 95 } complete_q; 96 96 }; -
libcfa/src/parseargs.cfa
r794db28 r36de20d 137 137 int width = 0; 138 138 { 139 int idx = 0; 139 140 for(i; opt_count) { 140 141 if(options[i].long_name) {
Note: See TracChangeset for help on using the changeset viewer.