//
// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// io.cfa --
//
// Author           : Thierry Delisle
// Created On       : Thu Apr 23 17:31:00 2020
// Last Modified By :
// Last Modified On :
// Update Count     :
//

#define __cforall_thread__
#define _GNU_SOURCE

#if defined(__CFA_DEBUG__)
	// #define __CFA_DEBUG_PRINT_IO__
	// #define __CFA_DEBUG_PRINT_IO_CORE__
#endif

#if defined(CFA_HAVE_LINUX_IO_URING_H)
	#include <errno.h>
	#include <signal.h>
	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>

	extern "C" {
		#include <sys/syscall.h>
		#include <sys/eventfd.h>
		#include <sys/uio.h>

		#include <linux/io_uring.h>
	}

	#include "stats.hfa"
	#include "kernel.hfa"
	#include "kernel/fwd.hfa"
	#include "kernel/private.hfa"
	#include "kernel/cluster.hfa"
	#include "io/types.hfa"

	// Human-readable names for the io_uring opcodes, indexable by IORING_OP_* value
	// (used for debug printing).
	__attribute__((unused)) static const char * opcodes[] = {
		"OP_NOP",
		"OP_READV",
		"OP_WRITEV",
		"OP_FSYNC",
		"OP_READ_FIXED",
		"OP_WRITE_FIXED",
		"OP_POLL_ADD",
		"OP_POLL_REMOVE",
		"OP_SYNC_FILE_RANGE",
		"OP_SENDMSG",
		"OP_RECVMSG",
		"OP_TIMEOUT",
		"OP_TIMEOUT_REMOVE",
		"OP_ACCEPT",
		"OP_ASYNC_CANCEL",
		"OP_LINK_TIMEOUT",
		"OP_CONNECT",
		"OP_FALLOCATE",
		"OP_OPENAT",
		"OP_CLOSE",
		"OP_FILES_UPDATE",
		"OP_STATX",
		"OP_READ",
		"OP_WRITE",
		"OP_FADVISE",
		"OP_MADVISE",
		"OP_SEND",
		"OP_RECV",
		"OP_OPENAT2",
		"OP_EPOLL_CTL",
		"OP_SPLICE",
		"OP_PROVIDE_BUFFERS",
		"OP_REMOVE_BUFFERS",
		"OP_TEE",
		"INVALID_OP"
	};

	static $io_context * __ioarbiter_allocate( $io_arbiter & this, __u32 idxs[], __u32 want );
	static void __ioarbiter_submit( $io_context * , __u32 idxs[], __u32 have, bool lazy );
	static void __ioarbiter_flush ( $io_context & );
	static inline void __ioarbiter_notify( $io_context & ctx );

//=============================================================================================
// I/O Polling
//=============================================================================================
	static inline unsigned __flush( struct $io_context & );
	static inline __u32 __release_sqes( struct $io_context & );
	extern void __kernel_unpark( thread$ * thrd, unpark_hint );

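	// Note: ioring_syscall below wraps the raw io_uring_enter(2) system call:
	//   int io_uring_enter(unsigned fd, unsigned to_submit, unsigned min_complete,
	//                      unsigned flags, sigset_t * sig);
	// The raw syscall form takes a sixth argument, the size of the signal set in bytes
	// (hence the _NSIG / 8 below); no signal mask is installed since sig is null.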
	static void ioring_syscall( struct $io_context & ctx, unsigned int min_comp, unsigned int flags ) {
		__STATS__( true, io.calls.flush++; )
		int ret;
		for() {
			ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, flags, (sigset_t *)0p, _NSIG / 8);
			if( ret < 0 ) {
				switch((int)errno) {
				case EINTR:
					continue;
				case EAGAIN:
				case EBUSY:
					// Update statistics
					__STATS__( false, io.calls.errors.busy ++; )
					return;
				default:
					abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
				}
			}
			break;
		}

		__cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd);
		__STATS__( true, io.calls.submitted += ret; )
		/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );
		/* paranoid */ verify( ctx.sq.to_submit >= ret );

		ctx.sq.to_submit -= ret;

		/* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num );

		// Release the consumed SQEs
		__release_sqes( ctx );

		/* paranoid */ verify( ! __preemption_enabled() );

		__atomic_store_n(&ctx.proc->io.pending, false, __ATOMIC_RELAXED);
	}

	static bool try_acquire( $io_context * ctx ) __attribute__((nonnull(1))) {
		/* paranoid */ verify( ! __preemption_enabled() );
		/* paranoid */ verify( ready_schedule_islocked() );

		// If the completion queue is empty, there is nothing to drain
		{
			const __u32 head = *ctx->cq.head;
			const __u32 tail = *ctx->cq.tail;

			if(head == tail) return false;
		}

		// Drain the queue
		if(!__atomic_try_acquire(&ctx->cq.lock)) {
			__STATS__( false, io.calls.locked++; )
			return false;
		}

		return true;
	}

	static bool __cfa_do_drain( $io_context * ctx, cluster * cltr ) __attribute__((nonnull(1, 2))) {
		/* paranoid */ verify( ! __preemption_enabled() );
		/* paranoid */ verify( ready_schedule_islocked() );
		/* paranoid */ verify( ctx->cq.lock == true );

		const __u32 mask = *ctx->cq.mask;
		unsigned long long ts_prev = ctx->cq.ts;

		// re-read the head and tail in case they already changed.
		const __u32 head = *ctx->cq.head;
		const __u32 tail = *ctx->cq.tail;
		const __u32 count = tail - head;
		__STATS__( false, io.calls.drain++; io.calls.completed += count; )

		for(i; count) {
			unsigned idx = (head + i) & mask;
			volatile struct io_uring_cqe & cqe = ctx->cq.cqes[idx];

			/* paranoid */ verify(&cqe);

			struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data;
			// __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future );

			__kernel_unpark( fulfil( *future, cqe.res, false ), UNPARK_LOCAL );
		}

		unsigned long long ts_next = ctx->cq.ts = rdtscl();

		// Mark the cqes as seen by advancing the head.
		// Ensure that the kernel only sees the new value of the head index after the CQEs have been read.
		__atomic_store_n( ctx->cq.head, head + count, __ATOMIC_SEQ_CST );
		ctx->proc->idle_wctx.drain_time = ts_next;

		__cfadbg_print_safe(io, "Kernel I/O : %u completed age %llu\n", count, ts_next);
		/* paranoid */ verify( ready_schedule_islocked() );
		/* paranoid */ verify( ! __preemption_enabled() );

		__atomic_unlock(&ctx->cq.lock);

		touch_tsc( cltr->sched.io.tscs, ctx->cq.id, ts_prev, ts_next );

		return true;
	}

	bool __cfa_io_drain( processor * proc ) {
		bool local = false;
		bool remote = false;

		ready_schedule_lock();

		cluster * const cltr = proc->cltr;
		$io_context * const ctx = proc->io.ctx;
		/* paranoid */ verify( cltr );
		/* paranoid */ verify( ctx );

		with(cltr->sched) {
			const size_t ctxs_count = io.count;

			/* paranoid */ verify( ready_schedule_islocked() );
			/* paranoid */ verify( ! __preemption_enabled() );
			/* paranoid */ verify( active_processor() == proc );
			/* paranoid */ verify( __shard_factor.io > 0 );
			/* paranoid */ verify( ctxs_count > 0 );
			/* paranoid */ verify( ctx->cq.id < ctxs_count );

			const unsigned this_cache = cache_id(cltr, ctx->cq.id / __shard_factor.io);
			const unsigned long long ctsc = rdtscl();

			if(proc->io.target == MAX) {
				// No helping target yet: randomly pick a candidate context,
				// biased towards contexts sharing this processor's cache.
				uint64_t chaos = __tls_rand();
				unsigned ext = chaos & 0xff;
				unsigned other = (chaos >> 8) % ctxs_count;

				if(ext < 3 || __atomic_load_n(&caches[other / __shard_factor.io].id, __ATOMIC_RELAXED) == this_cache) {
					proc->io.target = other;
				}
			}
			else {
				const unsigned target = proc->io.target;
				/* paranoid */ verify( io.tscs[target].tv != MAX );
				HELP: if(target < ctxs_count) {
					// Only help the target context if its completions are older than the cutoff.
					const unsigned long long cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io);
					const unsigned long long age = moving_average(ctsc, io.tscs[target].tv, io.tscs[target].ma);
					__cfadbg_print_safe(io, "Kernel I/O: Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, ctx->cq.id, age, cutoff, age > cutoff ? "yes" : "no");
					if(age <= cutoff) break HELP;

					if(!try_acquire(io.data[target])) break HELP;

					if(!__cfa_do_drain( io.data[target], cltr )) break HELP;

					remote = true;
					__STATS__( false, io.calls.helped++; )
				}
				proc->io.target = MAX;
			}
		}

		// Drain the local queue
		if(try_acquire( proc->io.ctx )) {
			local = __cfa_do_drain( proc->io.ctx, cltr );
		}

		/* paranoid */ verify( ready_schedule_islocked() );
		/* paranoid */ verify( ! __preemption_enabled() );
		/* paranoid */ verify( active_processor() == proc );

		ready_schedule_unlock();
		return local || remote;
	}

	bool __cfa_io_flush( processor * proc ) {
		/* paranoid */ verify( ! __preemption_enabled() );
		/* paranoid */ verify( proc );
		/* paranoid */ verify( proc->io.ctx );

		$io_context & ctx = *proc->io.ctx;

		__ioarbiter_flush( ctx );

		if(ctx.sq.to_submit != 0) {
			ioring_syscall(ctx, 0, 0);
		}

		return __cfa_io_drain( proc );
	}

//=============================================================================================
// I/O Submissions
//=============================================================================================

// Submission steps :
// 1 - Allocate a queue entry. The ring already has memory for all entries but only the ones
//     listed in sq.array are visible to the kernel. For those not listed, the kernel does not
//     offer any assurance that an entry is not being filled by multiple threads. Therefore, we
//     need to write an allocator that allows allocating concurrently.
//
// 2 - Actually fill the submit entry, this is the only simple and straightforward step.
//
// 3 - Append the entry index to the array and adjust the tail accordingly. This operation
//     needs to reach two points of consensus at the same time:
//     A - The order in which entries are listed in the array: no two threads must pick the
//         same index for their entries
//     B - When the tail can be updated for the kernel: EVERY entry in the array between
//         head and tail must be fully filled and shouldn't ever be touched again.
//
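// As an illustration, a typical submitter goes through the three steps roughly as follows.
// This is a minimal sketch, not verbatim library code; the NOP opcode and the waiting call
// are stand-ins:
//
//     io_future_t future;
//     __u32 idx;
//     struct io_uring_sqe * sqe;
//     struct $io_context * ctx = cfa_io_allocate( &sqe, &idx, 1 ); // step 1
//     sqe->opcode = IORING_OP_NOP;                                 // step 2 (zero the other
//     sqe->user_data = (uintptr_t)&future;                         //  fields, as __kernel_read
//                                                                  //  below demonstrates)
//     cfa_io_submit( ctx, &idx, 1, true );                         // step 3, lazy submit
//     wait( future );                                              // block until fulfilled
//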
	//=============================================================================================
	// Allocation
	// for the user's convenience, fill the sqes from the indexes
	static inline void __fill(struct io_uring_sqe * out_sqes[], __u32 want, __u32 idxs[], struct $io_context * ctx)  {
		struct io_uring_sqe * sqes = ctx->sq.sqes;
		for(i; want) {
			// __cfadbg_print_safe(io, "Kernel I/O : filling loop\n");
			out_sqes[i] = &sqes[idxs[i]];
		}
	}

	// Try to directly allocate from a given context
	// Not thread-safe
	static inline bool __alloc(struct $io_context * ctx, __u32 idxs[], __u32 want) {
		__sub_ring_t & sq = ctx->sq;
		const __u32 mask  = *sq.mask;
		__u32 fhead = sq.free_ring.head;    // get the current head of the queue
		__u32 ftail = sq.free_ring.tail;    // get the current tail of the queue

		// If we don't have enough sqes, fail
		if((ftail - fhead) < want) { return false; }

		// copy all the indexes we want from the available list
		for(i; want) {
			// __cfadbg_print_safe(io, "Kernel I/O : allocating loop\n");
			idxs[i] = sq.free_ring.array[(fhead + i) & mask];
		}

		// Advance the head to mark the indexes as consumed
		__atomic_store_n(&sq.free_ring.head, fhead + want, __ATOMIC_RELEASE);

		// return success
		return true;
	}

	// Allocate a submit queue entry.
	// The kernel cannot see these entries until they are submitted, but other threads must be
	// able to see which entries can be used and which are already in use by another thread.
	// For convenience, return both the index and the pointer to the sqe
	// sqe == &sqes[idx]
	struct $io_context * cfa_io_allocate(struct io_uring_sqe * sqes[], __u32 idxs[], __u32 want) {
		// __cfadbg_print_safe(io, "Kernel I/O : attempting to allocate %u\n", want);

		disable_interrupts();
		processor * proc = __cfaabi_tls.this_processor;
		$io_context * ctx = proc->io.ctx;
		/* paranoid */ verify( __cfaabi_tls.this_processor );
		/* paranoid */ verify( ctx );

		// __cfadbg_print_safe(io, "Kernel I/O : attempting fast allocation\n");

		// We can proceed to the fast path
		if( __alloc(ctx, idxs, want) ) {
			// Allocation was successful
			__STATS__( true, io.alloc.fast += 1; )
			enable_interrupts();

			// __cfadbg_print_safe(io, "Kernel I/O : fast allocation successful from ring %d\n", ctx->fd);

			__fill( sqes, want, idxs, ctx );
			return ctx;
		}
		// The fast path failed, fallback
		__STATS__( true, io.alloc.fail += 1; )

		// Fast path failed, fallback on arbitration
		__STATS__( true, io.alloc.slow += 1; )
		enable_interrupts();

		$io_arbiter * ioarb = proc->cltr->io.arbiter;
		/* paranoid */ verify( ioarb );

		// __cfadbg_print_safe(io, "Kernel I/O : falling back on arbiter for allocation\n");

		struct $io_context * ret = __ioarbiter_allocate(*ioarb, idxs, want);

		// __cfadbg_print_safe(io, "Kernel I/O : slow allocation completed from ring %d\n", ret->fd);

		__fill( sqes, want, idxs, ret );
		return ret;
	}

	//=============================================================================================
	// Submission
	static inline void __submit_only( struct $io_context * ctx, __u32 idxs[], __u32 have) {
		// We can proceed to the fast path
		// Get the right objects
		__sub_ring_t & sq = ctx->sq;
		const __u32 mask  = *sq.mask;
		__u32 tail = *sq.kring.tail;

		// Add the sqes to the array
		for( i; have ) {
			// __cfadbg_print_safe(io, "Kernel I/O : __submit loop\n");
			sq.kring.array[ (tail + i) & mask ] = idxs[i];
		}

		// Make the sqes visible to the submitter
		__atomic_store_n(sq.kring.tail, tail + have, __ATOMIC_RELEASE);
		sq.to_submit += have;

		__atomic_store_n(&ctx->proc->io.pending, true, __ATOMIC_RELAXED);
		__atomic_store_n(&ctx->proc->io.dirty  , true, __ATOMIC_RELAXED);
	}

	static inline void __submit( struct $io_context * ctx, __u32 idxs[], __u32 have, bool lazy) {
		__sub_ring_t & sq = ctx->sq;
		__submit_only(ctx, idxs, have);

		// Flush eagerly if the ring has accumulated too many entries,
		if(sq.to_submit > 30) {
			__tls_stats()->io.flush.full++;
			__cfa_io_flush( ctx->proc );
		}
		// or if the caller explicitly asked for a non-lazy submit.
		if(!lazy) {
			__tls_stats()->io.flush.eager++;
			__cfa_io_flush( ctx->proc );
		}
	}

	void cfa_io_submit( struct $io_context * inctx, __u32 idxs[], __u32 have, bool lazy ) __attribute__((nonnull (1))) {
		// __cfadbg_print_safe(io, "Kernel I/O : attempting to submit %u (%s)\n", have, lazy ? "lazy" : "eager");

		disable_interrupts();
		processor * proc = __cfaabi_tls.this_processor;
		$io_context * ctx = proc->io.ctx;
		/* paranoid */ verify( __cfaabi_tls.this_processor );
		/* paranoid */ verify( ctx );

		// Can we proceed to the fast path?
		if( ctx == inctx )		// We have the right instance?
		{
			__submit(ctx, idxs, have, lazy);

			// Mark the instance as no longer in-use, re-enable interrupts and return
			__STATS__( true, io.submit.fast += 1; )
			enable_interrupts();

			// __cfadbg_print_safe(io, "Kernel I/O : submitted on fast path\n");
			return;
		}

		// Fast path failed, fallback on arbitration
		__STATS__( true, io.submit.slow += 1; )
		enable_interrupts();

		// __cfadbg_print_safe(io, "Kernel I/O : falling back on arbiter for submission\n");

		__ioarbiter_submit(inctx, idxs, have, lazy);
	}

	//=============================================================================================
	// Flushing
	// Go through the ring's submit queue and release everything that has already been consumed
	// by io_uring
	// This cannot be done by multiple threads
	static __u32 __release_sqes( struct $io_context & ctx ) {
		const __u32 mask = *ctx.sq.mask;

		__attribute__((unused))
		__u32 ctail = *ctx.sq.kring.tail;    // get the current tail of the queue
		__u32 chead = *ctx.sq.kring.head;    // get the current head of the queue
		__u32 phead = ctx.sq.kring.released; // get the head the last time we were here

		__u32 ftail = ctx.sq.free_ring.tail; // get the current tail of the free queue

		// the 3 fields are organized like this diagram,
		// except it's a ring:
		// ---+--------+--------+----
		// ---+--------+--------+----
		//    ^        ^        ^
		// phead    chead    ctail

		// make sure ctail doesn't wrap around and reach phead
		/* paranoid */ verify(
			   (ctail >= chead && chead >= phead)
			|| (chead >= phead && phead >= ctail)
			|| (phead >= ctail && ctail >= chead)
		);

		// find the range we need to clear
		__u32 count = chead - phead;

		if(count == 0) {
			return 0;
		}

		// We acquired a previous-head/current-head range,
		// go through the range and release the sqes
		for( i; count ) {
			// __cfadbg_print_safe(io, "Kernel I/O : release loop\n");
			__u32 idx = ctx.sq.kring.array[ (phead + i) & mask ];
			ctx.sq.free_ring.array[ (ftail + i) & mask ] = idx;
		}

		ctx.sq.kring.released = chead;		// note up to where we processed
		__atomic_store_n(&ctx.sq.free_ring.tail, ftail + count, __ATOMIC_SEQ_CST);

		__ioarbiter_notify(ctx);

		return count;
	}
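
	// Unsigned wrap-around note: the head/tail indexes above are free-running __u32 counters,
	// reduced modulo the ring size only on access (via "& mask"), so differences such as
	// "chead - phead" remain correct across overflow. For example (illustrative values only),
	// with phead == 0xfffffffe and chead == 0x00000002, "chead - phead" is 4 entries, and
	// (phead + i) & mask still walks the correct slots.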

//=============================================================================================
// I/O Arbiter
//=============================================================================================
	static inline bool enqueue(__outstanding_io_queue & queue, __outstanding_io & item) {
		bool was_empty;

		// Lock the list, it's not thread-safe
		lock( queue.lock __cfaabi_dbg_ctx2 );
		{
			was_empty = empty(queue.queue);

			// Add our request to the list
			add( queue.queue, item );

			// Mark as pending
			__atomic_store_n( &queue.empty, false, __ATOMIC_SEQ_CST );
		}
		unlock( queue.lock );

		return was_empty;
	}

	static inline bool empty(__outstanding_io_queue & queue ) {
		return __atomic_load_n( &queue.empty, __ATOMIC_SEQ_CST);
	}

	static $io_context * __ioarbiter_allocate( $io_arbiter & this, __u32 idxs[], __u32 want ) {
		// __cfadbg_print_safe(io, "Kernel I/O : arbiter allocating\n");

		__STATS__( false, io.alloc.block += 1; )

		// No one has any resources left, wait for something to finish
		// We need to add ourselves to a list of pending allocs and wait for an answer
		__pending_alloc pa;
		pa.idxs = idxs;
		pa.want = want;

		enqueue(this.pending, (__outstanding_io&)pa);

		wait( pa.sem );

		return pa.ctx;
	}

	static void __ioarbiter_notify( $io_arbiter & this, $io_context * ctx ) {
		/* paranoid */ verify( !empty(this.pending.queue) );

		lock( this.pending.lock __cfaabi_dbg_ctx2 );
		{
			while( !empty(this.pending.queue) ) {
				__cfadbg_print_safe(io, "Kernel I/O : notifying\n");
				__u32 have = ctx->sq.free_ring.tail - ctx->sq.free_ring.head;
				__pending_alloc & pa = (__pending_alloc&)head( this.pending.queue );

				// Stop once the ring no longer has enough free sqes for the oldest request,
				// otherwise the paranoid check below would fire on the failed __alloc.
				if( pa.want > have ) goto DONE;
				drop( this.pending.queue );

				/* paranoid */ __attribute__((unused)) bool ret =
					__alloc(ctx, pa.idxs, pa.want);
				/* paranoid */ verify( ret );

				pa.ctx = ctx;

				post( pa.sem );
			}

			this.pending.empty = true;
			DONE:;
		}
		unlock( this.pending.lock );
	}

	static void __ioarbiter_notify( $io_context & ctx ) {
		if(!empty( ctx.arbiter->pending )) {
			__ioarbiter_notify( *ctx.arbiter, &ctx );
		}
	}

	// Simply append to the pending external submissions
	static void __ioarbiter_submit( $io_context * ctx, __u32 idxs[], __u32 have, bool lazy ) {
		__cfadbg_print_safe(io, "Kernel I/O : submitting %u from the arbiter to context %u\n", have, ctx->fd);

		__cfadbg_print_safe(io, "Kernel I/O : waiting to submit %u\n", have);

		__external_io ei;
		ei.idxs = idxs;
		ei.have = have;
		ei.lazy = lazy;

		bool we = enqueue(ctx->ext_sq, (__outstanding_io&)ei);

		__atomic_store_n(&ctx->proc->io.pending, true, __ATOMIC_SEQ_CST);

		if( we ) {
			sigval_t value = { PREEMPT_IO };
			pthread_sigqueue(ctx->proc->kernel_thread, SIGUSR1, value);
		}

		wait( ei.sem );

		__cfadbg_print_safe(io, "Kernel I/O : %u submitted from arbiter\n", have);
	}
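
	// Hand-off note: the first external submitter to enqueue (we == true) pokes the owning
	// processor with SIGUSR1/PREEMPT_IO so its kernel thread eventually runs __cfa_io_flush,
	// which drains ext_sq via __ioarbiter_flush below and posts each waiter's semaphore;
	// later enqueuers rely on that signal already being in flight.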

	static void __ioarbiter_flush( $io_context & ctx ) {
		if(!empty( ctx.ext_sq )) {
			__STATS__( false, io.flush.external += 1; )

			__cfadbg_print_safe(io, "Kernel I/O : arbiter flushing\n");

			lock( ctx.ext_sq.lock __cfaabi_dbg_ctx2 );
			{
				while( !empty(ctx.ext_sq.queue) ) {
					__external_io & ei = (__external_io&)drop( ctx.ext_sq.queue );

					__submit_only(&ctx, ei.idxs, ei.have);

					post( ei.sem );
				}

				ctx.ext_sq.empty = true;
			}
			unlock( ctx.ext_sq.lock );
		}
	}

	#if defined(CFA_WITH_IO_URING_IDLE)
		bool __kernel_read(processor * proc, io_future_t & future, iovec & iov, int fd) {
			$io_context * ctx = proc->io.ctx;
			/* paranoid */ verify( ! __preemption_enabled() );
			/* paranoid */ verify( proc == __cfaabi_tls.this_processor );
			/* paranoid */ verify( ctx );

			__u32 idx;
			struct io_uring_sqe * sqe;

			// We can proceed to the fast path
			if( !__alloc(ctx, &idx, 1) ) {
				/* paranoid */ verify( false ); // for now check if this happens, next time just abort the sleep.
				return false;
			}

			// Allocation was successful
			__fill( &sqe, 1, &idx, ctx );

			sqe->user_data = (uintptr_t)&future;
			sqe->flags = 0;
			sqe->fd = fd;
			sqe->off = 0;
			sqe->ioprio = 0;
			sqe->fsync_flags = 0;
			sqe->__pad2[0] = 0;
			sqe->__pad2[1] = 0;
			sqe->__pad2[2] = 0;

			#if defined(CFA_HAVE_IORING_OP_READ)
				sqe->opcode = IORING_OP_READ;
				sqe->addr = (uint64_t)iov.iov_base;
				sqe->len = iov.iov_len;
			#elif defined(CFA_HAVE_READV) && defined(CFA_HAVE_IORING_OP_READV)
				sqe->opcode = IORING_OP_READV;
				sqe->addr = (uintptr_t)&iov;
				sqe->len = 1;
			#else
				#error CFA_WITH_IO_URING_IDLE but none of CFA_HAVE_READV, CFA_HAVE_IORING_OP_READV or CFA_HAVE_IORING_OP_READ defined
			#endif

			// Make sure the sqe is fully written before submitting
			asm volatile("": : :"memory");

			/* paranoid */ verify( sqe->user_data == (uintptr_t)&future );
			__submit_only( ctx, &idx, 1 );

			/* paranoid */ verify( proc == __cfaabi_tls.this_processor );
			/* paranoid */ verify( ! __preemption_enabled() );

			return true;
		}
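
		// Idle-sleep sketch: __cfa_io_idle arms a single pending READ on the processor's
		// eventfd through __kernel_read above, then enters the kernel with
		// IORING_ENTER_GETEVENTS to sleep until either an I/O operation completes or another
		// processor writes to the eventfd, completing that read. The drain afterwards then
		// fulfils the eventfd future along with any regular completions.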
		void __cfa_io_idle( processor * proc ) {
			iovec iov;
			__atomic_acquire( &proc->io.ctx->cq.lock );

			__attribute__((used)) volatile bool was_reset = false;

			with( proc->idle_wctx ) {
				// Do we already have a pending read?
				if(available(*ftr)) {
					// There is no pending read, we need to add one
					reset(*ftr);

					iov.iov_base = rdbuf;
					iov.iov_len  = sizeof(eventfd_t);
					__kernel_read( proc, *ftr, iov, evfd );
					// poison the result and buffer to catch spurious wake-ups (debug aid)
					ftr->result = 0xDEADDEAD;
					*((eventfd_t *)rdbuf) = 0xDEADDEADDEADDEAD;
					was_reset = true;
				}
			}

			if( !__atomic_load_n( &proc->do_terminate, __ATOMIC_SEQ_CST ) ) {
				__ioarbiter_flush( *proc->io.ctx );
				proc->idle_wctx.sleep_time = rdtscl();
				ioring_syscall( *proc->io.ctx, 1, IORING_ENTER_GETEVENTS);
			}

			ready_schedule_lock();
			__cfa_do_drain( proc->io.ctx, proc->cltr );
			ready_schedule_unlock();

			asm volatile ("" :: "m" (was_reset));
		}
	#endif
#endif