[3e2b9c9] | 1 | // |
---|
| 2 | // Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo |
---|
| 3 | // |
---|
| 4 | // The contents of this file are covered under the licence agreement in the |
---|
| 5 | // file "LICENCE" distributed with Cforall. |
---|
| 6 | // |
---|
[454f478] | 7 | // io/types.hfa -- PRIVATE |
---|
| 8 | // Types used by the I/O subsystem |
---|
[3e2b9c9] | 9 | // |
---|
| 10 | // Author : Thierry Delisle |
---|
| 11 | // Created On : Fri Jul 31 16:22:47 2020 |
---|
| 12 | // Last Modified By : |
---|
| 13 | // Last Modified On : |
---|
| 14 | // Update Count : |
---|
| 15 | // |
---|
| 16 | |
---|
| 17 | #pragma once |
---|
| 18 | |
---|
[b035046] | 19 | #include <limits.h> |
---|
| 20 | |
---|
[930e57e] | 21 | extern "C" { |
---|
| 22 | #include <linux/types.h> |
---|
| 23 | } |
---|
[4998155] | 24 | |
---|
[9db2c92] | 25 | #include "bits/locks.hfa" |
---|
[40a606d2] | 26 | #include "iofwd.hfa" |
---|
[454f478] | 27 | #include "kernel/fwd.hfa" |
---|
[3e2b9c9] | 28 | |
---|
[930e57e] | 29 | #if defined(CFA_HAVE_LINUX_IO_URING_H) |
---|
[78da4ab] | 30 | #include "monitor.hfa" |
---|
[2fafe7e] | 31 | |
---|
[78da4ab] | 32 | struct processor; |
---|
[8bee858] | 33 | monitor io_arbiter$; |
---|
[2fafe7e] | 34 | |
---|
[3e2b9c9] | 35 | //----------------------------------------------------------------------- |
---|
| 36 | // Ring Data structure |
---|
[26544f9] | 37 | // Represents the io_uring submission ring, which contains operations that will be sent to io_uring for processing |
---|
| 38 | struct __sub_ring_t { |
---|
| 39 | // lock needed because remote processors might need to flush the instance |
---|
| 40 | __spinlock_t lock; |
---|
| 41 | |
---|
[78da4ab] | 42 | struct { |
---|
| 43 | // Head and tail of the ring (associated with array) |
---|
| 44 | volatile __u32 * head; // one past the last index consumed by the kernel |
---|
| 45 | volatile __u32 * tail; // one past the last index visible to the kernel |
---|
| 46 | volatile __u32 released; // one past the last index released back to the free list |
---|
| 47 | |
---|
| 48 | // The actual kernel ring, which uses head/tail |
---|
| 49 | // holds indexes into the sqes array |
---|
| 50 | __u32 * array; |
---|
| 51 | } kring; |
---|
| 52 | |
---|
| 53 | struct { |
---|
| 54 | volatile __u32 head; |
---|
| 55 | volatile __u32 tail; |
---|
| 56 | // The ring which contains free allocations |
---|
| 57 | // holds indexes into the sqes array |
---|
| 58 | __u32 * array; |
---|
| 59 | } free_ring; |
---|
| 60 | |
---|
| 61 | // number of sqes to submit on the next system call. |
---|
[26544f9] | 62 | volatile __u32 to_submit; |
---|
[3e2b9c9] | 63 | |
---|
| 64 | // number of entries and mask to go with it |
---|
[4998155] | 65 | const __u32 * num; |
---|
| 66 | const __u32 * mask; |
---|
[3e2b9c9] | 67 | |
---|
[78da4ab] | 68 | // Submission flags, currently only IORING_SETUP_SQPOLL |
---|
[4998155] | 69 | __u32 * flags; |
---|
[3e2b9c9] | 70 | |
---|
[78da4ab] | 71 | // number of sqes not submitted |
---|
| 72 | // From documentation : [dropped] is incremented for each invalid submission queue entry encountered in the ring buffer. |
---|
[4998155] | 73 | __u32 * dropped; |
---|
[3e2b9c9] | 74 | |
---|
| 75 | // A buffer of sqes (not the actual ring) |
---|
[78da4ab] | 76 | struct io_uring_sqe * sqes; |
---|
[3e2b9c9] | 77 | |
---|
| 78 | // The location and size of the mmapped area |
---|
| 79 | void * ring_ptr; |
---|
| 80 | size_t ring_sz; |
---|
[26544f9] | 81 | |
---|
| 82 | // for debug purposes, whether or not the last flush was due to an arbiter flush |
---|
| 83 | bool last_external; |
---|
[3e2b9c9] | 84 | }; |
---|
| 85 | |
---|
[26544f9] | 86 | // Represents the io_uring completion ring, which contains operations that have completed |
---|
[78da4ab] | 87 | struct __cmp_ring_t { |
---|
[26544f9] | 88 | // needed because remote processors can help drain the buffer |
---|
| 89 | volatile bool try_lock; |
---|
[4ecc35a] | 90 | |
---|
[26544f9] | 91 | // id of the ring, used for the helping/topology algorithms |
---|
[78a580d] | 92 | unsigned id; |
---|
| 93 | |
---|
[26544f9] | 94 | // timestamp of the last time the ring was drained |
---|
[78a580d] | 95 | unsigned long long ts; |
---|
| 96 | |
---|
[3e2b9c9] | 97 | // Head and tail of the ring |
---|
[4998155] | 98 | volatile __u32 * head; |
---|
| 99 | volatile __u32 * tail; |
---|
[3e2b9c9] | 100 | |
---|
| 101 | // number of entries and mask to go with it |
---|
[4998155] | 102 | const __u32 * mask; |
---|
| 103 | const __u32 * num; |
---|
[3e2b9c9] | 104 | |
---|
[78da4ab] | 105 | // NOTE(review): presumably io_uring's CQ overflow counter (completions lost because the ring was full) -- original author was unsure, confirm against io_uring_setup(2) |
---|
[4998155] | 106 | __u32 * overflow; |
---|
[3e2b9c9] | 107 | |
---|
| 108 | // the kernel ring |
---|
[426f60c] | 109 | volatile struct io_uring_cqe * cqes; |
---|
[3e2b9c9] | 110 | |
---|
| 111 | // The location and size of the mmapped area |
---|
| 112 | void * ring_ptr; |
---|
| 113 | size_t ring_sz; |
---|
| 114 | }; |
---|
| 115 | |
---|
[26544f9] | 116 | // Base struct representing an io operation that still needs processing |
---|
| 117 | // actual operations are expected to inherit from this (as __external_io and __pending_alloc do) |
---|
[11054eb] | 118 | struct __outstanding_io { |
---|
[26544f9] | 119 | // intrusive link fields |
---|
[9d47c1f] | 120 | inline dlink(__outstanding_io); |
---|
[26544f9] | 121 | |
---|
| 122 | // primitive on which to block until the io is processed |
---|
[a55472cc] | 123 | oneshot waitctx; |
---|
[11054eb] | 124 | }; |
---|
[9d47c1f] | 125 | P9_EMBEDDED( __outstanding_io, dlink(__outstanding_io) ) |
---|
[11054eb] | 126 | |
---|
[26544f9] | 127 | // Queue of operations that are outstanding |
---|
[11054eb] | 128 | struct __outstanding_io_queue { |
---|
[26544f9] | 129 | // spinlock for protection |
---|
| 130 | // TODO: change to a lock that blocks; I haven't examined whether it should be a kernel or user lock |
---|
[11054eb] | 131 | __spinlock_t lock; |
---|
[26544f9] | 132 | |
---|
| 133 | // the actual queue |
---|
[9d47c1f] | 134 | dlist(__outstanding_io) queue; |
---|
[26544f9] | 135 | |
---|
| 136 | // volatile flag used to avoid the need for taking the lock when the queue is empty |
---|
[11054eb] | 137 | volatile bool empty; |
---|
| 138 | }; |
---|
| 139 | |
---|
[26544f9] | 140 | // Struct representing an operation that was submitted |
---|
[11054eb] | 141 | struct __external_io { |
---|
[26544f9] | 142 | // inherits from outstanding io |
---|
[11054eb] | 143 | inline __outstanding_io; |
---|
[26544f9] | 144 | |
---|
| 145 | // pointer to, and count of, an array of ids to be submitted |
---|
[11054eb] | 146 | __u32 * idxs; |
---|
| 147 | __u32 have; |
---|
[26544f9] | 148 | |
---|
| 149 | // whether or not these can be accumulated before flushing the buffer |
---|
[11054eb] | 150 | bool lazy; |
---|
| 151 | }; |
---|
| 152 | |
---|
[26544f9] | 153 | // Complete io_context, contains all the data for io submission and completion |
---|
[8bee858] | 154 | struct __attribute__((aligned(64))) io_context$ { |
---|
[26544f9] | 155 | // arbiter, used in cases where threads were migrated at unfortunate moments |
---|
[8bee858] | 156 | io_arbiter$ * arbiter; |
---|
[26544f9] | 157 | |
---|
| 158 | // which processor the context is tied to |
---|
[1756e08] | 159 | struct processor * proc; |
---|
[78da4ab] | 160 | |
---|
[26544f9] | 161 | // queue of io submissions that haven't been processed. |
---|
[11054eb] | 162 | __outstanding_io_queue ext_sq; |
---|
[78da4ab] | 163 | |
---|
[26544f9] | 164 | // io_uring ring data structures (submission and completion) |
---|
[78da4ab] | 165 | struct __sub_ring_t sq; |
---|
| 166 | struct __cmp_ring_t cq; |
---|
[26544f9] | 167 | |
---|
| 168 | // flags the io_uring rings were created with |
---|
[4998155] | 169 | __u32 ring_flags; |
---|
[26544f9] | 170 | |
---|
| 171 | // file descriptor that identifies the io_uring instance |
---|
[3e2b9c9] | 172 | int fd; |
---|
[78da4ab] | 173 | }; |
---|
| 174 | |
---|
[26544f9] | 175 | // Shorthand to check when the io_context was last processed (io drained): returns cq.ts, or ULLONG_MAX when the completion ring's head equals its tail |
---|
[8bee858] | 176 | static inline unsigned long long ts(io_context$ *& this) { |
---|
[e71e94a] | 177 | const __u32 head = *this->cq.head; |
---|
| 178 | const __u32 tail = *this->cq.tail; |
---|
| 179 | |
---|
[26544f9] | 180 | // if there are no pending completions, just pretend it's infinitely recent |
---|
[b035046] | 181 | if(head == tail) return ULLONG_MAX; |
---|
[e71e94a] | 182 | |
---|
[4479890] | 183 | return this->cq.ts; |
---|
| 184 | } |
---|
| 185 | |
---|
[26544f9] | 186 | // Structure representing allocations that couldn't succeed locally |
---|
[11054eb] | 187 | struct __pending_alloc { |
---|
[26544f9] | 188 | // inherits from outstanding io |
---|
[11054eb] | 189 | inline __outstanding_io; |
---|
[26544f9] | 190 | |
---|
| 191 | // array and size of the desired allocation |
---|
[11054eb] | 192 | __u32 * idxs; |
---|
| 193 | __u32 want; |
---|
[26544f9] | 194 | |
---|
| 195 | // output param, the context the io was allocated from |
---|
[8bee858] | 196 | io_context$ * ctx; |
---|
[11054eb] | 197 | }; |
---|
| 198 | |
---|
[26544f9] | 199 | // Arbiter that handles cases where the context tied to the local processor is unable to satisfy the io request |
---|
[8bee858] | 200 | monitor __attribute__((aligned(64))) io_arbiter$ { |
---|
[26544f9] | 201 | // queue of outstanding io waiting on pending allocations |
---|
[11054eb] | 202 | __outstanding_io_queue pending; |
---|
[3e2b9c9] | 203 | }; |
---|
| 204 | |
---|
| 205 | //----------------------------------------------------------------------- |
---|
| 206 | // Misc |
---|
| 207 | // Weirdly, some systems that do support io_uring don't actually define these syscall numbers, so fallback values are provided when they are missing |
---|
| 208 | #ifdef __alpha__ |
---|
| 209 | /* |
---|
| 210 | * alpha is the only exception, all other architectures |
---|
| 211 | * have common numbers for new system calls. |
---|
| 212 | */ |
---|
| 213 | #ifndef __NR_io_uring_setup |
---|
| 214 | #define __NR_io_uring_setup 535 |
---|
| 215 | #endif |
---|
| 216 | #ifndef __NR_io_uring_enter |
---|
| 217 | #define __NR_io_uring_enter 536 |
---|
| 218 | #endif |
---|
| 219 | #ifndef __NR_io_uring_register |
---|
| 220 | #define __NR_io_uring_register 537 |
---|
| 221 | #endif |
---|
| 222 | #else /* !__alpha__ */ |
---|
| 223 | #ifndef __NR_io_uring_setup |
---|
| 224 | #define __NR_io_uring_setup 425 |
---|
| 225 | #endif |
---|
| 226 | #ifndef __NR_io_uring_enter |
---|
| 227 | #define __NR_io_uring_enter 426 |
---|
| 228 | #endif |
---|
| 229 | #ifndef __NR_io_uring_register |
---|
| 230 | #define __NR_io_uring_register 427 |
---|
| 231 | #endif |
---|
| 232 | #endif |
---|
| 233 | |
---|
[8bee858] | 234 | // void __ioctx_prepare_block(io_context$ & ctx); |
---|
[b2f3880] | 235 | #endif |
---|