//
// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// io/types.hfa -- PRIVATE
// Types used by the I/O subsystem
//
// Author           : Thierry Delisle
// Created On       : Fri Jul 31 16:22:47 2020
// Last Modified By :
// Last Modified On :
// Update Count     :
//
---|
| 16 | |
---|
| 17 | #pragma once |
---|
| 18 | |
---|
[b035046] | 19 | #include <limits.h> |
---|
| 20 | |
---|
[930e57e] | 21 | extern "C" { |
---|
| 22 | #include <linux/types.h> |
---|
| 23 | } |
---|
[4998155] | 24 | |
---|
[9db2c92] | 25 | #include "bits/locks.hfa" |
---|
[11054eb] | 26 | #include "bits/queue.hfa" |
---|
[40a606d2] | 27 | #include "iofwd.hfa" |
---|
[454f478] | 28 | #include "kernel/fwd.hfa" |
---|
[3e2b9c9] | 29 | |
---|
[930e57e] | 30 | #if defined(CFA_HAVE_LINUX_IO_URING_H) |
---|
[78da4ab] | 31 | #include "bits/sequence.hfa" |
---|
| 32 | #include "monitor.hfa" |
---|
[2fafe7e] | 33 | |
---|
[78da4ab] | 34 | struct processor; |
---|
[8bee858] | 35 | monitor io_arbiter$; |
---|
[2fafe7e] | 36 | |
---|
//-----------------------------------------------------------------------
// Ring Data structure
// Represents the io_uring submission ring, which contains operations that
// will be sent to io_uring for processing.
struct __sub_ring_t {
	// lock needed because remote processors might need to flush the instance
	__spinlock_t lock;

	// The kernel-visible submission ring (mmaped io_uring SQ ring).
	struct {
		// Head and tail of the ring (associated with array)
		volatile __u32 * head;   // one past last index consumed by the kernel
		volatile __u32 * tail;   // one past last index visible to the kernel
		volatile __u32 released; // one past last index released back to the free list

		// The actual kernel ring which uses head/tail
		// indexes into the sqes arrays
		__u32 * array;
	} kring;

	// Local free list of sqe slots, not visible to the kernel.
	struct {
		volatile __u32 head;
		volatile __u32 tail;
		// The ring which contains free allocations
		// indexes into the sqes arrays
		__u32 * array;
	} free_ring;

	// number of sqes to submit on next system call.
	volatile __u32 to_submit;

	// number of entries and mask to go with it
	const __u32 * num;
	const __u32 * mask;

	// Submission flags, currently only IORING_SETUP_SQPOLL
	__u32 * flags;

	// number of sqes not submitted
	// From documentation : [dropped] is incremented for each invalid submission queue entry encountered in the ring buffer.
	__u32 * dropped;

	// A buffer of sqes (not the actual ring)
	struct io_uring_sqe * sqes;

	// The location and size of the mmaped area
	void * ring_ptr;
	size_t ring_sz;

	// for debug purposes, whether or not the last flush was due to an arbiter flush
	bool last_external;
};
---|
| 87 | |
---|
// Represents the io_uring completion ring, which contains operations that
// have completed.
struct __cmp_ring_t {
	// try-lock: needed because remote processors can help drain the buffer
	volatile bool try_lock;

	// id of the ring, used for the helping/topology algorithms
	unsigned id;

	// timestamp from last time it was drained
	unsigned long long ts;

	// Head and tail of the ring
	volatile __u32 * head;
	volatile __u32 * tail;

	// number of entries and mask to go with it
	const __u32 * mask;
	const __u32 * num;

	// NOTE(review): per io_uring documentation this is the kernel's count of
	// completion events dropped when the CQ ring overflowed -- confirm
	__u32 * overflow;

	// the kernel ring
	volatile struct io_uring_cqe * cqes;

	// The location and size of the mmaped area
	void * ring_ptr;
	size_t ring_sz;
};
---|
| 117 | |
---|
// Struct representing an io operation that still needs processing.
// Actual operations are expected to inherit from this.
struct __outstanding_io {
	// intrusive link fields (required for membership in Queue)
	inline Colable;

	// primitive on which to block until the io is processed
	oneshot waitctx;
};
// Intrusive-link accessor required by Queue(__outstanding_io):
// forwards to the Colable accessor and narrows the result back.
static inline __outstanding_io *& Next( __outstanding_io * n ) { return (__outstanding_io *)Next( (Colable *)n ); }
---|
| 128 | |
---|
// Queue of operations that are outstanding.
struct __outstanding_io_queue {
	// spinlock for protection
	// TODO: changing to a lock that blocks, I haven't examined whether it should be a kernel or user lock
	__spinlock_t lock;

	// the actual queue
	Queue(__outstanding_io) queue;

	// volatile flag used to avoid the need for taking the lock if it's empty
	volatile bool empty;
};
---|
| 141 | |
---|
// Struct representing an operation that was submitted (from outside the
// owning processor, hence "external").
struct __external_io {
	// inherits from outstanding io
	inline __outstanding_io;

	// pointer and count to an array of ids to be submitted
	__u32 * idxs;
	__u32 have;

	// whether or not these can be accumulated before flushing the buffer
	bool lazy;
};
---|
| 154 | |
---|
// Complete io_context: contains all the data for io submission and completion.
// Cache-line aligned to avoid false sharing between contexts.
struct __attribute__((aligned(64))) io_context$ {
	// arbiter, used in cases where threads are migrated at unfortunate moments
	io_arbiter$ * arbiter;

	// which processor the context is tied to
	struct processor * proc;

	// queue of io submissions that haven't been processed.
	__outstanding_io_queue ext_sq;

	// io_uring ring data structures
	struct __sub_ring_t sq;
	struct __cmp_ring_t cq;

	// flags the io_uring rings were created with
	__u32 ring_flags;

	// file descriptor that identifies the io_uring instance
	int fd;
};
---|
| 176 | |
---|
// Short hand to check when the io_context was last processed (io drained).
// Returns ULLONG_MAX ("infinitely recent") when no completions are pending,
// otherwise the timestamp recorded at the last drain.
static inline unsigned long long ts(io_context$ *& this) {
	const __u32 pending_head = *this->cq.head;
	const __u32 pending_tail = *this->cq.tail;

	// empty completion queue: nothing to drain, pretend it is infinitely recent
	return (pending_head == pending_tail) ? ULLONG_MAX : this->cq.ts;
}
---|
| 187 | |
---|
// Structure representing allocations that couldn't succeed locally.
struct __pending_alloc {
	// inherits from outstanding io
	inline __outstanding_io;

	// array and size of the desired allocation
	__u32 * idxs;
	__u32 want;

	// output param: the context the io was allocated from
	io_context$ * ctx;
};
---|
| 200 | |
---|
// Arbiter that handles cases where the context tied to the local processor
// is unable to satisfy the io request.
monitor __attribute__((aligned(64))) io_arbiter$ {
	// queue of io operations waiting for a pending allocation
	__outstanding_io_queue pending;
};
---|
| 206 | |
---|
//-----------------------------------------------------------------------
// Misc
// Weirdly, some systems that do support io_uring don't actually define these
// syscall numbers, so provide the standard kernel values as fallbacks.
#ifdef __alpha__
/*
 * alpha is the only exception, all other architectures
 * have common numbers for new system calls.
 */
#ifndef __NR_io_uring_setup
#define __NR_io_uring_setup 535
#endif
#ifndef __NR_io_uring_enter
#define __NR_io_uring_enter 536
#endif
#ifndef __NR_io_uring_register
#define __NR_io_uring_register 537
#endif
#else /* !__alpha__ */
#ifndef __NR_io_uring_setup
#define __NR_io_uring_setup 425
#endif
#ifndef __NR_io_uring_enter
#define __NR_io_uring_enter 426
#endif
#ifndef __NR_io_uring_register
#define __NR_io_uring_register 427
#endif
#endif
---|
| 235 | |
---|
[8bee858] | 236 | // void __ioctx_prepare_block(io_context$ & ctx); |
---|
[b2f3880] | 237 | #endif |
---|