1 | // |
---|
2 | // Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo |
---|
3 | // |
---|
4 | // The contents of this file are covered under the licence agreement in the |
---|
5 | // file "LICENCE" distributed with Cforall. |
---|
6 | // |
---|
7 | // io/types.hfa -- PRIVATE |
---|
8 | // Types used by the I/O subsystem |
---|
9 | // |
---|
10 | // Author : Thierry Delisle |
---|
11 | // Created On : Fri Jul 31 16:22:47 2020 |
---|
12 | // Last Modified By : |
---|
13 | // Last Modified On : |
---|
14 | // Update Count : |
---|
15 | // |
---|
16 | |
---|
17 | #pragma once |
---|
18 | |
---|
19 | #include <limits.h> |
---|
20 | |
---|
21 | extern "C" { |
---|
22 | #include <linux/types.h> |
---|
23 | } |
---|
24 | |
---|
25 | #include "bits/locks.hfa" |
---|
26 | #include "iofwd.hfa" |
---|
27 | #include "kernel/fwd.hfa" |
---|
28 | |
---|
29 | #if defined(CFA_HAVE_LINUX_IO_URING_H) |
---|
30 | #include "monitor.hfa" |
---|
31 | |
---|
32 | struct processor; |
---|
33 | monitor io_arbiter$; |
---|
34 | |
---|
//-----------------------------------------------------------------------
// Ring Data structure
// represent the io_uring submission ring which contains operations that will be sent to io_uring for processing
struct __sub_ring_t {
	// lock needed because remote processors might need to flush the instance
	__spinlock_t lock;

	// the kernel-visible submission ring (mmaped from the io_uring instance)
	struct {
		// Head and tail of the ring (associated with array)
		volatile __u32 * head;   // one past last index consumed by the kernel
		volatile __u32 * tail;   // one past last index visible to the kernel
		volatile __u32 released; // one past last index released back to the free list

		// The actual kernel ring which uses head/tail
		// indexes into the sqes arrays
		__u32 * array;
	} kring;

	// local free list of sqe slots, disjoint from the kernel ring
	struct {
		volatile __u32 head;
		volatile __u32 tail;
		// The ring which contains free allocations
		// indexes into the sqes arrays
		__u32 * array;
	} free_ring;

	// number of sqes to submit on next system call.
	volatile __u32 to_submit;

	// number of entries and mask to go with it
	const __u32 * num;
	const __u32 * mask;

	// Submission flags, currently only IORING_SETUP_SQPOLL
	__u32 * flags;

	// number of sqes not submitted
	// From documentation : [dropped] is incremented for each invalid submission queue entry encountered in the ring buffer.
	__u32 * dropped;

	// A buffer of sqes (not the actual ring)
	struct io_uring_sqe * sqes;

	// The location and size of the mmaped area
	void * ring_ptr;
	size_t ring_sz;

	// for debug purposes, whether or not the last flush was due to an arbiter flush
	bool last_external;
};
---|
85 | |
---|
// represent the io_uring completion ring which contains operations that have completed
struct __cmp_ring_t {
	// try-lock: needed because remote processors can help drain the buffer
	volatile bool try_lock;

	// id of the ring, used for the helping/topology algorithms
	unsigned id;

	// timestamp from last time it was drained
	unsigned long long ts;

	// Head and tail of the ring
	volatile __u32 * head;
	volatile __u32 * tail;

	// number of entries and mask to go with it
	const __u32 * mask;
	const __u32 * num;

	// NOTE(review): per the io_uring_setup(2) documentation this is the kernel's
	// counter of completion events dropped because the completion queue was
	// full — confirm against the kernel version in use
	__u32 * overflow;

	// the kernel ring
	volatile struct io_uring_cqe * cqes;

	// The location and size of the mmaped area
	void * ring_ptr;
	size_t ring_sz;
};
---|
115 | |
---|
// struct representing an io operation that still needs processing
// actual operations are expected to inherit from this
struct __outstanding_io {
	// intrusive link fields
	inline dlink(__outstanding_io);

	// primitive on which to block until the io is processed
	oneshot waitctx;
};
// declare the embedding so __outstanding_io can be stored in an intrusive dlist
P9_EMBEDDED( __outstanding_io, dlink(__outstanding_io) )
---|
126 | |
---|
// queue of operations that are outstanding
struct __outstanding_io_queue {
	// spinlock for protection
	// TODO: consider changing to a lock that blocks; I haven't examined whether it should be a kernel or user lock
	__spinlock_t lock;

	// the actual queue
	dlist(__outstanding_io) queue;

	// volatile used to avoid the need for taking the lock if it's empty
	volatile bool empty;
};
---|
139 | |
---|
// struct representing an operation that was submitted
struct __external_io {
	// inherits from outstanding io
	inline __outstanding_io;

	// pointer and count to an array of ids to be submitted
	__u32 * idxs;
	__u32 have;

	// whether or not these can be accumulated before flushing the buffer
	bool lazy;
};
---|
152 | |
---|
// complete io_context, contains all the data for io submission and completion
struct __attribute__((aligned(64))) io_context$ {
	// arbiter, used in cases where threads are migrated at unfortunate moments
	io_arbiter$ * arbiter;

	// which processor the context is tied to
	struct processor * proc;

	// queue of io submissions that haven't been processed.
	__outstanding_io_queue ext_sq;

	// io_uring ring data structures
	struct __sub_ring_t sq;
	struct __cmp_ring_t cq;

	// flags the io_uring rings were created with
	__u32 ring_flags;

	// file descriptor that identifies the io_uring instance
	int fd;
};
---|
174 | |
---|
// short hand to check when the io_context was last processed (io drained)
static inline unsigned long long ts(io_context$ *& this) {
	const __u32 cq_head = *this->cq.head;
	const __u32 cq_tail = *this->cq.tail;

	// completions are pending, report the timestamp of the last drain
	if(cq_head != cq_tail) return this->cq.ts;

	// no pending completions, just pretend it's infinitely recent
	return ULLONG_MAX;
}
---|
185 | |
---|
// structure representing allocations that couldn't succeed locally
struct __pending_alloc {
	// inherit from outstanding io
	inline __outstanding_io;

	// array and size of the desired allocation
	__u32 * idxs;
	__u32 want;

	// output param, the context the io was allocated from
	io_context$ * ctx;
};
---|
198 | |
---|
// arbiter that handles cases where the context tied to the local processor is unable to satisfy the io
monitor __attribute__((aligned(64))) io_arbiter$ {
	// contains a queue of io for pending allocations
	__outstanding_io_queue pending;
};
---|
204 | |
---|
//-----------------------------------------------------------------------
// Misc
// Weirdly, some systems that do support io_uring don't actually define these
// syscall numbers, so provide the upstream kernel values as fallbacks.
#ifdef __alpha__
/*
 * alpha is the only exception, all other architectures
 * have common numbers for new system calls.
 */
#ifndef __NR_io_uring_setup
#define __NR_io_uring_setup 535
#endif
#ifndef __NR_io_uring_enter
#define __NR_io_uring_enter 536
#endif
#ifndef __NR_io_uring_register
#define __NR_io_uring_register 537
#endif
#else /* !__alpha__ */
#ifndef __NR_io_uring_setup
#define __NR_io_uring_setup 425
#endif
#ifndef __NR_io_uring_enter
#define __NR_io_uring_enter 426
#endif
#ifndef __NR_io_uring_register
#define __NR_io_uring_register 427
#endif
#endif
---|
233 | |
---|
234 | // void __ioctx_prepare_block(io_context$ & ctx); |
---|
235 | #endif |
---|