source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@ 9421f3d8

Last change on this file since 9421f3d8 was 9421f3d8, checked in by Thierry Delisle <tdelisle@…>, 6 years ago

Adding some of the implemented code. Current state: the relaxed list achieves at least 6M ops/sec total.

#pragma once

#ifndef NO_STATS
#include <iostream>
#endif

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <mutex>
#include <tuple>
#include <type_traits>
#include <utility>

#include "assert.hpp"
#include "utils.hpp"

using namespace std;
struct spinlock_t {
	std::atomic_bool ll = { false };

	inline void lock() {
		while( __builtin_expect(ll.exchange(true),false) ) {
			while(ll.load(std::memory_order_relaxed))
				asm volatile("pause");
		}
	}

	inline bool try_lock() {
		return false == ll.exchange(true);
	}

	inline void unlock() {
		ll.store(false, std::memory_order_release);
	}

	inline explicit operator bool() {
		return ll.load(std::memory_order_relaxed);
	}
};

// Atomically test-and-set bit 'bit' of 'target'; returns the previous value of that bit.
static inline bool bts(std::atomic_size_t & target, size_t bit ) {
	//*
	int result = 0;
	asm volatile(
		"LOCK btsq %[bit], %[target]\n\t"
		:"=@ccc" (result)
		: [target] "m" (target), [bit] "r" (bit)
	);
	return result != 0;
	/*/
	size_t mask = 1ul << bit;
	size_t ret = target.fetch_or(mask, std::memory_order_relaxed);
	return (ret & mask) != 0;
	//*/
}

// Atomically test-and-reset bit 'bit' of 'target'; returns the previous value of that bit.
static inline bool btr(std::atomic_size_t & target, size_t bit ) {
	//*
	int result = 0;
	asm volatile(
		"LOCK btrq %[bit], %[target]\n\t"
		:"=@ccc" (result)
		: [target] "m" (target), [bit] "r" (bit)
	);
	return result != 0;
	/*/
	size_t mask = 1ul << bit;
	size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);
	return (ret & mask) != 0;
	//*/
}

extern bool enable_stats;

struct pick_stat {
	struct {
		size_t attempt = 0;
		size_t success = 0;
	} push;
	struct {
		size_t attempt = 0;
		size_t success = 0;
		size_t mask_attempt = 0;
	} pop;
};

struct empty_stat {
	struct {
		size_t value = 0;
		size_t count = 0;
	} push;
	struct {
		size_t value = 0;
		size_t count = 0;
	} pop;
};

template<typename node_t>
struct _LinksFields_t {
	node_t * prev = nullptr;
	node_t * next = nullptr;
	unsigned long long ts = 0;
};

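// relaxed_list distributes nodes over an array of spinlock-protected intrusive
// sublists to reduce contention:
//  - push() timestamps the node and appends it to a randomly chosen sublist;
//  - pop() samples two sublists and dequeues from the one with the older head;
//  - numNonEmpty and list_mask track which sublists currently hold nodes, so
//    pop() can avoid empty sublists when most of them are empty.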
template<typename node_t>
class __attribute__((aligned(128))) relaxed_list {
	static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");


public:
	relaxed_list(unsigned numLists)
		: lists(new intrusive_queue_t[numLists])
		, numLists(numLists)
	{
		assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
		assert(sizeof(*this) == 128);
	}

	~relaxed_list() {
		lists.reset();
		#ifndef NO_STATS
			std::cout << "Difference : "
				<< ssize_t(double(intrusive_queue_t::stat::dif.value) / intrusive_queue_t::stat::dif.num ) << " avg\t"
				<< intrusive_queue_t::stat::dif.max << "max" << std::endl;
		#endif
	}

	__attribute__((noinline, hot)) void push(node_t * node) {
		node->_links.ts = rdtscl();

		while(true) {
			// Pick a random list
			unsigned i = tls.rng.next() % numLists;

			#ifndef NO_STATS
				tls.pick.push.attempt++;
			#endif

			// If we can't lock it retry
			if( !lists[i].lock.try_lock() ) continue;

			__attribute__((unused)) int num = numNonEmpty;

			// Actually push it
			if(lists[i].push(node)) {
				numNonEmpty++;
				size_t qword = i >> 6ull;
				size_t bit = i & 63ull;
				assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
				__attribute__((unused)) bool ret = bts(list_mask[qword], bit);
				assert(!ret);
				assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
			}
			assert(numNonEmpty <= (int)numLists);

			// Unlock and return
			lists[i].lock.unlock();

			#ifndef NO_STATS
				tls.pick.push.success++;
				tls.empty.push.value += num;
				tls.empty.push.count += 1;
			#endif
			return;
		}
	}

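	// pop() repeatedly samples two sublists and dequeues from the one whose head has
	// the older timestamp (see try_pop). When few sublists are non-empty relative to
	// numLists, the draw is steered through list_mask: the upper bits of the random
	// value select a 64-bit mask word, the value modulo that word's popcount selects
	// one of its set bits, and word index and bit index are recombined into a sublist
	// index (i = bit | (word << 6)).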
	__attribute__((noinline, hot)) node_t * pop() {
		#if !defined(NO_BITMASK)
			// for(int r = 0; r < 10 && numNonEmpty != 0; r++) {
			// 	// Pick two lists at random
			// 	unsigned i = tls.rng.next() % numLists;
			// 	unsigned j = tls.rng.next() % numLists;

			// 	if(auto node = try_pop(i, j)) return node;
			// }
			int nnempty;
			while(0 != (nnempty = numNonEmpty)) {
				unsigned i, j;
				if( numLists < 4 || (numLists / nnempty) < 4 ) {
					// Pick two lists at random
					i = tls.rng.next() % numLists;
					j = tls.rng.next() % numLists;
				} else {
					#ifndef NO_STATS
						// tls.pick.push.mask_attempt++;
					#endif

					// Pick two lists at random
					unsigned num = ((numLists - 1) >> 6) + 1;

					unsigned ri = tls.rng.next();
					unsigned rj = tls.rng.next();

					unsigned wdxi = (ri >> 6u) % num;
					unsigned wdxj = (rj >> 6u) % num;

					size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
					size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);

					if(maski == 0 && maskj == 0) continue;

					unsigned biti = maski ? ri % __builtin_popcountl(maski) : 0;
					unsigned bitj = maskj ? rj % __builtin_popcountl(maskj) : 0;

					unsigned bi = 64 - nthSetBit(maski, biti + 1);
					unsigned bj = 64 - nthSetBit(maskj, bitj + 1);

					assertf(bi < 64, "%zu %u %u", maski, biti, bi);
					assertf(bj < 64, "%zu %u %u", maskj, bitj, bj);

					i = bi | (wdxi << 6);
					j = bj | (wdxj << 6);

					assertf(i < numLists, "%u", wdxi << 6);
					assertf(j < numLists, "%u", wdxj << 6);
				}

				if(auto node = try_pop(i, j)) return node;
			}
		#else
			while(numNonEmpty != 0) {
				// Pick two lists at random
				int i = tls.rng.next() % numLists;
				int j = tls.rng.next() % numLists;

				if(auto node = try_pop(i, j)) return node;
			}
		#endif

		return nullptr;
	}

private:
	node_t * try_pop(unsigned i, unsigned j) {
		#ifndef NO_STATS
			tls.pick.pop.attempt++;
		#endif

		// Pick the best list
		int w = i;
		if( __builtin_expect(lists[j].ts() != 0, true) ) {
			w = (lists[i].ts() < lists[j].ts()) ? i : j;
		}

		auto & list = lists[w];
		// If list looks empty retry
		if( list.ts() == 0 ) return nullptr;

		// If we can't get the lock retry
		if( !list.lock.try_lock() ) return nullptr;

		__attribute__((unused)) int num = numNonEmpty;

		// If list is empty, unlock and retry
		if( list.ts() == 0 ) {
			list.lock.unlock();
			return nullptr;
		}

		// Actually pop the list
		node_t * node;
		bool emptied;
		std::tie(node, emptied) = list.pop();
		assert(node);

		if(emptied) {
			numNonEmpty--;
			size_t qword = w >> 6ull;
			size_t bit = w & 63ull;
			assert((list_mask[qword] & (1ul << bit)) != 0);
			__attribute__((unused)) bool ret = btr(list_mask[qword], bit);
			assert(ret);
			assert((list_mask[qword] & (1ul << bit)) == 0);
		}

		// Unlock and return
		list.lock.unlock();
		assert(numNonEmpty >= 0);
		#ifndef NO_STATS
			tls.pick.pop.success++;
			tls.empty.pop.value += num;
			tls.empty.pop.count += 1;
		#endif
		return node;
	}

private:

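	// Each sublist is an intrusive doubly-linked list framed by two sentinel nodes
	// (before/after). before._links.ts caches the timestamp of the first element,
	// or 0 when the sublist is empty, so ts() can be checked without holding the lock.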
	class __attribute__((aligned(128))) intrusive_queue_t {
	public:
		typedef spinlock_t lock_t;

		friend class relaxed_list<node_t>;

		struct stat {
			ssize_t diff = 0;

			static struct Dif {
				ssize_t value = 0;
				size_t num = 0;
				ssize_t max = 0;
			} dif;
		};

	private:
		struct sentinel_t {
			_LinksFields_t<node_t> _links;
		};

		lock_t lock;
		sentinel_t before;
		sentinel_t after;
		#ifndef NO_STATS
			stat s;
		#endif

		static constexpr auto fields_offset = offsetof( node_t, _links );
	public:
		intrusive_queue_t()
			: before{{ nullptr, tail() }}
			, after {{ head(), nullptr }}
		{
			/* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
			/* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
			/* paranoid */ assert(head()->_links.prev == nullptr);
			/* paranoid */ assert(head()->_links.next == tail() );
			/* paranoid */ assert(tail()->_links.next == nullptr);
			/* paranoid */ assert(tail()->_links.prev == head() );
			/* paranoid */ assert(sizeof(*this) == 128);
			/* paranoid */ assert((intptr_t(this) % 128) == 0);
		}

		~intrusive_queue_t() {
			#ifndef NO_STATS
				stat::dif.value += s.diff;
				stat::dif.num++;
				stat::dif.max = std::abs(stat::dif.max) > std::abs(s.diff) ? stat::dif.max : s.diff;
			#endif
		}

		inline node_t * head() const {
			node_t * rhead = reinterpret_cast<node_t *>(
				reinterpret_cast<uintptr_t>( &before ) - fields_offset
			);
			assert(rhead);
			return rhead;
		}

		inline node_t * tail() const {
			node_t * rtail = reinterpret_cast<node_t *>(
				reinterpret_cast<uintptr_t>( &after ) - fields_offset
			);
			assert(rtail);
			return rtail;
		}

		inline bool push(node_t * node) {
			assert(lock);
			assert(node->_links.ts != 0);
			node_t * tail = this->tail();

			node_t * prev = tail->_links.prev;
			// assertf(node->_links.ts >= prev->_links.ts,
			// 	"New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
			node->_links.next = tail;
			node->_links.prev = prev;
			prev->_links.next = node;
			tail->_links.prev = node;
			#ifndef NO_STATS
				if(enable_stats) s.diff++;
			#endif
			if(before._links.ts == 0l) {
				before._links.ts = node->_links.ts;
				assert(node->_links.prev == this->head());
				return true;
			}
			return false;
		}

		inline std::pair<node_t *, bool> pop() {
			assert(lock);
			node_t * head = this->head();
			node_t * tail = this->tail();

			node_t * node = head->_links.next;
			node_t * next = node->_links.next;
			if(node == tail) return {nullptr, false};

			head->_links.next = next;
			next->_links.prev = head;

			#ifndef NO_STATS
				if(enable_stats) s.diff--;
			#endif
			if(next == tail) {
				before._links.ts = 0l;
				return {node, true};
			}
			else {
				assert(next->_links.ts != 0);
				before._links.ts = next->_links.ts;
				assert(before._links.ts != 0);
				return {node, false};
			}
		}

		long long ts() const {
			return before._links.ts;
		}
	};


public:

	static __attribute__((aligned(128))) thread_local struct TLS {
		Random rng = { int(rdtscl()) };
		pick_stat pick;
		empty_stat empty;
	} tls;

public:
	std::atomic_int numNonEmpty = 0;          // number of non-empty sublists
	std::atomic_size_t list_mask[7] = { 0 };  // bitmask of which sublists are currently non-empty
private:
	__attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
	const unsigned numLists;

public:
	static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t);
};
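
// Minimal usage sketch (illustration only; not part of the original header).
// The node type job_t and the sublist count are hypothetical: any node type
// works as long as it carries a `_links` field of type _LinksFields_t<node_t>,
// as enforced by the static_assert above. It also assumes enable_stats and the
// static members declared above (tls, stat::dif) are defined in some
// translation unit, as the original benchmark presumably does.
//
//	struct job_t {
//		int payload;
//		_LinksFields_t<job_t> _links;         // intrusive links used by relaxed_list
//	};
//
//	relaxed_list<job_t> ready(16);                // 16 sublists
//
//	void worker() {
//		job_t * j = new job_t{ 42 };
//		ready.push(j);                        // timestamp + append to a random sublist
//		if(job_t * n = ready.pop()) {         // sample two sublists, take the older head
//			// ... process n->payload ...
//			delete n;
//		}
//	}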