source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@0092853

Last change on this file: 0092853, checked in by Thierry Delisle <tdelisle@…>, 5 years ago

Fixed Variants

#pragma once

#define MACRO_XSTR(s) MACRO_STR(s)
#define MACRO_STR(s) #s

#define VANILLA 0
#define SNZI 1
#define BITMASK 2
#define DISCOVER 3
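
// Emptiness-tracking variants implemented below:
//  - VANILLA  : a single atomic counter (numNonEmpty) of non-empty sublists
//  - SNZI     : a scalable non-zero indicator (snzi_t) answering "is anything non-empty?"
//  - BITMASK  : atomic bitmask words (list_mask) marking which sublists are non-empty
//  - DISCOVER : SNZI for the global check, plus a per-thread mask of sublists
//               observed non-empty, refreshed opportunistically in try_pop()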

#ifndef VARIANT
#define VARIANT VANILLA
#endif

#ifndef NO_STATS
#include <iostream>
#endif

#include <atomic>      // std::atomic_size_t, std::atomic_int
#include <cmath>
#include <cstddef>     // offsetof
#include <memory>
#include <mutex>
#include <tuple>       // std::tie
#include <type_traits>
#include <utility>     // std::pair

#include "assert.hpp"
#include "utils.hpp"
#include "snzi.hpp"

using namespace std;

extern bool enable_stats;

struct pick_stat {
	struct {
		size_t attempt = 0;
		size_t success = 0;
	} push;
	struct {
		size_t attempt = 0;
		size_t success = 0;
		size_t mask_attempt = 0;
		size_t mask_reset = 0;
	} pop;
};

struct empty_stat {
	struct {
		size_t value = 0;
		size_t count = 0;
	} push;
	struct {
		size_t value = 0;
		size_t count = 0;
	} pop;
};

template<typename node_t>
struct _LinksFields_t {
	node_t * prev = nullptr;
	node_t * next = nullptr;
	unsigned long long ts = 0;
};

template<typename node_t>
class __attribute__((aligned(128))) relaxed_list {
	static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");

public:
	static const char * name() {
		const char * names[] = {
			"VANILLA",
			"SNZI",
			"BITMASK",
			"DISCOVER"
		};
		return names[VARIANT];
	}

	relaxed_list(unsigned numLists)
		: lists(new intrusive_queue_t[numLists])
		, numLists(numLists)
		#if VARIANT == SNZI || VARIANT == DISCOVER
		, snzi( std::log2( numLists / 8 ) )
		#endif
	{
		assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
		// assert(sizeof(*this) == 128);
		std::cout << "Constructing Relaxed List with " << numLists << std::endl;

		#ifndef NO_STATS
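			// Register this instance on the global intrusive list of
			// relaxed_lists so stats_print() can later walk every instance.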
			if(head) this->next = head;
			head = this;
		#endif
	}

	~relaxed_list() {
		std::cout << "Destroying Relaxed List" << std::endl;
		lists.reset();
	}

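	// push: pick a sublist uniformly at random, try-lock it (retrying with a new
	// random pick if the lock is contended), append the node, and update the
	// variant-specific emptiness tracking if the sublist went from empty to non-empty.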
	__attribute__((noinline, hot)) void push(node_t * node) {
		node->_links.ts = rdtscl();

		while(true) {
			// Pick a random list
			unsigned i = tls.rng.next() % numLists;

			#ifndef NO_STATS
				tls.pick.push.attempt++;
			#endif

			// If we can't lock it retry
			if( !lists[i].lock.try_lock() ) continue;

			#if VARIANT != SNZI && VARIANT != DISCOVER
				__attribute__((unused)) int num = numNonEmpty;
			#endif

			// Actually push it
			if(lists[i].push(node)) {
				#if VARIANT == DISCOVER
					size_t qword = i >> 6ull;
					size_t bit = i & 63ull;
					assert(qword == 0);
					bts(tls.mask, bit);
					snzi.arrive(i);
				#elif VARIANT == SNZI
					snzi.arrive(i);
				#elif VARIANT == BITMASK
					numNonEmpty++;
					size_t qword = i >> 6ull;
					size_t bit = i & 63ull;
					assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
					__attribute__((unused)) bool ret = bts(list_mask[qword], bit);
					assert(!ret);
					assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
				#else
					numNonEmpty++;
				#endif
			}
			#if VARIANT != SNZI && VARIANT != DISCOVER
				assert(numNonEmpty <= (int)numLists);
			#endif

			// Unlock and return
			lists[i].lock.unlock();

			#ifndef NO_STATS
				tls.pick.push.success++;
				#if VARIANT != SNZI && VARIANT != DISCOVER
					tls.empty.push.value += num;
					tls.empty.push.count += 1;
				#endif
			#endif
			return;
		}
	}

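	// pop: as long as the variant's emptiness indicator says work may remain,
	// pick two candidate sublists (uniformly at random, or biased by a bitmask
	// of non-empty sublists) and hand them to try_pop(), which takes from the
	// one whose head has the older timestamp.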
	__attribute__((noinline, hot)) node_t * pop() {
		#if VARIANT == DISCOVER
			assert(numLists <= 64);
			while(snzi.query()) {
				tls.pick.pop.mask_attempt++;
				unsigned i, j;
				{
					// Pick first list totally randomly
					i = tls.rng.next() % numLists;

					// Pick the other according to the bitmask
					unsigned r = tls.rng.next();

					size_t mask = tls.mask.load(std::memory_order_relaxed);
					if(mask == 0) {
						tls.pick.pop.mask_reset++;
						// Local mask is empty: fall back to considering every sublist.
						// (Shifting a 32-bit 1U by numLists is UB once numLists >= 32,
						//  and numLists may be as large as 64 here.)
						mask = numLists < 64 ? ((size_t(1) << numLists) - 1) : ~size_t(0);
					}

					unsigned b = rand_bit(r, mask);

					assertf(b < 64, "%zu %u", mask, b);

					j = b;

					assert(j < numLists);
				}

				if(auto node = try_pop(i, j)) return node;
			}
		#elif VARIANT == SNZI
			while(snzi.query()) {
				// Pick two lists at random
				int i = tls.rng.next() % numLists;
				int j = tls.rng.next() % numLists;

				if(auto node = try_pop(i, j)) return node;
			}
		#elif VARIANT == BITMASK
			int nnempty;
			while(0 != (nnempty = numNonEmpty)) {
				tls.pick.pop.mask_attempt++;
				unsigned i, j;
				{
					// Pick two lists at random
					unsigned num = ((numLists - 1) >> 6) + 1;

					unsigned ri = tls.rng.next();
					unsigned rj = tls.rng.next();

					unsigned wdxi = (ri >> 6u) % num;
					unsigned wdxj = (rj >> 6u) % num;

					size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
					size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);

					if(maski == 0 && maskj == 0) continue;

					unsigned bi = rand_bit(ri, maski);
					unsigned bj = rand_bit(rj, maskj);

					assertf(bi < 64, "%zu %u", maski, bi);
					assertf(bj < 64, "%zu %u", maskj, bj);

					i = bi | (wdxi << 6);
					j = bj | (wdxj << 6);

					assertf(i < numLists, "%u", wdxi << 6);
					assertf(j < numLists, "%u", wdxj << 6);
				}

				if(auto node = try_pop(i, j)) return node;
			}
		#else
			while(numNonEmpty != 0) {
				// Pick two lists at random
				int i = tls.rng.next() % numLists;
				int j = tls.rng.next() % numLists;

				if(auto node = try_pop(i, j)) return node;
			}
		#endif

		return nullptr;
	}

private:
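	// try_pop: compare the head timestamps of the two candidate sublists and
	// take from the older non-empty one; a failed try_lock or a sublist that
	// emptied concurrently simply returns nullptr so pop() can retry. Under
	// DISCOVER, the thread-local mask is refreshed from what was observed.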
	node_t * try_pop(unsigned i, unsigned j) {
		#ifndef NO_STATS
			tls.pick.pop.attempt++;
		#endif

		#if VARIANT == DISCOVER
			if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
			if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
		#endif

		// Pick the best list
		int w = i;
		if( __builtin_expect(lists[j].ts() != 0, true) ) {
			w = (lists[i].ts() < lists[j].ts()) ? i : j;
		}

		auto & list = lists[w];
		// If list looks empty retry
		if( list.ts() == 0 ) return nullptr;

		// If we can't get the lock retry
		if( !list.lock.try_lock() ) return nullptr;

		#if VARIANT != SNZI && VARIANT != DISCOVER
			__attribute__((unused)) int num = numNonEmpty;
		#endif

		// If list is empty, unlock and retry
		if( list.ts() == 0 ) {
			list.lock.unlock();
			return nullptr;
		}

		// Actually pop the list
		node_t * node;
		bool emptied;
		std::tie(node, emptied) = list.pop();
		assert(node);

		if(emptied) {
			#if VARIANT == DISCOVER
				size_t qword = w >> 6ull;
				size_t bit = w & 63ull;
				assert(qword == 0);
				__attribute__((unused)) bool ret = btr(tls.mask, bit);
				snzi.depart(w);
			#elif VARIANT == SNZI
				snzi.depart(w);
			#elif VARIANT == BITMASK
				numNonEmpty--;
				size_t qword = w >> 6ull;
				size_t bit = w & 63ull;
				assert((list_mask[qword] & (1ul << bit)) != 0);
				__attribute__((unused)) bool ret = btr(list_mask[qword], bit);
				assert(ret);
				assert((list_mask[qword] & (1ul << bit)) == 0);
			#else
				numNonEmpty--;
			#endif
		}

		// Unlock and return
		list.lock.unlock();
		#if VARIANT != SNZI && VARIANT != DISCOVER
			assert(numNonEmpty >= 0);
		#endif
		#ifndef NO_STATS
			tls.pick.pop.success++;
			#if VARIANT != SNZI && VARIANT != DISCOVER
				tls.empty.pop.value += num;
				tls.empty.pop.count += 1;
			#endif
		#endif
		return node;
	}

private:

	class __attribute__((aligned(128))) intrusive_queue_t {
	public:
		typedef spinlock_t lock_t;

		friend class relaxed_list<node_t>;

		struct stat {
			ssize_t diff = 0;
			size_t push = 0;
			size_t pop = 0;
		};

	private:
		struct sentinel_t {
			_LinksFields_t<node_t> _links;
		};

		lock_t lock;
		sentinel_t before;
		sentinel_t after;
		#ifndef NO_STATS
			stat s;
		#endif

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winvalid-offsetof"
		static constexpr auto fields_offset = offsetof( node_t, _links );
#pragma GCC diagnostic pop
	public:
		intrusive_queue_t()
			: before{{ nullptr, tail() }}
			, after {{ head(), nullptr }}
		{
			/* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
			/* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
			/* paranoid */ assert(head()->_links.prev == nullptr);
			/* paranoid */ assert(head()->_links.next == tail() );
			/* paranoid */ assert(tail()->_links.next == nullptr);
			/* paranoid */ assert(tail()->_links.prev == head() );
			/* paranoid */ assert(sizeof(*this) == 128);
			/* paranoid */ assert((intptr_t(this) % 128) == 0);
		}

		~intrusive_queue_t() = default;

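		// head() and tail() expose the 'before'/'after' sentinels as fake node_t
		// pointers (via fields_offset), so push/pop never special-case an empty
		// list. before._links.ts caches the timestamp of the first real node and
		// is 0 exactly when the sublist is empty; ts() below reports that value.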
		inline node_t * head() const {
			node_t * rhead = reinterpret_cast<node_t *>(
				reinterpret_cast<uintptr_t>( &before ) - fields_offset
			);
			assert(rhead);
			return rhead;
		}

		inline node_t * tail() const {
			node_t * rtail = reinterpret_cast<node_t *>(
				reinterpret_cast<uintptr_t>( &after ) - fields_offset
			);
			assert(rtail);
			return rtail;
		}

		inline bool push(node_t * node) {
			assert(lock);
			assert(node->_links.ts != 0);
			node_t * tail = this->tail();

			node_t * prev = tail->_links.prev;
			// assertf(node->_links.ts >= prev->_links.ts,
			// 	"New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
			node->_links.next = tail;
			node->_links.prev = prev;
			prev->_links.next = node;
			tail->_links.prev = node;
			#ifndef NO_STATS
				if(enable_stats) {
					s.diff++;
					s.push++;
				}
			#endif
			if(before._links.ts == 0l) {
				before._links.ts = node->_links.ts;
				assert(node->_links.prev == this->head());
				return true;
			}
			return false;
		}

		inline std::pair<node_t *, bool> pop() {
			assert(lock);
			node_t * head = this->head();
			node_t * tail = this->tail();

			node_t * node = head->_links.next;
			node_t * next = node->_links.next;
			if(node == tail) return {nullptr, false};

			head->_links.next = next;
			next->_links.prev = head;

			#ifndef NO_STATS
				if(enable_stats) {
					s.diff--;
					s.pop ++;
				}
			#endif
			if(next == tail) {
				before._links.ts = 0l;
				return {node, true};
			}
			else {
				assert(next->_links.ts != 0);
				before._links.ts = next->_links.ts;
				assert(before._links.ts != 0);
				return {node, false};
			}
		}

		long long ts() const {
			return before._links.ts;
		}
	};


public:

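	// Per-thread state: a private RNG, per-thread statistics, and (for the
	// DISCOVER variant) the thread-local mask of sublists this thread has
	// recently observed to be non-empty.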
	static __attribute__((aligned(128))) thread_local struct TLS {
		Random rng = { int(rdtscl()) };
		pick_stat pick;
		empty_stat empty;
		__attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
	} tls;

private:
	__attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
	const unsigned numLists;
private:
	#if VARIANT == SNZI || VARIANT == DISCOVER
		snzi_t snzi;
	#else
		std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
	#endif
	#if VARIANT == BITMASK
		std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which sublists are currently non-empty
	#endif

public:
	static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t);

#ifndef NO_STATS
	static void stats_print(std::ostream & os) {
		auto it = head;
		while(it) {
			it->stats_print_local(os);
			it = it->next;
		}
	}

	static void stats_tls_tally() {
		global_stats.pick.push.attempt += tls.pick.push.attempt;
		global_stats.pick.push.success += tls.pick.push.success;
		global_stats.pick.pop .attempt += tls.pick.pop.attempt;
		global_stats.pick.pop .success += tls.pick.pop.success;
		global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
		global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;

		global_stats.qstat.push.value += tls.empty.push.value;
		global_stats.qstat.push.count += tls.empty.push.count;
		global_stats.qstat.pop .value += tls.empty.pop .value;
		global_stats.qstat.pop .count += tls.empty.pop .count;
	}

private:
	static struct GlobalStats {
		struct {
			struct {
				std::atomic_size_t attempt = { 0 };
				std::atomic_size_t success = { 0 };
			} push;
			struct {
				std::atomic_size_t attempt = { 0 };
				std::atomic_size_t success = { 0 };
				std::atomic_size_t mask_attempt = { 0 };
				std::atomic_size_t mask_reset = { 0 };
			} pop;
		} pick;
		struct {
			struct {
				std::atomic_size_t value = { 0 };
				std::atomic_size_t count = { 0 };
			} push;
			struct {
				std::atomic_size_t value = { 0 };
				std::atomic_size_t count = { 0 };
			} pop;
		} qstat;
	} global_stats;

	// Linked list of all relaxed_list instances, used only for stats
	__attribute__((aligned(64))) relaxed_list<node_t> * next = nullptr;

	static relaxed_list<node_t> * head;

	void stats_print_local(std::ostream & os ) {
		os << "----- Relaxed List Stats -----" << std::endl;
		// {
		// 	ssize_t diff = 0;
		// 	size_t num = 0;
		// 	ssize_t max = 0;

		// 	for(size_t i = 0; i < numLists; i++) {
		// 		const auto & list = lists[i];
		// 		diff+= list.s.diff;
		// 		num ++;
		// 		max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff;
		// 		os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n";
		// 	}

		// 	os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl;
		// }

		const auto & global = global_stats;

		double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
		double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
		double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
		double rpop_sur = (100.0 * double(global.pick.pop .mask_reset) / global.pick.pop .mask_attempt);

		os << "Push Pick % : " << push_sur << "(" << global.pick.push.success << " / " << global.pick.push.attempt << ")\n";
		os << "Pop Pick % : " << pop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .attempt << ")\n";
		os << "TryPop Pick % : " << mpop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .mask_attempt << ")\n";
		os << "Pop M Reset % : " << rpop_sur << "(" << global.pick.pop .mask_reset << " / " << global.pick.pop .mask_attempt << ")\n";

		double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
		double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count;
		double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
		os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
		os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n";
		os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
	}
#endif
};
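
For reference, a minimal usage sketch of this header, not part of the file above. It assumes the default VANILLA variant, is built as C++17 (so the over-aligned sublists are allocated correctly) with -DNO_STATS so that only the tls static member needs a definition, and relies on spinlock_t, Random, rdtscl, bts/btr and rand_bit coming from the accompanying utils.hpp exactly as the header uses them. The work_item node type and file layout are hypothetical.

#define NO_STATS
#include "relaxed_list.hpp"

// A node only needs an intrusive '_links' member of the right type,
// as enforced by the static_assert in relaxed_list.
struct work_item {
	_LinksFields_t<work_item> _links;
	int payload = 0;
};

// Out-of-line definition for the thread-local state declared in the class.
template<typename node_t>
thread_local typename relaxed_list<node_t>::TLS relaxed_list<node_t>::tls;

int main() {
	relaxed_list<work_item> list(8);        // 8 sublists

	work_item items[4];
	for(auto & w : items) list.push(&w);    // timestamped, then appended to a random sublist

	while(work_item * w = list.pop()) {     // nullptr once every sublist has drained
		w->payload++;
	}
}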