source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@ a5873bd

ADT arm-eh ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast new-ast-unique-expr pthread-emulation qualifiedEnum
Last change on this file since a5873bd was a5873bd, checked in by Thierry Delisle <tdelisle@…>, 6 years ago

Merge branch 'relaxed_ready' of plg.uwaterloo.ca:software/cfa/cfa-cc into relaxed_ready

  • Property mode set to 100644
File size: 16.6 KB
Line 
1#pragma once
2
3#define MACRO_XSTR(s) MACRO_STR(s)
4#define MACRO_STR(s) #s
5
6#define VANILLA 0
7#define SNZI 1
8#define BITMASK 2
9#define DISCOVER 3
10#define SNZM 4
11
12#ifndef VARIANT
13#define VARIANT VANILLA
14#endif
15
16#ifndef NO_STATS
17#include <iostream>
18#endif
19
20#include <cmath>
21#include <memory>
22#include <mutex>
23#include <type_traits>
24
25#include "assert.hpp"
26#include "utils.hpp"
27#include "snzi.hpp"
28#include "snzm.hpp"
29
30using namespace std;
31
32extern bool enable_stats;
33
34struct pick_stat {
35 struct {
36 size_t attempt = 0;
37 size_t success = 0;
38 } push;
39 struct {
40 size_t attempt = 0;
41 size_t success = 0;
42 size_t mask_attempt = 0;
43 size_t mask_reset = 0;
44 } pop;
45};
46
47struct empty_stat {
48 struct {
49 size_t value = 0;
50 size_t count = 0;
51 } push;
52 struct {
53 size_t value = 0;
54 size_t count = 0;
55 } pop;
56};
57
58template<typename node_t>
59struct _LinksFields_t {
60 node_t * prev = nullptr;
61 node_t * next = nullptr;
62 unsigned long long ts = 0;
63};
64
65template<typename node_t>
66class __attribute__((aligned(128))) relaxed_list {
67 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
68
69public:
70 static const char * name() {
71 const char * names[] = {
72 "VANILLA",
73 "SNZI",
74 "BITMASK",
75 "SNZI + DISCOVERED MASK",
76 "SNZI + MASK"
77 };
78 return names[VARIANT];
79 }
80
81 relaxed_list(unsigned numLists)
82 : lists(new intrusive_queue_t[numLists])
83 , numLists(numLists)
84 #if VARIANT == SNZI
85 , snzi( std::log2( numLists / 8 ), 2 )
86 #elif VARIANT == SNZM || VARIANT == DISCOVER
87 , snzm( numLists )
88 #endif
89 {
90 assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
91 // assert(sizeof(*this) == 128);
92 std::cout << "Constructing Relaxed List with " << numLists << std::endl;
93
94 #ifndef NO_STATS
95 if(head) this->next = head;
96 head = this;
97 #endif
98 }
99
100 ~relaxed_list() {
101 std::cout << "Destroying Relaxed List" << std::endl;
102 lists.reset();
103 }
104
105 __attribute__((noinline, hot)) void push(node_t * node) {
106 node->_links.ts = rdtscl();
107
108 while(true) {
109 // Pick a random list
110 unsigned i = tls.rng.next() % numLists;
111
112 #ifndef NO_STATS
113 tls.pick.push.attempt++;
114 #endif
115
116 // If we can't lock it retry
117 if( !lists[i].lock.try_lock() ) continue;
118
119 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
120 __attribute__((unused)) int num = numNonEmpty;
121 #endif
122
123 // Actually push it
124 if(lists[i].push(node)) {
125 #if VARIANT == DISCOVER
126 size_t qword = i >> 6ull;
127 size_t bit = i & 63ull;
128 assert(qword == 0);
129 bts(tls.mask, bit);
130 snzm.arrive(i);
131 #elif VARIANT == SNZI
132 snzi.arrive(i);
133 #elif VARIANT == SNZM
134 snzm.arrive(i);
135 #elif VARIANT == BITMASK
136 numNonEmpty++;
137 size_t qword = i >> 6ull;
138 size_t bit = i & 63ull;
139 assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
140 __attribute__((unused)) bool ret = bts(list_mask[qword], bit);
141 assert(!ret);
142 assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
143 #else
144 numNonEmpty++;
145 #endif
146 }
147 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
148 assert(numNonEmpty <= (int)numLists);
149 #endif
150
151 // Unlock and return
152 lists[i].lock.unlock();
153
154 #ifndef NO_STATS
155 tls.pick.push.success++;
156 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
157 tls.empty.push.value += num;
158 tls.empty.push.count += 1;
159 #endif
160 #endif
161 return;
162 }
163 }
164
165 __attribute__((noinline, hot)) node_t * pop() {
166 #if VARIANT == DISCOVER
167 assert(numLists <= 64);
168 while(snzm.query()) {
169 tls.pick.pop.mask_attempt++;
170 unsigned i, j;
171 {
172 // Pick first list totally randomly
173 i = tls.rng.next() % numLists;
174
175 // Pick the other according to the bitmask
176 unsigned r = tls.rng.next();
177
178 size_t mask = tls.mask.load(std::memory_order_relaxed);
179 if(mask == 0) {
180 tls.pick.pop.mask_reset++;
181 mask = (1U << numLists) - 1;
182 tls.mask.store(mask, std::memory_order_relaxed);
183 }
184
185 unsigned b = rand_bit(r, mask);
186
187 assertf(b < 64, "%zu %u", mask, b);
188
189 j = b;
190
191 assert(j < numLists);
192 }
193
194 if(auto node = try_pop(i, j)) return node;
195 }
196 #elif VARIANT == SNZI
197 while(snzi.query()) {
198 // Pick two lists at random
199 int i = tls.rng.next() % numLists;
200 int j = tls.rng.next() % numLists;
201
202 if(auto node = try_pop(i, j)) return node;
203 }
204 #elif VARIANT == SNZM
205 //*
206 while(snzm.query()) {
207 tls.pick.pop.mask_attempt++;
208 unsigned i, j;
209 {
210 // Pick two random number
211 unsigned ri = tls.rng.next();
212 unsigned rj = tls.rng.next();
213
214 // Pick two nodes from it
215 unsigned wdxi = ri & snzm.mask;
216 unsigned wdxj = rj & snzm.mask;
217
218 // Get the masks from the nodes
219 size_t maski = snzm.masks(wdxi);
220 size_t maskj = snzm.masks(wdxj);
221
222 if(maski == 0 && maskj == 0) continue;
223
224 #if defined(__BMI2__)
225 uint64_t idxsi = _pext_u64(snzm.indexes, maski);
226 uint64_t idxsj = _pext_u64(snzm.indexes, maskj);
227
228 auto pi = __builtin_popcountll(maski);
229 auto pj = __builtin_popcountll(maskj);
230
231 ri = pi ? ri & ((pi >> 3) - 1) : 0;
232 rj = pj ? rj & ((pj >> 3) - 1) : 0;
233
234 unsigned bi = (idxsi >> (ri << 3)) & 0xff;
235 unsigned bj = (idxsj >> (rj << 3)) & 0xff;
236 #else
237 unsigned bi = rand_bit(ri >> snzm.depth, maski);
238 unsigned bj = rand_bit(rj >> snzm.depth, maskj);
239 #endif
240
241 i = (bi << snzm.depth) | wdxi;
242 j = (bj << snzm.depth) | wdxj;
243
244 /* paranoid */ assertf(i < numLists, "%u %u", bj, wdxi);
245 /* paranoid */ assertf(j < numLists, "%u %u", bj, wdxj);
246 }
247
248 if(auto node = try_pop(i, j)) return node;
249 }
250 /*/
251 while(snzm.query()) {
252 // Pick two lists at random
253 int i = tls.rng.next() % numLists;
254 int j = tls.rng.next() % numLists;
255
256 if(auto node = try_pop(i, j)) return node;
257 }
258 //*/
259 #elif VARIANT == BITMASK
260 int nnempty;
261 while(0 != (nnempty = numNonEmpty)) {
262 tls.pick.pop.mask_attempt++;
263 unsigned i, j;
264 {
265 // Pick two lists at random
266 unsigned num = ((numLists - 1) >> 6) + 1;
267
268 unsigned ri = tls.rng.next();
269 unsigned rj = tls.rng.next();
270
271 unsigned wdxi = (ri >> 6u) % num;
272 unsigned wdxj = (rj >> 6u) % num;
273
274 size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
275 size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
276
277 if(maski == 0 && maskj == 0) continue;
278
279 unsigned bi = rand_bit(ri, maski);
280 unsigned bj = rand_bit(rj, maskj);
281
282 assertf(bi < 64, "%zu %u", maski, bi);
283 assertf(bj < 64, "%zu %u", maskj, bj);
284
285 i = bi | (wdxi << 6);
286 j = bj | (wdxj << 6);
287
288 assertf(i < numLists, "%u", wdxi << 6);
289 assertf(j < numLists, "%u", wdxj << 6);
290 }
291
292 if(auto node = try_pop(i, j)) return node;
293 }
294 #else
295 while(numNonEmpty != 0) {
296 // Pick two lists at random
297 int i = tls.rng.next() % numLists;
298 int j = tls.rng.next() % numLists;
299
300 if(auto node = try_pop(i, j)) return node;
301 }
302 #endif
303
304 return nullptr;
305 }
306
307private:
308 node_t * try_pop(unsigned i, unsigned j) {
309 #ifndef NO_STATS
310 tls.pick.pop.attempt++;
311 #endif
312
313 #if VARIANT == DISCOVER
314 if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
315 if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
316 #endif
317
318 // Pick the bet list
319 int w = i;
320 if( __builtin_expect(lists[j].ts() != 0, true) ) {
321 w = (lists[i].ts() < lists[j].ts()) ? i : j;
322 }
323
324 auto & list = lists[w];
325 // If list looks empty retry
326 if( list.ts() == 0 ) return nullptr;
327
328 // If we can't get the lock retry
329 if( !list.lock.try_lock() ) return nullptr;
330
331 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
332 __attribute__((unused)) int num = numNonEmpty;
333 #endif
334
335 // If list is empty, unlock and retry
336 if( list.ts() == 0 ) {
337 list.lock.unlock();
338 return nullptr;
339 }
340
341 // Actually pop the list
342 node_t * node;
343 bool emptied;
344 std::tie(node, emptied) = list.pop();
345 assert(node);
346
347 if(emptied) {
348 #if VARIANT == DISCOVER
349 size_t qword = w >> 6ull;
350 size_t bit = w & 63ull;
351 assert(qword == 0);
352 __attribute__((unused)) bool ret = btr(tls.mask, bit);
353 snzm.depart(w);
354 #elif VARIANT == SNZI
355 snzi.depart(w);
356 #elif VARIANT == SNZM
357 snzm.depart(w);
358 #elif VARIANT == BITMASK
359 numNonEmpty--;
360 size_t qword = w >> 6ull;
361 size_t bit = w & 63ull;
362 assert((list_mask[qword] & (1ul << bit)) != 0);
363 __attribute__((unused)) bool ret = btr(list_mask[qword], bit);
364 assert(ret);
365 assert((list_mask[qword] & (1ul << bit)) == 0);
366 #else
367 numNonEmpty--;
368 #endif
369 }
370
371 // Unlock and return
372 list.lock.unlock();
373 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
374 assert(numNonEmpty >= 0);
375 #endif
376 #ifndef NO_STATS
377 tls.pick.pop.success++;
378 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
379 tls.empty.pop.value += num;
380 tls.empty.pop.count += 1;
381 #endif
382 #endif
383 return node;
384 }
385
386private:
387
388 class __attribute__((aligned(128))) intrusive_queue_t {
389 public:
390 typedef spinlock_t lock_t;
391
392 friend class relaxed_list<node_t>;
393
394 struct stat {
395 ssize_t diff = 0;
396 size_t push = 0;
397 size_t pop = 0;
398 };
399
400 private:
401 struct sentinel_t {
402 _LinksFields_t<node_t> _links;
403 };
404
405 lock_t lock;
406 sentinel_t before;
407 sentinel_t after;
408 #ifndef NO_STATS
409 stat s;
410 #endif
411
412#pragma GCC diagnostic push
413#pragma GCC diagnostic ignored "-Winvalid-offsetof"
414 static constexpr auto fields_offset = offsetof( node_t, _links );
415#pragma GCC diagnostic pop
416 public:
417 intrusive_queue_t()
418 : before{{ nullptr, tail() }}
419 , after {{ head(), nullptr }}
420 {
421 /* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
422 /* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
423 /* paranoid */ assert(head()->_links.prev == nullptr);
424 /* paranoid */ assert(head()->_links.next == tail() );
425 /* paranoid */ assert(tail()->_links.next == nullptr);
426 /* paranoid */ assert(tail()->_links.prev == head() );
427 /* paranoid */ assert(sizeof(*this) == 128);
428 /* paranoid */ assert((intptr_t(this) % 128) == 0);
429 }
430
431 ~intrusive_queue_t() = default;
432
433 inline node_t * head() const {
434 node_t * rhead = reinterpret_cast<node_t *>(
435 reinterpret_cast<uintptr_t>( &before ) - fields_offset
436 );
437 assert(rhead);
438 return rhead;
439 }
440
441 inline node_t * tail() const {
442 node_t * rtail = reinterpret_cast<node_t *>(
443 reinterpret_cast<uintptr_t>( &after ) - fields_offset
444 );
445 assert(rtail);
446 return rtail;
447 }
448
449 inline bool push(node_t * node) {
450 assert(lock);
451 assert(node->_links.ts != 0);
452 node_t * tail = this->tail();
453
454 node_t * prev = tail->_links.prev;
455 // assertf(node->_links.ts >= prev->_links.ts,
456 // "New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
457 node->_links.next = tail;
458 node->_links.prev = prev;
459 prev->_links.next = node;
460 tail->_links.prev = node;
461 #ifndef NO_STATS
462 if(enable_stats) {
463 s.diff++;
464 s.push++;
465 }
466 #endif
467 if(before._links.ts == 0l) {
468 before._links.ts = node->_links.ts;
469 assert(node->_links.prev == this->head());
470 return true;
471 }
472 return false;
473 }
474
475 inline std::pair<node_t *, bool> pop() {
476 assert(lock);
477 node_t * head = this->head();
478 node_t * tail = this->tail();
479
480 node_t * node = head->_links.next;
481 node_t * next = node->_links.next;
482 if(node == tail) return {nullptr, false};
483
484 head->_links.next = next;
485 next->_links.prev = head;
486
487 #ifndef NO_STATS
488 if(enable_stats) {
489 s.diff--;
490 s.pop ++;
491 }
492 #endif
493 if(next == tail) {
494 before._links.ts = 0l;
495 return {node, true};
496 }
497 else {
498 assert(next->_links.ts != 0);
499 before._links.ts = next->_links.ts;
500 assert(before._links.ts != 0);
501 return {node, false};
502 }
503 }
504
505 long long ts() const {
506 return before._links.ts;
507 }
508 };
509
510
511public:
512
513 static __attribute__((aligned(128))) thread_local struct TLS {
514 Random rng = { int(rdtscl()) };
515 pick_stat pick;
516 empty_stat empty;
517 __attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
518 } tls;
519
520private:
521 __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
522 const unsigned numLists;
523private:
524 #if VARIANT == SNZI
525 snzi_t snzi;
526 #elif VARIANT == SNZM || VARIANT == DISCOVER
527 snzm_t snzm;
528 #else
529 std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
530 #endif
531 #if VARIANT == BITMASK
532 std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
533 #endif
534
535public:
536 static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t);
537
538#ifndef NO_STATS
539 static void stats_print(std::ostream & os) {
540 auto it = head;
541 while(it) {
542 it->stats_print_local(os);
543 it = it->next;
544 }
545 }
546
547 static void stats_tls_tally() {
548 global_stats.pick.push.attempt += tls.pick.push.attempt;
549 global_stats.pick.push.success += tls.pick.push.success;
550 global_stats.pick.pop .attempt += tls.pick.pop.attempt;
551 global_stats.pick.pop .success += tls.pick.pop.success;
552 global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
553 global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
554
555 global_stats.qstat.push.value += tls.empty.push.value;
556 global_stats.qstat.push.count += tls.empty.push.count;
557 global_stats.qstat.pop .value += tls.empty.pop .value;
558 global_stats.qstat.pop .count += tls.empty.pop .count;
559 }
560
561private:
562 static struct GlobalStats {
563 struct {
564 struct {
565 std::atomic_size_t attempt = { 0 };
566 std::atomic_size_t success = { 0 };
567 } push;
568 struct {
569 std::atomic_size_t attempt = { 0 };
570 std::atomic_size_t success = { 0 };
571 std::atomic_size_t mask_attempt = { 0 };
572 std::atomic_size_t mask_reset = { 0 };
573 } pop;
574 } pick;
575 struct {
576 struct {
577 std::atomic_size_t value = { 0 };
578 std::atomic_size_t count = { 0 };
579 } push;
580 struct {
581 std::atomic_size_t value = { 0 };
582 std::atomic_size_t count = { 0 };
583 } pop;
584 } qstat;
585 } global_stats;
586
587 // Link list of all lists for stats
588 __attribute__((aligned(64))) relaxed_list<node_t> * next = nullptr;
589
590 static relaxed_list<node_t> * head;
591
592 void stats_print_local(std::ostream & os ) {
593 std::cout << "----- Relaxed List Stats -----" << std::endl;
594 // {
595 // ssize_t diff = 0;
596 // size_t num = 0;
597 // ssize_t max = 0;
598
599 // for(size_t i = 0; i < numLists; i++) {
600 // const auto & list = lists[i];
601 // diff+= list.s.diff;
602 // num ++;
603 // max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff;
604 // os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n";
605 // }
606
607 // os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl;
608 // }
609
610 const auto & global = global_stats;
611
612 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
613 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
614 double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
615 double rpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_reset);
616
617 double push_len = double(global.pick.push.attempt ) / global.pick.push.success;
618 double pop_len = double(global.pick.pop .attempt ) / global.pick.pop .success;
619 double mpop_len = double(global.pick.pop .mask_attempt) / global.pick.pop .success;
620 double rpop_len = double(global.pick.pop .mask_reset ) / global.pick.pop .success;
621
622 os << "Push Pick : " << push_sur << " %, len " << push_len << " (" << global.pick.push.attempt << " / " << global.pick.push.success << ")\n";
623 os << "Pop Pick : " << pop_sur << " %, len " << pop_len << " (" << global.pick.pop .attempt << " / " << global.pick.pop .success << ")\n";
624 os << "TryPop Pick : " << mpop_sur << " %, len " << mpop_len << " (" << global.pick.pop .mask_attempt << " / " << global.pick.pop .success << ")\n";
625 os << "Pop M Reset : " << rpop_sur << " %, len " << rpop_len << " (" << global.pick.pop .mask_reset << " / " << global.pick.pop .success << ")\n";
626
627 double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
628 double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count;
629 double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
630 os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
631 os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n";
632 os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
633 }
634#endif
635};
Note: See TracBrowser for help on using the repository browser.