source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@ 47a541d

ADT arm-eh ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast new-ast-unique-expr pthread-emulation qualifiedEnum
Last change on this file since 47a541d was 47a541d, checked in by Thierry Delisle <tdelisle@…>, 5 years ago

Add first draft of SNZI + MASK approach

  • Property mode set to 100644
File size: 15.5 KB
Line 
1#pragma once
2
3#define MACRO_XSTR(s) MACRO_STR(s)
4#define MACRO_STR(s) #s
5
6#define VANILLA 0
7#define SNZI 1
8#define BITMASK 2
9#define DISCOVER 3
10#define SNZM 4
11
12#ifndef VARIANT
13#define VARIANT VANILLA
14#endif
15
16#ifndef NO_STATS
17#include <iostream>
18#endif
19
20#include <cmath>
21#include <memory>
22#include <mutex>
23#include <type_traits>
24
25#include "assert.hpp"
26#include "utils.hpp"
27#include "snzi.hpp"
28#include "snzm.hpp"
29
30using namespace std;
31
32extern bool enable_stats;
33
34struct pick_stat {
35 struct {
36 size_t attempt = 0;
37 size_t success = 0;
38 } push;
39 struct {
40 size_t attempt = 0;
41 size_t success = 0;
42 size_t mask_attempt = 0;
43 size_t mask_reset = 0;
44 } pop;
45};
46
47struct empty_stat {
48 struct {
49 size_t value = 0;
50 size_t count = 0;
51 } push;
52 struct {
53 size_t value = 0;
54 size_t count = 0;
55 } pop;
56};
57
58template<typename node_t>
59struct _LinksFields_t {
60 node_t * prev = nullptr;
61 node_t * next = nullptr;
62 unsigned long long ts = 0;
63};
64
65template<typename node_t>
66class __attribute__((aligned(128))) relaxed_list {
67 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
68
69public:
70 static const char * name() {
71 const char * names[] = {
72 "VANILLA",
73 "SNZI",
74 "BITMASK",
75 "DISCOVER",
76 "SNZI + MASK"
77 };
78 return names[VARIANT];
79 }
80
81 relaxed_list(unsigned numLists)
82 : lists(new intrusive_queue_t[numLists])
83 , numLists(numLists)
84 #if VARIANT == SNZI || VARIANT == DISCOVER
85 , snzi( 1, 8 )
86 #elif VARIANT == SNZM
87 , snzm( numLists )
88 #endif
89 {
90 assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
91 // assert(sizeof(*this) == 128);
92 std::cout << "Constructing Relaxed List with " << numLists << std::endl;
93
94 #ifndef NO_STATS
95 if(head) this->next = head;
96 head = this;
97 #endif
98 }
99
100 ~relaxed_list() {
101 std::cout << "Destroying Relaxed List" << std::endl;
102 lists.reset();
103 }
104
105 __attribute__((noinline, hot)) void push(node_t * node) {
106 node->_links.ts = rdtscl();
107
108 while(true) {
109 // Pick a random list
110 unsigned i = tls.rng.next() % numLists;
111
112 #ifndef NO_STATS
113 tls.pick.push.attempt++;
114 #endif
115
116 // If we can't lock it retry
117 if( !lists[i].lock.try_lock() ) continue;
118
119 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
120 __attribute__((unused)) int num = numNonEmpty;
121 #endif
122
123 // Actually push it
124 if(lists[i].push(node)) {
125 #if VARIANT == DISCOVER
126 size_t qword = i >> 6ull;
127 size_t bit = i & 63ull;
128 assert(qword == 0);
129 bts(tls.mask, bit);
130 snzi.arrive(i);
131 #elif VARIANT == SNZI
132 snzi.arrive(i);
133 #elif VARIANT == SNZM
134 snzm.arrive(i);
135 #elif VARIANT == BITMASK
136 numNonEmpty++;
137 size_t qword = i >> 6ull;
138 size_t bit = i & 63ull;
139 assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
140 __attribute__((unused)) bool ret = bts(list_mask[qword], bit);
141 assert(!ret);
142 assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
143 #else
144 numNonEmpty++;
145 #endif
146 }
147 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
148 assert(numNonEmpty <= (int)numLists);
149 #endif
150
151 // Unlock and return
152 lists[i].lock.unlock();
153
154 #ifndef NO_STATS
155 tls.pick.push.success++;
156 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
157 tls.empty.push.value += num;
158 tls.empty.push.count += 1;
159 #endif
160 #endif
161 return;
162 }
163 }
164
165 __attribute__((noinline, hot)) node_t * pop() {
166 #if VARIANT == DISCOVER
167 assert(numLists <= 64);
168 while(snzi.query()) {
169 tls.pick.pop.mask_attempt++;
170 unsigned i, j;
171 {
172 // Pick first list totally randomly
173 i = tls.rng.next() % numLists;
174
175 // Pick the other according to the bitmask
176 unsigned r = tls.rng.next();
177
178 size_t mask = tls.mask.load(std::memory_order_relaxed);
179 if(mask == 0) {
180 tls.pick.pop.mask_reset++;
181 mask = (1U << numLists) - 1;
182 }
183
184 unsigned b = rand_bit(r, mask);
185
186 assertf(b < 64, "%zu %u", mask, b);
187
188 j = b;
189
190 assert(j < numLists);
191 }
192
193 if(auto node = try_pop(i, j)) return node;
194 }
195 #elif VARIANT == SNZI
196 while(snzi.query()) {
197 // Pick two lists at random
198 int i = tls.rng.next() % numLists;
199 int j = tls.rng.next() % numLists;
200
201 if(auto node = try_pop(i, j)) return node;
202 }
203 #elif VARIANT == SNZM
204 while(snzm.query()) {
205 tls.pick.pop.mask_attempt++;
206 unsigned i, j;
207 {
208 // Pick two random number
209 unsigned ri = tls.rng.next();
210 unsigned rj = tls.rng.next();
211
212 // Pick two nodes from it
213 unsigned wdxi = ri & snzm.mask;
214 unsigned wdxj = rj & snzm.mask;
215
216 // Get the masks from the nodes
217 size_t maski = snzm.masks(wdxi);
218 size_t maskj = snzm.masks(wdxj);
219
220 if(maski == 0 && maskj == 0) continue;
221
222 unsigned bi = rand_bit(ri >> snzm.depth, maski);
223 unsigned bj = rand_bit(rj >> snzm.depth, maskj);
224
225 assertf(bi < 64, "%zu %u", maski, bi);
226 assertf(bj < 64, "%zu %u", maskj, bj);
227
228 i = (bi << snzm.depth) | wdxi;
229 j = (bj << snzm.depth) | wdxj;
230
231 assertf(i < numLists, "%u %u", bj, wdxi);
232 assertf(j < numLists, "%u %u", bj, wdxj);
233 }
234
235 if(auto node = try_pop(i, j)) return node;
236 }
237 #elif VARIANT == BITMASK
238 int nnempty;
239 while(0 != (nnempty = numNonEmpty)) {
240 tls.pick.pop.mask_attempt++;
241 unsigned i, j;
242 {
243 // Pick two lists at random
244 unsigned num = ((numLists - 1) >> 6) + 1;
245
246 unsigned ri = tls.rng.next();
247 unsigned rj = tls.rng.next();
248
249 unsigned wdxi = (ri >> 6u) % num;
250 unsigned wdxj = (rj >> 6u) % num;
251
252 size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
253 size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
254
255 if(maski == 0 && maskj == 0) continue;
256
257 unsigned bi = rand_bit(ri, maski);
258 unsigned bj = rand_bit(rj, maskj);
259
260 assertf(bi < 64, "%zu %u", maski, bi);
261 assertf(bj < 64, "%zu %u", maskj, bj);
262
263 i = bi | (wdxi << 6);
264 j = bj | (wdxj << 6);
265
266 assertf(i < numLists, "%u", wdxi << 6);
267 assertf(j < numLists, "%u", wdxj << 6);
268 }
269
270 if(auto node = try_pop(i, j)) return node;
271 }
272 #else
273 while(numNonEmpty != 0) {
274 // Pick two lists at random
275 int i = tls.rng.next() % numLists;
276 int j = tls.rng.next() % numLists;
277
278 if(auto node = try_pop(i, j)) return node;
279 }
280 #endif
281
282 return nullptr;
283 }
284
285private:
286 node_t * try_pop(unsigned i, unsigned j) {
287 #ifndef NO_STATS
288 tls.pick.pop.attempt++;
289 #endif
290
291 #if VARIANT == DISCOVER
292 if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
293 if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
294 #endif
295
296 // Pick the bet list
297 int w = i;
298 if( __builtin_expect(lists[j].ts() != 0, true) ) {
299 w = (lists[i].ts() < lists[j].ts()) ? i : j;
300 }
301
302 auto & list = lists[w];
303 // If list looks empty retry
304 if( list.ts() == 0 ) return nullptr;
305
306 // If we can't get the lock retry
307 if( !list.lock.try_lock() ) return nullptr;
308
309 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
310 __attribute__((unused)) int num = numNonEmpty;
311 #endif
312
313 // If list is empty, unlock and retry
314 if( list.ts() == 0 ) {
315 list.lock.unlock();
316 return nullptr;
317 }
318
319 // Actually pop the list
320 node_t * node;
321 bool emptied;
322 std::tie(node, emptied) = list.pop();
323 assert(node);
324
325 if(emptied) {
326 #if VARIANT == DISCOVER
327 size_t qword = w >> 6ull;
328 size_t bit = w & 63ull;
329 assert(qword == 0);
330 __attribute__((unused)) bool ret = btr(tls.mask, bit);
331 snzi.depart(w);
332 #elif VARIANT == SNZI
333 snzi.depart(w);
334 #elif VARIANT == SNZM
335 snzm.depart(w);
336 #elif VARIANT == BITMASK
337 numNonEmpty--;
338 size_t qword = w >> 6ull;
339 size_t bit = w & 63ull;
340 assert((list_mask[qword] & (1ul << bit)) != 0);
341 __attribute__((unused)) bool ret = btr(list_mask[qword], bit);
342 assert(ret);
343 assert((list_mask[qword] & (1ul << bit)) == 0);
344 #else
345 numNonEmpty--;
346 #endif
347 }
348
349 // Unlock and return
350 list.lock.unlock();
351 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
352 assert(numNonEmpty >= 0);
353 #endif
354 #ifndef NO_STATS
355 tls.pick.pop.success++;
356 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
357 tls.empty.pop.value += num;
358 tls.empty.pop.count += 1;
359 #endif
360 #endif
361 return node;
362 }
363
364private:
365
366 class __attribute__((aligned(128))) intrusive_queue_t {
367 public:
368 typedef spinlock_t lock_t;
369
370 friend class relaxed_list<node_t>;
371
372 struct stat {
373 ssize_t diff = 0;
374 size_t push = 0;
375 size_t pop = 0;
376 };
377
378 private:
379 struct sentinel_t {
380 _LinksFields_t<node_t> _links;
381 };
382
383 lock_t lock;
384 sentinel_t before;
385 sentinel_t after;
386 #ifndef NO_STATS
387 stat s;
388 #endif
389
390#pragma GCC diagnostic push
391#pragma GCC diagnostic ignored "-Winvalid-offsetof"
392 static constexpr auto fields_offset = offsetof( node_t, _links );
393#pragma GCC diagnostic pop
394 public:
395 intrusive_queue_t()
396 : before{{ nullptr, tail() }}
397 , after {{ head(), nullptr }}
398 {
399 /* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
400 /* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
401 /* paranoid */ assert(head()->_links.prev == nullptr);
402 /* paranoid */ assert(head()->_links.next == tail() );
403 /* paranoid */ assert(tail()->_links.next == nullptr);
404 /* paranoid */ assert(tail()->_links.prev == head() );
405 /* paranoid */ assert(sizeof(*this) == 128);
406 /* paranoid */ assert((intptr_t(this) % 128) == 0);
407 }
408
409 ~intrusive_queue_t() = default;
410
411 inline node_t * head() const {
412 node_t * rhead = reinterpret_cast<node_t *>(
413 reinterpret_cast<uintptr_t>( &before ) - fields_offset
414 );
415 assert(rhead);
416 return rhead;
417 }
418
419 inline node_t * tail() const {
420 node_t * rtail = reinterpret_cast<node_t *>(
421 reinterpret_cast<uintptr_t>( &after ) - fields_offset
422 );
423 assert(rtail);
424 return rtail;
425 }
426
427 inline bool push(node_t * node) {
428 assert(lock);
429 assert(node->_links.ts != 0);
430 node_t * tail = this->tail();
431
432 node_t * prev = tail->_links.prev;
433 // assertf(node->_links.ts >= prev->_links.ts,
434 // "New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
435 node->_links.next = tail;
436 node->_links.prev = prev;
437 prev->_links.next = node;
438 tail->_links.prev = node;
439 #ifndef NO_STATS
440 if(enable_stats) {
441 s.diff++;
442 s.push++;
443 }
444 #endif
445 if(before._links.ts == 0l) {
446 before._links.ts = node->_links.ts;
447 assert(node->_links.prev == this->head());
448 return true;
449 }
450 return false;
451 }
452
453 inline std::pair<node_t *, bool> pop() {
454 assert(lock);
455 node_t * head = this->head();
456 node_t * tail = this->tail();
457
458 node_t * node = head->_links.next;
459 node_t * next = node->_links.next;
460 if(node == tail) return {nullptr, false};
461
462 head->_links.next = next;
463 next->_links.prev = head;
464
465 #ifndef NO_STATS
466 if(enable_stats) {
467 s.diff--;
468 s.pop ++;
469 }
470 #endif
471 if(next == tail) {
472 before._links.ts = 0l;
473 return {node, true};
474 }
475 else {
476 assert(next->_links.ts != 0);
477 before._links.ts = next->_links.ts;
478 assert(before._links.ts != 0);
479 return {node, false};
480 }
481 }
482
483 long long ts() const {
484 return before._links.ts;
485 }
486 };
487
488
489public:
490
491 static __attribute__((aligned(128))) thread_local struct TLS {
492 Random rng = { int(rdtscl()) };
493 pick_stat pick;
494 empty_stat empty;
495 __attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
496 } tls;
497
498private:
499 __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
500 const unsigned numLists;
501private:
502 #if VARIANT == SNZI || VARIANT == DISCOVER
503 snzi_t snzi;
504 #elif VARIANT == SNZM
505 snzm_t snzm;
506 #else
507 std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
508 #endif
509 #if VARIANT == BITMASK
510 std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
511 #endif
512
513public:
514 static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t);
515
516#ifndef NO_STATS
517 static void stats_print(std::ostream & os) {
518 auto it = head;
519 while(it) {
520 it->stats_print_local(os);
521 it = it->next;
522 }
523 }
524
525 static void stats_tls_tally() {
526 global_stats.pick.push.attempt += tls.pick.push.attempt;
527 global_stats.pick.push.success += tls.pick.push.success;
528 global_stats.pick.pop .attempt += tls.pick.pop.attempt;
529 global_stats.pick.pop .success += tls.pick.pop.success;
530 global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
531 global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
532
533 global_stats.qstat.push.value += tls.empty.push.value;
534 global_stats.qstat.push.count += tls.empty.push.count;
535 global_stats.qstat.pop .value += tls.empty.pop .value;
536 global_stats.qstat.pop .count += tls.empty.pop .count;
537 }
538
539private:
540 static struct GlobalStats {
541 struct {
542 struct {
543 std::atomic_size_t attempt = { 0 };
544 std::atomic_size_t success = { 0 };
545 } push;
546 struct {
547 std::atomic_size_t attempt = { 0 };
548 std::atomic_size_t success = { 0 };
549 std::atomic_size_t mask_attempt = { 0 };
550 std::atomic_size_t mask_reset = { 0 };
551 } pop;
552 } pick;
553 struct {
554 struct {
555 std::atomic_size_t value = { 0 };
556 std::atomic_size_t count = { 0 };
557 } push;
558 struct {
559 std::atomic_size_t value = { 0 };
560 std::atomic_size_t count = { 0 };
561 } pop;
562 } qstat;
563 } global_stats;
564
565 // Link list of all lists for stats
566 __attribute__((aligned(64))) relaxed_list<node_t> * next = nullptr;
567
568 static relaxed_list<node_t> * head;
569
570 void stats_print_local(std::ostream & os ) {
571 std::cout << "----- Relaxed List Stats -----" << std::endl;
572 // {
573 // ssize_t diff = 0;
574 // size_t num = 0;
575 // ssize_t max = 0;
576
577 // for(size_t i = 0; i < numLists; i++) {
578 // const auto & list = lists[i];
579 // diff+= list.s.diff;
580 // num ++;
581 // max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff;
582 // os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n";
583 // }
584
585 // os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl;
586 // }
587
588 const auto & global = global_stats;
589
590 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
591 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
592 double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
593 double rpop_sur = (100.0 * double(global.pick.pop .mask_reset) / global.pick.pop .mask_attempt);
594
595 os << "Push Pick % : " << push_sur << "(" << global.pick.push.success << " / " << global.pick.push.attempt << ")\n";
596 os << "Pop Pick % : " << pop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .attempt << ")\n";
597 os << "TryPop Pick % : " << mpop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .mask_attempt << ")\n";
598 os << "Pop M Reset % : " << rpop_sur << "(" << global.pick.pop .mask_reset << " / " << global.pick.pop .mask_attempt << ")\n";
599
600 double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
601 double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count;
602 double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
603 os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
604 os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n";
605 os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
606 }
607#endif
608};
Note: See TracBrowser for help on using the repository browser.