source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@ 8e1b1bb

ADT arm-eh ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast new-ast-unique-expr pthread-emulation qualifiedEnum stuck-waitfor-destruct
Last change on this file since 8e1b1bb was 8e1b1bb, checked in by Thierry Delisle <tdelisle@…>, 6 years ago

Now using a single macro for algorithmic variants

  • Property mode set to 100644
File size: 14.2 KB
Line 
1#pragma once
2
// Algorithmic variants for emptiness tracking.
//
// BUGFIX: these used to be C++ enumerators ("enum VARIANTS { VANILLA, ... }"),
// but this file selects variant code with preprocessor comparisons such as
// "#if VARIANT == SNZI".  The preprocessor knows nothing about C++ enums:
// inside "#if", any identifier that is not a macro silently evaluates to 0,
// so every comparison degenerated to 0 == 0 and all variants compared equal.
// Object-like macros with distinct values make the "#if" selection real while
// keeping VANILLA/SNZI/BITMASK/DISCOVER usable as integer constants in code.
#define VANILLA  0
#define SNZI     1
#define BITMASK  2
#define DISCOVER 3
9
10#ifndef VARIANT
11#define VARIANT VANILLA
12#endif
13
14#ifndef NO_STATS
15#include <iostream>
16#endif
17
#include <atomic>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <tuple>
#include <type_traits>
#include <utility>

#include "assert.hpp"
#include "utils.hpp"
#include "snzi.hpp"
26
27#define MACRO_XSTR(s) MACRO_STR(s)
28#define MACRO_STR(s) #s
29
30using namespace std;
31
32extern bool enable_stats;
33
// Per-thread counters for the random "pick a sublist" strategy.
// Success rates are computed later as success/attempt in stats_print_local.
struct pick_stat {
	struct {
		size_t attempt = 0; // push tries, including those that lost the try_lock
		size_t success = 0; // pushes that actually enqueued a node
	} push;
	struct {
		size_t attempt = 0; // calls to try_pop
		size_t success = 0; // pops that returned a node
		size_t mask_attempt = 0; // mask-guided pick rounds (BITMASK/DISCOVER pop loops)
		size_t mask_reset = 0;   // times the cached mask was empty and refilled (DISCOVER)
	} pop;
};
46
// Per-thread accumulators for the number of non-empty sublists observed at
// each operation (value/count = average queue occupancy); only maintained in
// the variants that keep a numNonEmpty counter.
struct empty_stat {
	struct {
		size_t value = 0; // sum of numNonEmpty samples taken during push
		size_t count = 0; // number of push samples
	} push;
	struct {
		size_t value = 0; // sum of numNonEmpty samples taken during pop
		size_t count = 0; // number of pop samples
	} pop;
};
57
// Intrusive link fields.  A user node type must expose this, by value, as a
// member named "_links" (enforced by the static_assert in relaxed_list).
// Layout matters: the sentinel/offsetof trick in intrusive_queue_t relies on
// this struct being the node's only link state.
template<typename node_t>
struct _LinksFields_t {
	node_t * prev = nullptr;   // previous node in the doubly-linked sublist
	node_t * next = nullptr;   // next node in the doubly-linked sublist
	unsigned long long ts = 0; // rdtscl() timestamp taken at push; 0 means unset
};
64
65template<typename node_t>
66class __attribute__((aligned(128))) relaxed_list {
67 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
68
69public:
70 static const char * name() {
71 return MACRO_XSTR(VARIANT);
72 }
73
74 relaxed_list(unsigned numLists)
75 : lists(new intrusive_queue_t[numLists])
76 , numLists(numLists)
77 #if VARIANT == SNZI || VARIANT == DISCOVER
78 , snzi( std::log2( numLists / 8 ) )
79 #endif
80 {
81 assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
82 // assert(sizeof(*this) == 128);
83 std::cout << "Constructing Relaxed List with " << numLists << std::endl;
84
85 #ifndef NO_STATS
86 if(head) this->next = head;
87 head = this;
88 #endif
89 }
90
91 ~relaxed_list() {
92 std::cout << "Destroying Relaxed List" << std::endl;
93 lists.reset();
94 }
95
96 __attribute__((noinline, hot)) void push(node_t * node) {
97 node->_links.ts = rdtscl();
98
99 while(true) {
100 // Pick a random list
101 unsigned i = tls.rng.next() % numLists;
102
103 #ifndef NO_STATS
104 tls.pick.push.attempt++;
105 #endif
106
107 // If we can't lock it retry
108 if( !lists[i].lock.try_lock() ) continue;
109
110 #if VARIANT != SNZI && VARIANT != DISCOVER
111 __attribute__((unused)) int num = numNonEmpty;
112 #endif
113
114 // Actually push it
115 if(lists[i].push(node)) {
116 #if VARIANT == DISCOVER
117 size_t qword = i >> 6ull;
118 size_t bit = i & 63ull;
119 assert(qword == 0);
120 bts(tls.mask, bit);
121 snzi.arrive(i);
122 #elif VARIANT == SNZI
123 snzi.arrive(i);
124 #elif VARIANT == BITMASK
125 numNonEmpty++;
126 size_t qword = i >> 6ull;
127 size_t bit = i & 63ull;
128 assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
129 __attribute__((unused)) bool ret = bts(list_mask[qword], bit);
130 assert(!ret);
131 assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
132 #else
133 numNonEmpty++;
134 #endif
135 }
136 #if VARIANT != SNZI && VARIANT != DISCOVER
137 assert(numNonEmpty <= (int)numLists);
138 #endif
139
140 // Unlock and return
141 lists[i].lock.unlock();
142
143 #ifndef NO_STATS
144 tls.pick.push.success++;
145 #if VARIANT != SNZI && VARIANT != DISCOVER
146 tls.empty.push.value += num;
147 tls.empty.push.count += 1;
148 #endif
149 #endif
150 return;
151 }
152 }
153
154 __attribute__((noinline, hot)) node_t * pop() {
155 #if VARIANT == DISCOVER
156 assert(numLists <= 64);
157 while(snzi.query()) {
158 tls.pick.pop.mask_attempt++;
159 unsigned i, j;
160 {
161 // Pick first list totally randomly
162 i = tls.rng.next() % numLists;
163
164 // Pick the other according to the bitmask
165 unsigned r = tls.rng.next();
166
167 size_t mask = tls.mask.load(std::memory_order_relaxed);
168 if(mask == 0) {
169 tls.pick.pop.mask_reset++;
170 mask = (1U << numLists) - 1;
171 }
172
173 unsigned b = rand_bit(r, mask);
174
175 assertf(b < 64, "%zu %u", mask, b);
176
177 j = b;
178
179 assert(j < numLists);
180 }
181
182 if(auto node = try_pop(i, j)) return node;
183 }
184 #elif VARIANT == SNZI
185 while(snzi.query()) {
186 // Pick two lists at random
187 int i = tls.rng.next() % numLists;
188 int j = tls.rng.next() % numLists;
189
190 if(auto node = try_pop(i, j)) return node;
191 }
192 #elif VARIANT == BITMASK
193 int nnempty;
194 while(0 != (nnempty = numNonEmpty)) {
195 tls.pick.pop.mask_attempt++;
196 unsigned i, j;
197 {
198 // Pick two lists at random
199 unsigned num = ((numLists - 1) >> 6) + 1;
200
201 unsigned ri = tls.rng.next();
202 unsigned rj = tls.rng.next();
203
204 unsigned wdxi = (ri >> 6u) % num;
205 unsigned wdxj = (rj >> 6u) % num;
206
207 size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
208 size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
209
210 if(maski == 0 && maskj == 0) continue;
211
212 unsigned bi = rand_bit(ri, maski);
213 unsigned bj = rand_bit(rj, maskj);
214
215 assertf(bi < 64, "%zu %u", maski, bi);
216 assertf(bj < 64, "%zu %u", maskj, bj);
217
218 i = bi | (wdxi << 6);
219 j = bj | (wdxj << 6);
220
221 assertf(i < numLists, "%u", wdxi << 6);
222 assertf(j < numLists, "%u", wdxj << 6);
223 }
224
225 if(auto node = try_pop(i, j)) return node;
226 }
227 #else
228 while(numNonEmpty != 0) {
229 // Pick two lists at random
230 int i = tls.rng.next() % numLists;
231 int j = tls.rng.next() % numLists;
232
233 if(auto node = try_pop(i, j)) return node;
234 }
235 #endif
236
237 return nullptr;
238 }
239
240private:
241 node_t * try_pop(unsigned i, unsigned j) {
242 #ifndef NO_STATS
243 tls.pick.pop.attempt++;
244 #endif
245
246 #if VARIANT == DISCOVER
247 if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
248 if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
249 #endif
250
251 // Pick the bet list
252 int w = i;
253 if( __builtin_expect(lists[j].ts() != 0, true) ) {
254 w = (lists[i].ts() < lists[j].ts()) ? i : j;
255 }
256
257 auto & list = lists[w];
258 // If list looks empty retry
259 if( list.ts() == 0 ) return nullptr;
260
261 // If we can't get the lock retry
262 if( !list.lock.try_lock() ) return nullptr;
263
264 #if VARIANT != SNZI && VARIANT != DISCOVER
265 __attribute__((unused)) int num = numNonEmpty;
266 #endif
267
268 // If list is empty, unlock and retry
269 if( list.ts() == 0 ) {
270 list.lock.unlock();
271 return nullptr;
272 }
273
274 // Actually pop the list
275 node_t * node;
276 bool emptied;
277 std::tie(node, emptied) = list.pop();
278 assert(node);
279
280 if(emptied) {
281 #if VARIANT == DISCOVER
282 size_t qword = w >> 6ull;
283 size_t bit = w & 63ull;
284 assert(qword == 0);
285 __attribute__((unused)) bool ret = btr(tls.mask, bit);
286 snzi.depart(w);
287 #elif VARIANT == SNZI
288 snzi.depart(w);
289 #elif VARIANT == BITMASK
290 numNonEmpty--;
291 size_t qword = w >> 6ull;
292 size_t bit = w & 63ull;
293 assert((list_mask[qword] & (1ul << bit)) != 0);
294 __attribute__((unused)) bool ret = btr(list_mask[qword], bit);
295 assert(ret);
296 assert((list_mask[qword] & (1ul << bit)) == 0);
297 #else
298 numNonEmpty--;
299 #endif
300 }
301
302 // Unlock and return
303 list.lock.unlock();
304 #if VARIANT != SNZI && VARIANT != DISCOVER
305 assert(numNonEmpty >= 0);
306 #endif
307 #ifndef NO_STATS
308 tls.pick.pop.success++;
309 #if VARIANT != SNZI && VARIANT != DISCOVER
310 tls.empty.pop.value += num;
311 tls.empty.pop.count += 1;
312 #endif
313 #endif
314 return node;
315 }
316
317private:
318
319 class __attribute__((aligned(128))) intrusive_queue_t {
320 public:
321 typedef spinlock_t lock_t;
322
323 friend class relaxed_list<node_t>;
324
325 struct stat {
326 ssize_t diff = 0;
327 size_t push = 0;
328 size_t pop = 0;
329 };
330
331 private:
332 struct sentinel_t {
333 _LinksFields_t<node_t> _links;
334 };
335
336 lock_t lock;
337 sentinel_t before;
338 sentinel_t after;
339 #ifndef NO_STATS
340 stat s;
341 #endif
342
343#pragma GCC diagnostic push
344#pragma GCC diagnostic ignored "-Winvalid-offsetof"
345 static constexpr auto fields_offset = offsetof( node_t, _links );
346#pragma GCC diagnostic pop
347 public:
348 intrusive_queue_t()
349 : before{{ nullptr, tail() }}
350 , after {{ head(), nullptr }}
351 {
352 /* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
353 /* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
354 /* paranoid */ assert(head()->_links.prev == nullptr);
355 /* paranoid */ assert(head()->_links.next == tail() );
356 /* paranoid */ assert(tail()->_links.next == nullptr);
357 /* paranoid */ assert(tail()->_links.prev == head() );
358 /* paranoid */ assert(sizeof(*this) == 128);
359 /* paranoid */ assert((intptr_t(this) % 128) == 0);
360 }
361
362 ~intrusive_queue_t() = default;
363
364 inline node_t * head() const {
365 node_t * rhead = reinterpret_cast<node_t *>(
366 reinterpret_cast<uintptr_t>( &before ) - fields_offset
367 );
368 assert(rhead);
369 return rhead;
370 }
371
372 inline node_t * tail() const {
373 node_t * rtail = reinterpret_cast<node_t *>(
374 reinterpret_cast<uintptr_t>( &after ) - fields_offset
375 );
376 assert(rtail);
377 return rtail;
378 }
379
380 inline bool push(node_t * node) {
381 assert(lock);
382 assert(node->_links.ts != 0);
383 node_t * tail = this->tail();
384
385 node_t * prev = tail->_links.prev;
386 // assertf(node->_links.ts >= prev->_links.ts,
387 // "New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
388 node->_links.next = tail;
389 node->_links.prev = prev;
390 prev->_links.next = node;
391 tail->_links.prev = node;
392 #ifndef NO_STATS
393 if(enable_stats) {
394 s.diff++;
395 s.push++;
396 }
397 #endif
398 if(before._links.ts == 0l) {
399 before._links.ts = node->_links.ts;
400 assert(node->_links.prev == this->head());
401 return true;
402 }
403 return false;
404 }
405
406 inline std::pair<node_t *, bool> pop() {
407 assert(lock);
408 node_t * head = this->head();
409 node_t * tail = this->tail();
410
411 node_t * node = head->_links.next;
412 node_t * next = node->_links.next;
413 if(node == tail) return {nullptr, false};
414
415 head->_links.next = next;
416 next->_links.prev = head;
417
418 #ifndef NO_STATS
419 if(enable_stats) {
420 s.diff--;
421 s.pop ++;
422 }
423 #endif
424 if(next == tail) {
425 before._links.ts = 0l;
426 return {node, true};
427 }
428 else {
429 assert(next->_links.ts != 0);
430 before._links.ts = next->_links.ts;
431 assert(before._links.ts != 0);
432 return {node, false};
433 }
434 }
435
436 long long ts() const {
437 return before._links.ts;
438 }
439 };
440
441
442public:
443
444 static __attribute__((aligned(128))) thread_local struct TLS {
445 Random rng = { int(rdtscl()) };
446 pick_stat pick;
447 empty_stat empty;
448 __attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
449 } tls;
450
451private:
452 __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
453 const unsigned numLists;
454private:
455 #if VARIANT == SNZI || VARIANT == DISCOVER
456 snzi_t snzi;
457 #else
458 std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
459 #endif
460 #if VARIANT == BITMASK
461 std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
462 #endif
463
464public:
465 static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t);
466
467#ifndef NO_STATS
468 static void stats_print(std::ostream & os) {
469 auto it = head;
470 while(it) {
471 it->stats_print_local(os);
472 it = it->next;
473 }
474 }
475
476 static void stats_tls_tally() {
477 global_stats.pick.push.attempt += tls.pick.push.attempt;
478 global_stats.pick.push.success += tls.pick.push.success;
479 global_stats.pick.pop .attempt += tls.pick.pop.attempt;
480 global_stats.pick.pop .success += tls.pick.pop.success;
481 global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
482 global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
483
484 global_stats.qstat.push.value += tls.empty.push.value;
485 global_stats.qstat.push.count += tls.empty.push.count;
486 global_stats.qstat.pop .value += tls.empty.pop .value;
487 global_stats.qstat.pop .count += tls.empty.pop .count;
488 }
489
490private:
491 static struct GlobalStats {
492 struct {
493 struct {
494 std::atomic_size_t attempt = { 0 };
495 std::atomic_size_t success = { 0 };
496 } push;
497 struct {
498 std::atomic_size_t attempt = { 0 };
499 std::atomic_size_t success = { 0 };
500 std::atomic_size_t mask_attempt = { 0 };
501 std::atomic_size_t mask_reset = { 0 };
502 } pop;
503 } pick;
504 struct {
505 struct {
506 std::atomic_size_t value = { 0 };
507 std::atomic_size_t count = { 0 };
508 } push;
509 struct {
510 std::atomic_size_t value = { 0 };
511 std::atomic_size_t count = { 0 };
512 } pop;
513 } qstat;
514 } global_stats;
515
516 // Link list of all lists for stats
517 __attribute__((aligned(64))) relaxed_list<node_t> * next = nullptr;
518
519 static relaxed_list<node_t> * head;
520
521 void stats_print_local(std::ostream & os ) {
522 std::cout << "----- Relaxed List Stats -----" << std::endl;
523 {
524 ssize_t diff = 0;
525 size_t num = 0;
526 ssize_t max = 0;
527
528 for(size_t i = 0; i < numLists; i++) {
529 const auto & list = lists[i];
530 diff+= list.s.diff;
531 num ++;
532 max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff;
533 os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n";
534 }
535
536 os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl;
537 }
538
539 const auto & global = global_stats;
540
541 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
542 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
543 double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
544 double rpop_sur = (100.0 * double(global.pick.pop .mask_reset) / global.pick.pop .mask_attempt);
545
546 os << "Push Pick % : " << push_sur << "(" << global.pick.push.success << " / " << global.pick.push.attempt << ")\n";
547 os << "Pop Pick % : " << pop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .attempt << ")\n";
548 os << "TryPop Pick % : " << mpop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .mask_attempt << ")\n";
549 os << "Pop M Reset % : " << rpop_sur << "(" << global.pick.pop .mask_reset << " / " << global.pick.pop .mask_attempt << ")\n";
550
551 double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
552 double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count;
553 double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
554 os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
555 os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n";
556 os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
557 }
558#endif
559};
Note: See TracBrowser for help on using the repository browser.