source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@ 33e62f1b

ADT arm-eh ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast new-ast-unique-expr pthread-emulation qualifiedEnum stuck-waitfor-destruct
Last change on this file since 33e62f1b was 33e62f1b, checked in by Thierry Delisle <tdelisle@…>, 6 years ago

Added simple SNZI implementation for the relaxed list

  • Property mode set to 100644
File size: 13.8 KB
Line 
1#pragma once
2
3#ifndef NO_STATS
4#include <iostream>
5#endif
6
7#include <memory>
8#include <mutex>
9#include <type_traits>
10
11#include "assert.hpp"
12#include "utils.hpp"
13#include "snzi.hpp"
14
15using namespace std;
16
17extern bool enable_stats;
18
19struct pick_stat {
20 struct {
21 size_t attempt = 0;
22 size_t success = 0;
23 } push;
24 struct {
25 size_t attempt = 0;
26 size_t success = 0;
27 size_t mask_attempt = 0;
28 size_t mask_reset = 0;
29 } pop;
30};
31
32struct empty_stat {
33 struct {
34 size_t value = 0;
35 size_t count = 0;
36 } push;
37 struct {
38 size_t value = 0;
39 size_t count = 0;
40 } pop;
41};
42
43template<typename node_t>
44struct _LinksFields_t {
45 node_t * prev = nullptr;
46 node_t * next = nullptr;
47 unsigned long long ts = 0;
48};
49
50template<typename node_t>
51class __attribute__((aligned(128))) relaxed_list {
52 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
53
54public:
55 relaxed_list(unsigned numLists)
56 : lists(new intrusive_queue_t[numLists])
57 , numLists(numLists)
58 #if defined(SIMPLE_SNZI)
59 , snzi(4)
60 #endif
61 {
62 assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
63 // assert(sizeof(*this) == 128);
64 std::cout << "Constructing Relaxed List with " << numLists << std::endl;
65
66 #ifndef NO_STATS
67 if(head) this->next = head;
68 head = this;
69 #endif
70 }
71
72 ~relaxed_list() {
73 std::cout << "Destroying Relaxed List" << std::endl;
74 lists.reset();
75 }
76
77 __attribute__((noinline, hot)) void push(node_t * node) {
78 node->_links.ts = rdtscl();
79
80 while(true) {
81 // Pick a random list
82 unsigned i = tls.rng.next() % numLists;
83
84 #ifndef NO_STATS
85 tls.pick.push.attempt++;
86 #endif
87
88 // If we can't lock it retry
89 if( !lists[i].lock.try_lock() ) continue;
90
91 #if !defined(SIMPLE_SNZI)
92 __attribute__((unused)) int num = numNonEmpty;
93 #endif
94
95 // Actually push it
96 if(lists[i].push(node)) {
97 #if defined(DISCOVER_BITMASK)
98 size_t qword = i >> 6ull;
99 size_t bit = i & 63ull;
100 assert(qword == 0);
101 bts(tls.mask, bit);
102 #elif defined(SIMPLE_SNZI)
103 snzi.arrive(i);
104 #elif !defined(NO_BITMASK)
105 numNonEmpty++;
106 size_t qword = i >> 6ull;
107 size_t bit = i & 63ull;
108 assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
109 __attribute__((unused)) bool ret = bts(list_mask[qword], bit);
110 assert(!ret);
111 assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
112 #else
113 numNonEmpty++;
114 #endif
115 }
116 #if !defined(SIMPLE_SNZI)
117 assert(numNonEmpty <= (int)numLists);
118 #endif
119
120 // Unlock and return
121 lists[i].lock.unlock();
122
123 #ifndef NO_STATS
124 tls.pick.push.success++;
125 #if !defined(SIMPLE_SNZI)
126 tls.empty.push.value += num;
127 tls.empty.push.count += 1;
128 #endif
129 #endif
130 return;
131 }
132 }
133
134 __attribute__((noinline, hot)) node_t * pop() {
135 #if defined(DISCOVER_BITMASK)
136 assert(numLists <= 64);
137 while(true) {
138 tls.pick.pop.mask_attempt++;
139 unsigned i, j;
140 {
141 // Pick two lists at random
142 unsigned num = ((numLists - 1) >> 6) + 1;
143
144 // Pick first list totally randomly
145 i = tls.rng.next() % numLists;
146
147 // Pick the other according to the bitmask
148 unsigned r = tls.rng.next();
149
150 size_t mask = tls.mask.load(std::memory_order_relaxed);
151 if(mask == 0) {
152 tls.pick.pop.mask_reset++;
153 mask = (1U << numLists) - 1;
154 }
155
156 unsigned b = rand_bit(r, mask);
157
158 assertf(b < 64, "%zu %u", mask, b);
159
160 j = b;
161
162 assert(j < numLists);
163 }
164
165 if(auto node = try_pop(i, j)) return node;
166 }
167 #elif defined(SIMPLE_SNZI)
168 while(snzi.query()) {
169 // Pick two lists at random
170 int i = tls.rng.next() % numLists;
171 int j = tls.rng.next() % numLists;
172
173 if(auto node = try_pop(i, j)) return node;
174 }
175 #elif !defined(NO_BITMASK)
176 int nnempty;
177 while(0 != (nnempty = numNonEmpty)) {
178 tls.pick.pop.mask_attempt++;
179 unsigned i, j;
180 {
181 // Pick two lists at random
182 unsigned num = ((numLists - 1) >> 6) + 1;
183
184 unsigned ri = tls.rng.next();
185 unsigned rj = tls.rng.next();
186
187 unsigned wdxi = (ri >> 6u) % num;
188 unsigned wdxj = (rj >> 6u) % num;
189
190 size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
191 size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
192
193 if(maski == 0 && maskj == 0) continue;
194
195 unsigned bi = rand_bit(ri, maski);
196 unsigned bj = rand_bit(rj, maskj);
197
198 assertf(bi < 64, "%zu %u", maski, bi);
199 assertf(bj < 64, "%zu %u", maskj, bj);
200
201 i = bi | (wdxi << 6);
202 j = bj | (wdxj << 6);
203
204 assertf(i < numLists, "%u", wdxi << 6);
205 assertf(j < numLists, "%u", wdxj << 6);
206 }
207
208 if(auto node = try_pop(i, j)) return node;
209 }
210 #else
211 while(numNonEmpty != 0) {
212 // Pick two lists at random
213 int i = tls.rng.next() % numLists;
214 int j = tls.rng.next() % numLists;
215
216 if(auto node = try_pop(i, j)) return node;
217 }
218 #endif
219
220 return nullptr;
221 }
222
223private:
224 node_t * try_pop(unsigned i, unsigned j) {
225 #ifndef NO_STATS
226 tls.pick.pop.attempt++;
227 #endif
228
229 #if defined(DISCOVER_BITMASK)
230 if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
231 if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
232 #endif
233
234 // Pick the bet list
235 int w = i;
236 if( __builtin_expect(lists[j].ts() != 0, true) ) {
237 w = (lists[i].ts() < lists[j].ts()) ? i : j;
238 }
239
240 auto & list = lists[w];
241 // If list looks empty retry
242 if( list.ts() == 0 ) return nullptr;
243
244 // If we can't get the lock retry
245 if( !list.lock.try_lock() ) return nullptr;
246
247 #if !defined(SIMPLE_SNZI)
248 __attribute__((unused)) int num = numNonEmpty;
249 #endif
250
251 // If list is empty, unlock and retry
252 if( list.ts() == 0 ) {
253 list.lock.unlock();
254 return nullptr;
255 }
256
257 // Actually pop the list
258 node_t * node;
259 bool emptied;
260 std::tie(node, emptied) = list.pop();
261 assert(node);
262
263 if(emptied) {
264 #if defined(DISCOVER_BITMASK)
265 size_t qword = w >> 6ull;
266 size_t bit = w & 63ull;
267 assert(qword == 0);
268 __attribute__((unused)) bool ret = btr(tls.mask, bit);
269 #elif defined(SIMPLE_SNZI)
270 snzi.depart(w);
271 #elif !defined(NO_BITMASK)
272 numNonEmpty--;
273 size_t qword = w >> 6ull;
274 size_t bit = w & 63ull;
275 assert((list_mask[qword] & (1ul << bit)) != 0);
276 __attribute__((unused)) bool ret = btr(list_mask[qword], bit);
277 assert(ret);
278 assert((list_mask[qword] & (1ul << bit)) == 0);
279 #else
280 numNonEmpty--;
281 #endif
282 }
283
284 // Unlock and return
285 list.lock.unlock();
286 #if !defined(SIMPLE_SNZI)
287 assert(numNonEmpty >= 0);
288 #endif
289 #ifndef NO_STATS
290 tls.pick.pop.success++;
291 #if !defined(SIMPLE_SNZI)
292 tls.empty.pop.value += num;
293 tls.empty.pop.count += 1;
294 #endif
295 #endif
296 return node;
297 }
298
299private:
300
301 class __attribute__((aligned(128))) intrusive_queue_t {
302 public:
303 typedef spinlock_t lock_t;
304
305 friend class relaxed_list<node_t>;
306
307 struct stat {
308 ssize_t diff = 0;
309 size_t push = 0;
310 size_t pop = 0;
311 };
312
313 private:
314 struct sentinel_t {
315 _LinksFields_t<node_t> _links;
316 };
317
318 lock_t lock;
319 sentinel_t before;
320 sentinel_t after;
321 #ifndef NO_STATS
322 stat s;
323 #endif
324
325#pragma GCC diagnostic push
326#pragma GCC diagnostic ignored "-Winvalid-offsetof"
327 static constexpr auto fields_offset = offsetof( node_t, _links );
328#pragma GCC diagnostic pop
329 public:
330 intrusive_queue_t()
331 : before{{ nullptr, tail() }}
332 , after {{ head(), nullptr }}
333 {
334 /* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
335 /* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
336 /* paranoid */ assert(head()->_links.prev == nullptr);
337 /* paranoid */ assert(head()->_links.next == tail() );
338 /* paranoid */ assert(tail()->_links.next == nullptr);
339 /* paranoid */ assert(tail()->_links.prev == head() );
340 /* paranoid */ assert(sizeof(*this) == 128);
341 /* paranoid */ assert((intptr_t(this) % 128) == 0);
342 }
343
344 ~intrusive_queue_t() = default;
345
346 inline node_t * head() const {
347 node_t * rhead = reinterpret_cast<node_t *>(
348 reinterpret_cast<uintptr_t>( &before ) - fields_offset
349 );
350 assert(rhead);
351 return rhead;
352 }
353
354 inline node_t * tail() const {
355 node_t * rtail = reinterpret_cast<node_t *>(
356 reinterpret_cast<uintptr_t>( &after ) - fields_offset
357 );
358 assert(rtail);
359 return rtail;
360 }
361
362 inline bool push(node_t * node) {
363 assert(lock);
364 assert(node->_links.ts != 0);
365 node_t * tail = this->tail();
366
367 node_t * prev = tail->_links.prev;
368 // assertf(node->_links.ts >= prev->_links.ts,
369 // "New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
370 node->_links.next = tail;
371 node->_links.prev = prev;
372 prev->_links.next = node;
373 tail->_links.prev = node;
374 #ifndef NO_STATS
375 if(enable_stats) {
376 s.diff++;
377 s.push++;
378 }
379 #endif
380 if(before._links.ts == 0l) {
381 before._links.ts = node->_links.ts;
382 assert(node->_links.prev == this->head());
383 return true;
384 }
385 return false;
386 }
387
388 inline std::pair<node_t *, bool> pop() {
389 assert(lock);
390 node_t * head = this->head();
391 node_t * tail = this->tail();
392
393 node_t * node = head->_links.next;
394 node_t * next = node->_links.next;
395 if(node == tail) return {nullptr, false};
396
397 head->_links.next = next;
398 next->_links.prev = head;
399
400 #ifndef NO_STATS
401 if(enable_stats) {
402 s.diff--;
403 s.pop ++;
404 }
405 #endif
406 if(next == tail) {
407 before._links.ts = 0l;
408 return {node, true};
409 }
410 else {
411 assert(next->_links.ts != 0);
412 before._links.ts = next->_links.ts;
413 assert(before._links.ts != 0);
414 return {node, false};
415 }
416 }
417
418 long long ts() const {
419 return before._links.ts;
420 }
421 };
422
423
424public:
425
426 static __attribute__((aligned(128))) thread_local struct TLS {
427 Random rng = { int(rdtscl()) };
428 pick_stat pick;
429 empty_stat empty;
430 __attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
431 } tls;
432
433private:
434 __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
435 const unsigned numLists;
436private:
437 #if defined(SIMPLE_SNZI)
438 snzi_t snzi;
439 #else
440 std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
441 std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
442 #endif
443
444public:
445 static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t);
446
447#ifndef NO_STATS
448 static void stats_print(std::ostream & os) {
449 auto it = head;
450 while(it) {
451 it->stats_print_local(os);
452 it = it->next;
453 }
454 }
455
456 static void stats_tls_tally() {
457 global_stats.pick.push.attempt += tls.pick.push.attempt;
458 global_stats.pick.push.success += tls.pick.push.success;
459 global_stats.pick.pop .attempt += tls.pick.pop.attempt;
460 global_stats.pick.pop .success += tls.pick.pop.success;
461 global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
462 global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
463
464 global_stats.qstat.push.value += tls.empty.push.value;
465 global_stats.qstat.push.count += tls.empty.push.count;
466 global_stats.qstat.pop .value += tls.empty.pop .value;
467 global_stats.qstat.pop .count += tls.empty.pop .count;
468 }
469
470private:
471 static struct GlobalStats {
472 struct {
473 struct {
474 std::atomic_size_t attempt = { 0 };
475 std::atomic_size_t success = { 0 };
476 } push;
477 struct {
478 std::atomic_size_t attempt = { 0 };
479 std::atomic_size_t success = { 0 };
480 std::atomic_size_t mask_attempt = { 0 };
481 std::atomic_size_t mask_reset = { 0 };
482 } pop;
483 } pick;
484 struct {
485 struct {
486 std::atomic_size_t value = { 0 };
487 std::atomic_size_t count = { 0 };
488 } push;
489 struct {
490 std::atomic_size_t value = { 0 };
491 std::atomic_size_t count = { 0 };
492 } pop;
493 } qstat;
494 } global_stats;
495
496 // Link list of all lists for stats
497 __attribute__((aligned(64))) relaxed_list<node_t> * next = nullptr;
498
499 static relaxed_list<node_t> * head;
500
501 void stats_print_local(std::ostream & os ) {
502 std::cout << "----- Relaxed List Stats -----" << std::endl;
503 {
504 ssize_t diff = 0;
505 size_t num = 0;
506 ssize_t max = 0;
507
508 for(size_t i = 0; i < numLists; i++) {
509 const auto & list = lists[i];
510 diff+= list.s.diff;
511 num ++;
512 max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff;
513 os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n";
514 }
515
516 os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl;
517 }
518
519 const auto & global = global_stats;
520
521 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
522 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
523 double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
524 double rpop_sur = (100.0 * double(global.pick.pop .mask_reset) / global.pick.pop .mask_attempt);
525
526 os << "Push Pick % : " << push_sur << "(" << global.pick.push.success << " / " << global.pick.push.attempt << ")\n";
527 os << "Pop Pick % : " << pop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .attempt << ")\n";
528 os << "TryPop Pick % : " << mpop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .mask_attempt << ")\n";
529 os << "Pop M Reset % : " << rpop_sur << "(" << global.pick.pop .mask_reset << " / " << global.pick.pop .mask_attempt << ")\n";
530
531 double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
532 double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count;
533 double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
534 os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
535 os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n";
536 os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
537 }
538#endif
539};
Note: See TracBrowser for help on using the repository browser.