source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@ 8f4f3e0

ADT arm-eh ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast new-ast-unique-expr pthread-emulation qualifiedEnum stuck-waitfor-destruct
Last change on this file since 8f4f3e0 was 8f4f3e0, checked in by Thierry Delisle <tdelisle@…>, 6 years ago

bitmask discovery no use snzi

  • Property mode set to 100644
File size: 14.1 KB
Line 
1#pragma once
2
3#ifndef NO_STATS
4#include <iostream>
5#endif
6
7#include <cmath>
8#include <memory>
9#include <mutex>
10#include <type_traits>
11
12#include "assert.hpp"
13#include "utils.hpp"
14#include "snzi.hpp"
15
16using namespace std;
17
18extern bool enable_stats;
19
20struct pick_stat {
21 struct {
22 size_t attempt = 0;
23 size_t success = 0;
24 } push;
25 struct {
26 size_t attempt = 0;
27 size_t success = 0;
28 size_t mask_attempt = 0;
29 size_t mask_reset = 0;
30 } pop;
31};
32
33struct empty_stat {
34 struct {
35 size_t value = 0;
36 size_t count = 0;
37 } push;
38 struct {
39 size_t value = 0;
40 size_t count = 0;
41 } pop;
42};
43
44template<typename node_t>
45struct _LinksFields_t {
46 node_t * prev = nullptr;
47 node_t * next = nullptr;
48 unsigned long long ts = 0;
49};
50
51template<typename node_t>
52class __attribute__((aligned(128))) relaxed_list {
53 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
54
55public:
56 relaxed_list(unsigned numLists)
57 : lists(new intrusive_queue_t[numLists])
58 , numLists(numLists)
59 #if defined(SIMPLE_SNZI) || defined(DISCOVER_BITMASK)
60 , snzi( std::log2( numLists / 8 ) )
61 #endif
62 {
63 assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
64 // assert(sizeof(*this) == 128);
65 std::cout << "Constructing Relaxed List with " << numLists << std::endl;
66
67 #ifndef NO_STATS
68 if(head) this->next = head;
69 head = this;
70 #endif
71 }
72
73 ~relaxed_list() {
74 std::cout << "Destroying Relaxed List" << std::endl;
75 lists.reset();
76 }
77
78 __attribute__((noinline, hot)) void push(node_t * node) {
79 node->_links.ts = rdtscl();
80
81 while(true) {
82 // Pick a random list
83 unsigned i = tls.rng.next() % numLists;
84
85 #ifndef NO_STATS
86 tls.pick.push.attempt++;
87 #endif
88
89 // If we can't lock it retry
90 if( !lists[i].lock.try_lock() ) continue;
91
92 #if !defined(SIMPLE_SNZI) && !defined(DISCOVER_BITMASK)
93 __attribute__((unused)) int num = numNonEmpty;
94 #endif
95
96 // Actually push it
97 if(lists[i].push(node)) {
98 #if defined(DISCOVER_BITMASK)
99 size_t qword = i >> 6ull;
100 size_t bit = i & 63ull;
101 assert(qword == 0);
102 bts(tls.mask, bit);
103 snzi.arrive(i);
104 #elif defined(SIMPLE_SNZI)
105 snzi.arrive(i);
106 #elif !defined(NO_BITMASK)
107 numNonEmpty++;
108 size_t qword = i >> 6ull;
109 size_t bit = i & 63ull;
110 assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
111 __attribute__((unused)) bool ret = bts(list_mask[qword], bit);
112 assert(!ret);
113 assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
114 #else
115 numNonEmpty++;
116 #endif
117 }
118 #if !defined(SIMPLE_SNZI) && !defined(DISCOVER_BITMASK)
119 assert(numNonEmpty <= (int)numLists);
120 #endif
121
122 // Unlock and return
123 lists[i].lock.unlock();
124
125 #ifndef NO_STATS
126 tls.pick.push.success++;
127 #if !defined(SIMPLE_SNZI) && !defined(DISCOVER_BITMASK)
128 tls.empty.push.value += num;
129 tls.empty.push.count += 1;
130 #endif
131 #endif
132 return;
133 }
134 }
135
136 __attribute__((noinline, hot)) node_t * pop() {
137 #if defined(DISCOVER_BITMASK)
138 assert(numLists <= 64);
139 while(snzi.query()) {
140 tls.pick.pop.mask_attempt++;
141 unsigned i, j;
142 {
143 // Pick first list totally randomly
144 i = tls.rng.next() % numLists;
145
146 // Pick the other according to the bitmask
147 unsigned r = tls.rng.next();
148
149 size_t mask = tls.mask.load(std::memory_order_relaxed);
150 if(mask == 0) {
151 tls.pick.pop.mask_reset++;
152 mask = (1U << numLists) - 1;
153 }
154
155 unsigned b = rand_bit(r, mask);
156
157 assertf(b < 64, "%zu %u", mask, b);
158
159 j = b;
160
161 assert(j < numLists);
162 }
163
164 if(auto node = try_pop(i, j)) return node;
165 }
166 #elif defined(SIMPLE_SNZI)
167 while(snzi.query()) {
168 // Pick two lists at random
169 int i = tls.rng.next() % numLists;
170 int j = tls.rng.next() % numLists;
171
172 if(auto node = try_pop(i, j)) return node;
173 }
174 #elif !defined(NO_BITMASK)
175 int nnempty;
176 while(0 != (nnempty = numNonEmpty)) {
177 tls.pick.pop.mask_attempt++;
178 unsigned i, j;
179 {
180 // Pick two lists at random
181 unsigned num = ((numLists - 1) >> 6) + 1;
182
183 unsigned ri = tls.rng.next();
184 unsigned rj = tls.rng.next();
185
186 unsigned wdxi = (ri >> 6u) % num;
187 unsigned wdxj = (rj >> 6u) % num;
188
189 size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
190 size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
191
192 if(maski == 0 && maskj == 0) continue;
193
194 unsigned bi = rand_bit(ri, maski);
195 unsigned bj = rand_bit(rj, maskj);
196
197 assertf(bi < 64, "%zu %u", maski, bi);
198 assertf(bj < 64, "%zu %u", maskj, bj);
199
200 i = bi | (wdxi << 6);
201 j = bj | (wdxj << 6);
202
203 assertf(i < numLists, "%u", wdxi << 6);
204 assertf(j < numLists, "%u", wdxj << 6);
205 }
206
207 if(auto node = try_pop(i, j)) return node;
208 }
209 #else
210 while(numNonEmpty != 0) {
211 // Pick two lists at random
212 int i = tls.rng.next() % numLists;
213 int j = tls.rng.next() % numLists;
214
215 if(auto node = try_pop(i, j)) return node;
216 }
217 #endif
218
219 return nullptr;
220 }
221
222private:
223 node_t * try_pop(unsigned i, unsigned j) {
224 #ifndef NO_STATS
225 tls.pick.pop.attempt++;
226 #endif
227
228 #if defined(DISCOVER_BITMASK)
229 if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
230 if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
231 #endif
232
233 // Pick the bet list
234 int w = i;
235 if( __builtin_expect(lists[j].ts() != 0, true) ) {
236 w = (lists[i].ts() < lists[j].ts()) ? i : j;
237 }
238
239 auto & list = lists[w];
240 // If list looks empty retry
241 if( list.ts() == 0 ) return nullptr;
242
243 // If we can't get the lock retry
244 if( !list.lock.try_lock() ) return nullptr;
245
246 #if !defined(SIMPLE_SNZI) && !defined(DISCOVER_BITMASK)
247 __attribute__((unused)) int num = numNonEmpty;
248 #endif
249
250 // If list is empty, unlock and retry
251 if( list.ts() == 0 ) {
252 list.lock.unlock();
253 return nullptr;
254 }
255
256 // Actually pop the list
257 node_t * node;
258 bool emptied;
259 std::tie(node, emptied) = list.pop();
260 assert(node);
261
262 if(emptied) {
263 #if defined(DISCOVER_BITMASK)
264 size_t qword = w >> 6ull;
265 size_t bit = w & 63ull;
266 assert(qword == 0);
267 __attribute__((unused)) bool ret = btr(tls.mask, bit);
268 snzi.depart(w);
269 #elif defined(SIMPLE_SNZI)
270 snzi.depart(w);
271 #elif !defined(NO_BITMASK)
272 numNonEmpty--;
273 size_t qword = w >> 6ull;
274 size_t bit = w & 63ull;
275 assert((list_mask[qword] & (1ul << bit)) != 0);
276 __attribute__((unused)) bool ret = btr(list_mask[qword], bit);
277 assert(ret);
278 assert((list_mask[qword] & (1ul << bit)) == 0);
279 #else
280 numNonEmpty--;
281 #endif
282 }
283
284 // Unlock and return
285 list.lock.unlock();
286 #if !defined(SIMPLE_SNZI) && !defined(DISCOVER_BITMASK)
287 assert(numNonEmpty >= 0);
288 #endif
289 #ifndef NO_STATS
290 tls.pick.pop.success++;
291 #if !defined(SIMPLE_SNZI) && !defined(DISCOVER_BITMASK)
292 tls.empty.pop.value += num;
293 tls.empty.pop.count += 1;
294 #endif
295 #endif
296 return node;
297 }
298
299private:
300
301 class __attribute__((aligned(128))) intrusive_queue_t {
302 public:
303 typedef spinlock_t lock_t;
304
305 friend class relaxed_list<node_t>;
306
307 struct stat {
308 ssize_t diff = 0;
309 size_t push = 0;
310 size_t pop = 0;
311 };
312
313 private:
314 struct sentinel_t {
315 _LinksFields_t<node_t> _links;
316 };
317
318 lock_t lock;
319 sentinel_t before;
320 sentinel_t after;
321 #ifndef NO_STATS
322 stat s;
323 #endif
324
325#pragma GCC diagnostic push
326#pragma GCC diagnostic ignored "-Winvalid-offsetof"
327 static constexpr auto fields_offset = offsetof( node_t, _links );
328#pragma GCC diagnostic pop
329 public:
330 intrusive_queue_t()
331 : before{{ nullptr, tail() }}
332 , after {{ head(), nullptr }}
333 {
334 /* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
335 /* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
336 /* paranoid */ assert(head()->_links.prev == nullptr);
337 /* paranoid */ assert(head()->_links.next == tail() );
338 /* paranoid */ assert(tail()->_links.next == nullptr);
339 /* paranoid */ assert(tail()->_links.prev == head() );
340 /* paranoid */ assert(sizeof(*this) == 128);
341 /* paranoid */ assert((intptr_t(this) % 128) == 0);
342 }
343
344 ~intrusive_queue_t() = default;
345
346 inline node_t * head() const {
347 node_t * rhead = reinterpret_cast<node_t *>(
348 reinterpret_cast<uintptr_t>( &before ) - fields_offset
349 );
350 assert(rhead);
351 return rhead;
352 }
353
354 inline node_t * tail() const {
355 node_t * rtail = reinterpret_cast<node_t *>(
356 reinterpret_cast<uintptr_t>( &after ) - fields_offset
357 );
358 assert(rtail);
359 return rtail;
360 }
361
362 inline bool push(node_t * node) {
363 assert(lock);
364 assert(node->_links.ts != 0);
365 node_t * tail = this->tail();
366
367 node_t * prev = tail->_links.prev;
368 // assertf(node->_links.ts >= prev->_links.ts,
369 // "New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
370 node->_links.next = tail;
371 node->_links.prev = prev;
372 prev->_links.next = node;
373 tail->_links.prev = node;
374 #ifndef NO_STATS
375 if(enable_stats) {
376 s.diff++;
377 s.push++;
378 }
379 #endif
380 if(before._links.ts == 0l) {
381 before._links.ts = node->_links.ts;
382 assert(node->_links.prev == this->head());
383 return true;
384 }
385 return false;
386 }
387
388 inline std::pair<node_t *, bool> pop() {
389 assert(lock);
390 node_t * head = this->head();
391 node_t * tail = this->tail();
392
393 node_t * node = head->_links.next;
394 node_t * next = node->_links.next;
395 if(node == tail) return {nullptr, false};
396
397 head->_links.next = next;
398 next->_links.prev = head;
399
400 #ifndef NO_STATS
401 if(enable_stats) {
402 s.diff--;
403 s.pop ++;
404 }
405 #endif
406 if(next == tail) {
407 before._links.ts = 0l;
408 return {node, true};
409 }
410 else {
411 assert(next->_links.ts != 0);
412 before._links.ts = next->_links.ts;
413 assert(before._links.ts != 0);
414 return {node, false};
415 }
416 }
417
418 long long ts() const {
419 return before._links.ts;
420 }
421 };
422
423
424public:
425
426 static __attribute__((aligned(128))) thread_local struct TLS {
427 Random rng = { int(rdtscl()) };
428 pick_stat pick;
429 empty_stat empty;
430 __attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
431 } tls;
432
433private:
434 __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
435 const unsigned numLists;
436private:
437 #if defined(SIMPLE_SNZI) || defined(DISCOVER_BITMASK)
438 snzi_t snzi;
439 #else
440 std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
441 std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
442 #endif
443
444public:
445 static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t);
446
447#ifndef NO_STATS
448 static void stats_print(std::ostream & os) {
449 auto it = head;
450 while(it) {
451 it->stats_print_local(os);
452 it = it->next;
453 }
454 }
455
456 static void stats_tls_tally() {
457 global_stats.pick.push.attempt += tls.pick.push.attempt;
458 global_stats.pick.push.success += tls.pick.push.success;
459 global_stats.pick.pop .attempt += tls.pick.pop.attempt;
460 global_stats.pick.pop .success += tls.pick.pop.success;
461 global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
462 global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
463
464 global_stats.qstat.push.value += tls.empty.push.value;
465 global_stats.qstat.push.count += tls.empty.push.count;
466 global_stats.qstat.pop .value += tls.empty.pop .value;
467 global_stats.qstat.pop .count += tls.empty.pop .count;
468 }
469
470private:
471 static struct GlobalStats {
472 struct {
473 struct {
474 std::atomic_size_t attempt = { 0 };
475 std::atomic_size_t success = { 0 };
476 } push;
477 struct {
478 std::atomic_size_t attempt = { 0 };
479 std::atomic_size_t success = { 0 };
480 std::atomic_size_t mask_attempt = { 0 };
481 std::atomic_size_t mask_reset = { 0 };
482 } pop;
483 } pick;
484 struct {
485 struct {
486 std::atomic_size_t value = { 0 };
487 std::atomic_size_t count = { 0 };
488 } push;
489 struct {
490 std::atomic_size_t value = { 0 };
491 std::atomic_size_t count = { 0 };
492 } pop;
493 } qstat;
494 } global_stats;
495
496 // Link list of all lists for stats
497 __attribute__((aligned(64))) relaxed_list<node_t> * next = nullptr;
498
499 static relaxed_list<node_t> * head;
500
501 void stats_print_local(std::ostream & os ) {
502 std::cout << "----- Relaxed List Stats -----" << std::endl;
503 {
504 ssize_t diff = 0;
505 size_t num = 0;
506 ssize_t max = 0;
507
508 for(size_t i = 0; i < numLists; i++) {
509 const auto & list = lists[i];
510 diff+= list.s.diff;
511 num ++;
512 max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff;
513 os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n";
514 }
515
516 os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl;
517 }
518
519 const auto & global = global_stats;
520
521 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
522 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
523 double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
524 double rpop_sur = (100.0 * double(global.pick.pop .mask_reset) / global.pick.pop .mask_attempt);
525
526 os << "Push Pick % : " << push_sur << "(" << global.pick.push.success << " / " << global.pick.push.attempt << ")\n";
527 os << "Pop Pick % : " << pop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .attempt << ")\n";
528 os << "TryPop Pick % : " << mpop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .mask_attempt << ")\n";
529 os << "Pop M Reset % : " << rpop_sur << "(" << global.pick.pop .mask_reset << " / " << global.pick.pop .mask_attempt << ")\n";
530
531 double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
532 double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count;
533 double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
534 os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
535 os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n";
536 os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
537 }
538#endif
539};
Note: See TracBrowser for help on using the repository browser.