source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@ 2e5fd8b6

ADT arm-eh ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast new-ast-unique-expr pthread-emulation qualifiedEnum stuck-waitfor-destruct
Last change on this file since 2e5fd8b6 was 2e5fd8b6, checked in by Thierry Delisle <tdelisle@…>, 6 years ago

Changed seed to be more different per threads and added more snzi nodes

  • Property mode set to 100644
File size: 14.9 KB
Line 
1#pragma once
2#define LIST_VARIANT relaxed_list
3
4#define VANILLA 0
5#define SNZI 1
6#define BITMASK 2
7#define DISCOVER 3
8#define SNZM 4
9#define BIAS 5
10#define BACK 6
11#define BACKBIAS 7
12
13#ifndef VARIANT
14#define VARIANT VANILLA
15#endif
16
17#ifndef NO_STATS
18#include <iostream>
19#endif
20
21#include <cmath>
22#include <functional>
23#include <memory>
24#include <mutex>
25#include <thread>
26#include <type_traits>
27
28#include "assert.hpp"
29#include "utils.hpp"
30#include "links.hpp"
31#include "snzi.hpp"
32#include "snzm.hpp"
33
34using namespace std;
35
36struct pick_stat {
37 struct {
38 size_t attempt = 0;
39 size_t success = 0;
40 size_t local = 0;
41 } push;
42 struct {
43 size_t attempt = 0;
44 size_t success = 0;
45 size_t mask_attempt = 0;
46 size_t mask_reset = 0;
47 size_t local = 0;
48 } pop;
49};
50
51struct empty_stat {
52 struct {
53 size_t value = 0;
54 size_t count = 0;
55 } push;
56 struct {
57 size_t value = 0;
58 size_t count = 0;
59 } pop;
60};
61
62template<typename node_t>
63class __attribute__((aligned(128))) relaxed_list {
64 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
65
66public:
67 static const char * name() {
68 const char * names[] = {
69 "RELAXED: VANILLA",
70 "RELAXED: SNZI",
71 "RELAXED: BITMASK",
72 "RELAXED: SNZI + DISCOVERED MASK",
73 "RELAXED: SNZI + MASK",
74 "RELAXED: SNZI + LOCAL BIAS",
75 "RELAXED: SNZI + REVERSE RNG",
76 "RELAXED: SNZI + LOCAL BIAS + REVERSE RNG"
77 };
78 return names[VARIANT];
79 }
80
81 relaxed_list(unsigned numThreads, unsigned numQueues)
82 : numLists(numThreads * numQueues)
83 , lists(new intrusive_queue_t<node_t>[numLists])
84 #if VARIANT == SNZI || VARIANT == BIAS || VARIANT == BACK || VARIANT == BACKBIAS
85 , snzi( std::log2( numLists / (numQueues) ), 2 )
86 #elif VARIANT == SNZM || VARIANT == DISCOVER
87 , snzm( numLists )
88 #endif
89 {
90 assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
91 std::cout << "Constructing Relaxed List with " << numLists << std::endl;
92 }
93
94 ~relaxed_list() {
95 std::cout << "Destroying Relaxed List" << std::endl;
96 lists.reset();
97 }
98
99 __attribute__((noinline, hot)) void push(node_t * node) {
100 node->_links.ts = rdtscl();
101
102 while(true) {
103 // Pick a random list
104 unsigned i = idx_from_r(tls.rng1.next(), VARIANT == BIAS || VARIANT == BACKBIAS);
105
106 #ifndef NO_STATS
107 tls.pick.push.attempt++;
108 #endif
109
110 // If we can't lock it retry
111 if( !lists[i].lock.try_lock() ) continue;
112
113 #if VARIANT == VANILLA || VARIANT == BITMASK
114 __attribute__((unused)) int num = numNonEmpty;
115 #endif
116
117 // Actually push it
118 if(lists[i].push(node)) {
119 #if VARIANT == DISCOVER
120 size_t qword = i >> 6ull;
121 size_t bit = i & 63ull;
122 assert(qword == 0);
123 bts(tls.mask, bit);
124 snzm.arrive(i);
125 #elif VARIANT == SNZI || VARIANT == BIAS
126 snzi.arrive(i);
127 #elif VARIANT == BACK || VARIANT == BACKBIAS
128 snzi.arrive(i);
129 tls.rng2.set_raw_state( tls.rng1.get_raw_state());
130 #elif VARIANT == SNZM
131 snzm.arrive(i);
132 #elif VARIANT == BITMASK
133 numNonEmpty++;
134 size_t qword = i >> 6ull;
135 size_t bit = i & 63ull;
136 assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
137 __attribute__((unused)) bool ret = bts(list_mask[qword], bit);
138 assert(!ret);
139 assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
140 #else
141 numNonEmpty++;
142 #endif
143 }
144 #if VARIANT == VANILLA || VARIANT == BITMASK
145 assert(numNonEmpty <= (int)numLists);
146 #endif
147
148 // Unlock and return
149 lists[i].lock.unlock();
150
151 #ifndef NO_STATS
152 tls.pick.push.success++;
153 #if VARIANT == VANILLA || VARIANT == BITMASK
154 tls.empty.push.value += num;
155 tls.empty.push.count += 1;
156 #endif
157 #endif
158 return;
159 }
160 }
161
162 __attribute__((noinline, hot)) node_t * pop() {
163 #if VARIANT == DISCOVER
164 assert(numLists <= 64);
165 while(snzm.query()) {
166 tls.pick.pop.mask_attempt++;
167 unsigned i, j;
168 {
169 // Pick first list totally randomly
170 i = tls.rng1.next() % numLists;
171
172 // Pick the other according to the bitmask
173 unsigned r = tls.rng1.next();
174
175 size_t mask = tls.mask.load(std::memory_order_relaxed);
176 if(mask == 0) {
177 tls.pick.pop.mask_reset++;
178 mask = (1U << numLists) - 1;
179 tls.mask.store(mask, std::memory_order_relaxed);
180 }
181
182 unsigned b = rand_bit(r, mask);
183
184 assertf(b < 64, "%zu %u", mask, b);
185
186 j = b;
187
188 assert(j < numLists);
189 }
190
191 if(auto node = try_pop(i, j)) return node;
192 }
193 #elif VARIANT == SNZI
194 while(snzi.query()) {
195 // Pick two lists at random
196 int i = tls.rng1.next() % numLists;
197 int j = tls.rng1.next() % numLists;
198
199 if(auto node = try_pop(i, j)) return node;
200 }
201
202 #elif VARIANT == BACK
203 while(snzi.query()) {
204 // Pick two lists at random
205 int i = tls.rng2.prev() % numLists;
206 int j = tls.rng2.prev() % numLists;
207
208 if(auto node = try_pop(i, j)) return node;
209 }
210
211 #elif VARIANT == BACKBIAS
212 while(snzi.query()) {
213 // Pick two lists at random
214 int i = idx_from_r(tls.rng2.prev(), true);
215 int j = idx_from_r(tls.rng2.prev(), true);
216
217 if(auto node = try_pop(i, j)) return node;
218 }
219
220 #elif VARIANT == BIAS
221 while(snzi.query()) {
222 // Pick two lists at random
223 unsigned ri = tls.rng1.next();
224 unsigned i;
225 unsigned j = tls.rng1.next();
226 if(0 == (ri & 0xF)) {
227 i = (ri >> 4) % numLists;
228 } else {
229 i = tls.my_queue + ((ri >> 4) % 4);
230 j = tls.my_queue + ((j >> 4) % 4);
231 tls.pick.pop.local++;
232 }
233 i %= numLists;
234 j %= numLists;
235
236 if(auto node = try_pop(i, j)) return node;
237 }
238 #elif VARIANT == SNZM
239 //*
240 while(snzm.query()) {
241 tls.pick.pop.mask_attempt++;
242 unsigned i, j;
243 {
244 // Pick two random number
245 unsigned ri = tls.rng1.next();
246 unsigned rj = tls.rng1.next();
247
248 // Pick two nodes from it
249 unsigned wdxi = ri & snzm.mask;
250 // unsigned wdxj = rj & snzm.mask;
251
252 // Get the masks from the nodes
253 // size_t maski = snzm.masks(wdxi);
254 size_t maskj = snzm.masks(wdxj);
255
256 if(maski == 0 && maskj == 0) continue;
257
258 #if defined(__BMI2__)
259 uint64_t idxsi = _pext_u64(snzm.indexes, maski);
260 // uint64_t idxsj = _pext_u64(snzm.indexes, maskj);
261
262 auto pi = __builtin_popcountll(maski);
263 // auto pj = __builtin_popcountll(maskj);
264
265 ri = pi ? ri & ((pi >> 3) - 1) : 0;
266 rj = pj ? rj & ((pj >> 3) - 1) : 0;
267
268 unsigned bi = (idxsi >> (ri << 3)) & 0xff;
269 unsigned bj = (idxsj >> (rj << 3)) & 0xff;
270 #else
271 unsigned bi = rand_bit(ri >> snzm.depth, maski);
272 unsigned bj = rand_bit(rj >> snzm.depth, maskj);
273 #endif
274
275 i = (bi << snzm.depth) | wdxi;
276 j = (bj << snzm.depth) | wdxj;
277
278 /* paranoid */ assertf(i < numLists, "%u %u", bj, wdxi);
279 /* paranoid */ assertf(j < numLists, "%u %u", bj, wdxj);
280 }
281
282 if(auto node = try_pop(i, j)) return node;
283 }
284 /*/
285 while(snzm.query()) {
286 // Pick two lists at random
287 int i = tls.rng1.next() % numLists;
288 int j = tls.rng1.next() % numLists;
289
290 if(auto node = try_pop(i, j)) return node;
291 }
292 //*/
293 #elif VARIANT == BITMASK
294 int nnempty;
295 while(0 != (nnempty = numNonEmpty)) {
296 tls.pick.pop.mask_attempt++;
297 unsigned i, j;
298 {
299 // Pick two lists at random
300 unsigned num = ((numLists - 1) >> 6) + 1;
301
302 unsigned ri = tls.rng1.next();
303 unsigned rj = tls.rng1.next();
304
305 unsigned wdxi = (ri >> 6u) % num;
306 unsigned wdxj = (rj >> 6u) % num;
307
308 size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
309 size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
310
311 if(maski == 0 && maskj == 0) continue;
312
313 unsigned bi = rand_bit(ri, maski);
314 unsigned bj = rand_bit(rj, maskj);
315
316 assertf(bi < 64, "%zu %u", maski, bi);
317 assertf(bj < 64, "%zu %u", maskj, bj);
318
319 i = bi | (wdxi << 6);
320 j = bj | (wdxj << 6);
321
322 assertf(i < numLists, "%u", wdxi << 6);
323 assertf(j < numLists, "%u", wdxj << 6);
324 }
325
326 if(auto node = try_pop(i, j)) return node;
327 }
328 #else
329 while(numNonEmpty != 0) {
330 // Pick two lists at random
331 int i = tls.rng1.next() % numLists;
332 int j = tls.rng1.next() % numLists;
333
334 if(auto node = try_pop(i, j)) return node;
335 }
336 #endif
337
338 return nullptr;
339 }
340
341private:
342 node_t * try_pop(unsigned i, unsigned j) {
343 #ifndef NO_STATS
344 tls.pick.pop.attempt++;
345 #endif
346
347 #if VARIANT == DISCOVER
348 if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
349 if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
350 #endif
351
352 // Pick the bet list
353 int w = i;
354 if( __builtin_expect(lists[j].ts() != 0, true) ) {
355 w = (lists[i].ts() < lists[j].ts()) ? i : j;
356 }
357
358 auto & list = lists[w];
359 // If list looks empty retry
360 if( list.ts() == 0 ) return nullptr;
361
362 // If we can't get the lock retry
363 if( !list.lock.try_lock() ) return nullptr;
364
365 #if VARIANT == VANILLA || VARIANT == BITMASK
366 __attribute__((unused)) int num = numNonEmpty;
367 #endif
368
369 // If list is empty, unlock and retry
370 if( list.ts() == 0 ) {
371 list.lock.unlock();
372 return nullptr;
373 }
374
375 // Actually pop the list
376 node_t * node;
377 bool emptied;
378 std::tie(node, emptied) = list.pop();
379 assert(node);
380
381 if(emptied) {
382 #if VARIANT == DISCOVER
383 size_t qword = w >> 6ull;
384 size_t bit = w & 63ull;
385 assert(qword == 0);
386 __attribute__((unused)) bool ret = btr(tls.mask, bit);
387 snzm.depart(w);
388 #elif VARIANT == SNZI || VARIANT == BIAS || VARIANT == BACK || VARIANT == BACKBIAS
389 snzi.depart(w);
390 #elif VARIANT == SNZM
391 snzm.depart(w);
392 #elif VARIANT == BITMASK
393 numNonEmpty--;
394 size_t qword = w >> 6ull;
395 size_t bit = w & 63ull;
396 assert((list_mask[qword] & (1ul << bit)) != 0);
397 __attribute__((unused)) bool ret = btr(list_mask[qword], bit);
398 assert(ret);
399 assert((list_mask[qword] & (1ul << bit)) == 0);
400 #else
401 numNonEmpty--;
402 #endif
403 }
404
405 // Unlock and return
406 list.lock.unlock();
407 #if VARIANT == VANILLA || VARIANT == BITMASK
408 assert(numNonEmpty >= 0);
409 #endif
410 #ifndef NO_STATS
411 tls.pick.pop.success++;
412 #if VARIANT == VANILLA || VARIANT == BITMASK
413 tls.empty.pop.value += num;
414 tls.empty.pop.count += 1;
415 #endif
416 #endif
417 return node;
418 }
419
420 inline unsigned idx_from_r(unsigned r, bool bias) {
421 unsigned i;
422 if(bias) {
423 if(0 == (r & 0xF)) {
424 i = r >> 4;
425 } else {
426 i = tls.my_queue + ((r >> 4) % 4);
427 tls.pick.push.local++;
428 }
429 } else {
430 i = r;
431 }
432 return i % numLists;
433 }
434
435public:
436
437 static __attribute__((aligned(128))) thread_local struct TLS {
438 Random rng1 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
439 Random rng2 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
440 unsigned my_queue = (ticket++) * 4;
441 pick_stat pick;
442 empty_stat empty;
443 __attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
444 } tls;
445
446private:
447 const unsigned numLists;
448 __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t<node_t> []> lists;
449private:
450 #if VARIANT == SNZI || VARIANT == BIAS || VARIANT == BACK || VARIANT == BACKBIAS
451 snzi_t snzi;
452 #elif VARIANT == SNZM || VARIANT == DISCOVER
453 snzm_t snzm;
454 #else
455 std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
456 #endif
457 #if VARIANT == BITMASK
458 std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
459 #endif
460
461public:
462 static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t<node_t>);
463 static std::atomic_uint32_t ticket;
464
465#ifndef NO_STATS
466 static void stats_tls_tally() {
467 global_stats.pick.push.attempt += tls.pick.push.attempt;
468 global_stats.pick.push.success += tls.pick.push.success;
469 global_stats.pick.push.local += tls.pick.push.local;
470 global_stats.pick.pop .attempt += tls.pick.pop.attempt;
471 global_stats.pick.pop .success += tls.pick.pop.success;
472 global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
473 global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
474 global_stats.pick.pop .local += tls.pick.pop.local;
475
476 global_stats.qstat.push.value += tls.empty.push.value;
477 global_stats.qstat.push.count += tls.empty.push.count;
478 global_stats.qstat.pop .value += tls.empty.pop .value;
479 global_stats.qstat.pop .count += tls.empty.pop .count;
480 }
481
482private:
483 static struct GlobalStats {
484 struct {
485 struct {
486 std::atomic_size_t attempt = { 0 };
487 std::atomic_size_t success = { 0 };
488 std::atomic_size_t local = { 0 };
489 } push;
490 struct {
491 std::atomic_size_t attempt = { 0 };
492 std::atomic_size_t success = { 0 };
493 std::atomic_size_t mask_attempt = { 0 };
494 std::atomic_size_t mask_reset = { 0 };
495 std::atomic_size_t local = { 0 };
496 } pop;
497 } pick;
498 struct {
499 struct {
500 std::atomic_size_t value = { 0 };
501 std::atomic_size_t count = { 0 };
502 } push;
503 struct {
504 std::atomic_size_t value = { 0 };
505 std::atomic_size_t count = { 0 };
506 } pop;
507 } qstat;
508 } global_stats;
509
510public:
511 static void stats_print(std::ostream & os ) {
512 std::cout << "----- Relaxed List Stats -----" << std::endl;
513
514 const auto & global = global_stats;
515
516 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
517 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
518 double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
519 double rpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_reset);
520
521 double push_len = double(global.pick.push.attempt ) / global.pick.push.success;
522 double pop_len = double(global.pick.pop .attempt ) / global.pick.pop .success;
523 double mpop_len = double(global.pick.pop .mask_attempt) / global.pick.pop .success;
524 double rpop_len = double(global.pick.pop .mask_reset ) / global.pick.pop .success;
525
526 os << "Push Pick : " << push_sur << " %, len " << push_len << " (" << global.pick.push.attempt << " / " << global.pick.push.success << ")\n";
527 os << "Pop Pick : " << pop_sur << " %, len " << pop_len << " (" << global.pick.pop .attempt << " / " << global.pick.pop .success << ")\n";
528 os << "TryPop Pick : " << mpop_sur << " %, len " << mpop_len << " (" << global.pick.pop .mask_attempt << " / " << global.pick.pop .success << ")\n";
529 os << "Pop M Reset : " << rpop_sur << " %, len " << rpop_len << " (" << global.pick.pop .mask_reset << " / " << global.pick.pop .success << ")\n";
530
531 double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
532 double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count;
533 double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
534 os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
535 os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n";
536 os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
537
538 os << "Local Push : " << global.pick.push.local << "\n";
539 os << "Local Pop : " << global.pick.pop .local << "\n";
540 }
541#endif
542};
Note: See TracBrowser for help on using the repository browser.