source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@ a82a8f4

ADT arm-eh ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast new-ast-unique-expr pthread-emulation qualifiedEnum
Last change on this file since a82a8f4 was a82a8f4, checked in by Thierry Delisle <tdelisle@…>, 5 years ago

Added two new variants to the ready queue which are based on the idea of running the RNG backwards in certain cases

  • Property mode set to 100644
File size: 14.8 KB
Line 
1#pragma once
2#define LIST_VARIANT relaxed_list
3
4#define VANILLA 0
5#define SNZI 1
6#define BITMASK 2
7#define DISCOVER 3
8#define SNZM 4
9#define BIAS 5
10#define BACK 6
11#define BACKBIAS 7
12
13#ifndef VARIANT
14#define VARIANT VANILLA
15#endif
16
17#ifndef NO_STATS
18#include <iostream>
19#endif
20
21#include <cmath>
22#include <memory>
23#include <mutex>
24#include <type_traits>
25
26#include "assert.hpp"
27#include "utils.hpp"
28#include "links.hpp"
29#include "snzi.hpp"
30#include "snzm.hpp"
31
32using namespace std;
33
// Per-thread counters tracking how list selection behaves:
// how many sublists were probed (attempt) vs. actually used (success),
// how often the thread's own sublists were chosen (local), and for the
// mask-based pop variants, how often the mask was consulted/reset.
struct pick_stat {
    struct {
        size_t attempt{0}; // sublists probed for a push
        size_t success{0}; // pushes completed
        size_t local{0};   // pushes that went to this thread's own sublists
    } push;
    struct {
        size_t attempt{0};      // sublist pairs probed for a pop
        size_t success{0};      // pops that returned a node
        size_t mask_attempt{0}; // outer mask-guided selection rounds
        size_t mask_reset{0};   // times the discovery mask was refilled
        size_t local{0};        // pops served from this thread's own sublists
    } pop;
};
48
// Per-thread accumulator for the observed number of non-empty sublists:
// `value` sums the snapshot taken on each operation and `count` is the
// number of samples, so value/count gives the average queue occupancy.
struct empty_stat {
    struct {
        size_t value{0}; // sum of numNonEmpty snapshots at push time
        size_t count{0}; // number of push samples
    } push;
    struct {
        size_t value{0}; // sum of numNonEmpty snapshots at pop time
        size_t count{0}; // number of pop samples
    } pop;
};
59
60template<typename node_t>
61class __attribute__((aligned(128))) relaxed_list {
62 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
63
64public:
65 static const char * name() {
66 const char * names[] = {
67 "RELAXED: VANILLA",
68 "RELAXED: SNZI",
69 "RELAXED: BITMASK",
70 "RELAXED: SNZI + DISCOVERED MASK",
71 "RELAXED: SNZI + MASK",
72 "RELAXED: SNZI + LOCAL BIAS",
73 "RELAXED: SNZI + REVERSE RNG",
74 "RELAXED: SNZI + LOCAL BIAS + REVERSE RNG"
75 };
76 return names[VARIANT];
77 }
78
79 relaxed_list(unsigned numThreads, unsigned numQueues)
80 : numLists(numThreads * numQueues)
81 , lists(new intrusive_queue_t<node_t>[numLists])
82 #if VARIANT == SNZI || VARIANT == BIAS || VARIANT == BACK || VARIANT == BACKBIAS
83 , snzi( std::log2( numLists / (2 * numQueues) ), 2 )
84 #elif VARIANT == SNZM || VARIANT == DISCOVER
85 , snzm( numLists )
86 #endif
87 {
88 assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
89 std::cout << "Constructing Relaxed List with " << numLists << std::endl;
90 }
91
92 ~relaxed_list() {
93 std::cout << "Destroying Relaxed List" << std::endl;
94 lists.reset();
95 }
96
97 __attribute__((noinline, hot)) void push(node_t * node) {
98 node->_links.ts = rdtscl();
99
100 while(true) {
101 // Pick a random list
102 unsigned i = idx_from_r(tls.rng1.next(), VARIANT == BIAS || VARIANT == BACKBIAS);
103
104 #ifndef NO_STATS
105 tls.pick.push.attempt++;
106 #endif
107
108 // If we can't lock it retry
109 if( !lists[i].lock.try_lock() ) continue;
110
111 #if VARIANT == VANILLA || VARIANT == BITMASK
112 __attribute__((unused)) int num = numNonEmpty;
113 #endif
114
115 // Actually push it
116 if(lists[i].push(node)) {
117 #if VARIANT == DISCOVER
118 size_t qword = i >> 6ull;
119 size_t bit = i & 63ull;
120 assert(qword == 0);
121 bts(tls.mask, bit);
122 snzm.arrive(i);
123 #elif VARIANT == SNZI || VARIANT == BIAS
124 snzi.arrive(i);
125 #elif VARIANT == BACK || VARIANT == BACKBIAS
126 snzi.arrive(i);
127 tls.rng2.set_raw_state( tls.rng1.get_raw_state());
128 #elif VARIANT == SNZM
129 snzm.arrive(i);
130 #elif VARIANT == BITMASK
131 numNonEmpty++;
132 size_t qword = i >> 6ull;
133 size_t bit = i & 63ull;
134 assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
135 __attribute__((unused)) bool ret = bts(list_mask[qword], bit);
136 assert(!ret);
137 assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
138 #else
139 numNonEmpty++;
140 #endif
141 }
142 #if VARIANT == VANILLA || VARIANT == BITMASK
143 assert(numNonEmpty <= (int)numLists);
144 #endif
145
146 // Unlock and return
147 lists[i].lock.unlock();
148
149 #ifndef NO_STATS
150 tls.pick.push.success++;
151 #if VARIANT == VANILLA || VARIANT == BITMASK
152 tls.empty.push.value += num;
153 tls.empty.push.count += 1;
154 #endif
155 #endif
156 return;
157 }
158 }
159
160 __attribute__((noinline, hot)) node_t * pop() {
161 #if VARIANT == DISCOVER
162 assert(numLists <= 64);
163 while(snzm.query()) {
164 tls.pick.pop.mask_attempt++;
165 unsigned i, j;
166 {
167 // Pick first list totally randomly
168 i = tls.rng1.next() % numLists;
169
170 // Pick the other according to the bitmask
171 unsigned r = tls.rng1.next();
172
173 size_t mask = tls.mask.load(std::memory_order_relaxed);
174 if(mask == 0) {
175 tls.pick.pop.mask_reset++;
176 mask = (1U << numLists) - 1;
177 tls.mask.store(mask, std::memory_order_relaxed);
178 }
179
180 unsigned b = rand_bit(r, mask);
181
182 assertf(b < 64, "%zu %u", mask, b);
183
184 j = b;
185
186 assert(j < numLists);
187 }
188
189 if(auto node = try_pop(i, j)) return node;
190 }
191 #elif VARIANT == SNZI
192 while(snzi.query()) {
193 // Pick two lists at random
194 int i = tls.rng1.next() % numLists;
195 int j = tls.rng1.next() % numLists;
196
197 if(auto node = try_pop(i, j)) return node;
198 }
199
200 #elif VARIANT == BACK
201 while(snzi.query()) {
202 // Pick two lists at random
203 int i = tls.rng2.prev() % numLists;
204 int j = tls.rng2.prev() % numLists;
205
206 if(auto node = try_pop(i, j)) return node;
207 }
208
209 #elif VARIANT == BACKBIAS
210 while(snzi.query()) {
211 // Pick two lists at random
212 int i = idx_from_r(tls.rng2.prev(), true);
213 int j = idx_from_r(tls.rng2.prev(), true);
214
215 if(auto node = try_pop(i, j)) return node;
216 }
217
218 #elif VARIANT == BIAS
219 while(snzi.query()) {
220 // Pick two lists at random
221 unsigned ri = tls.rng1.next();
222 unsigned i;
223 unsigned j = tls.rng1.next();
224 if(0 == (ri & 0xF)) {
225 i = (ri >> 4) % numLists;
226 } else {
227 i = tls.my_queue + ((ri >> 4) % 4);
228 j = tls.my_queue + ((j >> 4) % 4);
229 tls.pick.pop.local++;
230 }
231 i %= numLists;
232 j %= numLists;
233
234 if(auto node = try_pop(i, j)) return node;
235 }
236 #elif VARIANT == SNZM
237 //*
238 while(snzm.query()) {
239 tls.pick.pop.mask_attempt++;
240 unsigned i, j;
241 {
242 // Pick two random number
243 unsigned ri = tls.rng1.next();
244 unsigned rj = tls.rng1.next();
245
246 // Pick two nodes from it
247 unsigned wdxi = ri & snzm.mask;
248 // unsigned wdxj = rj & snzm.mask;
249
250 // Get the masks from the nodes
251 // size_t maski = snzm.masks(wdxi);
252 size_t maskj = snzm.masks(wdxj);
253
254 if(maski == 0 && maskj == 0) continue;
255
256 #if defined(__BMI2__)
257 uint64_t idxsi = _pext_u64(snzm.indexes, maski);
258 // uint64_t idxsj = _pext_u64(snzm.indexes, maskj);
259
260 auto pi = __builtin_popcountll(maski);
261 // auto pj = __builtin_popcountll(maskj);
262
263 ri = pi ? ri & ((pi >> 3) - 1) : 0;
264 rj = pj ? rj & ((pj >> 3) - 1) : 0;
265
266 unsigned bi = (idxsi >> (ri << 3)) & 0xff;
267 unsigned bj = (idxsj >> (rj << 3)) & 0xff;
268 #else
269 unsigned bi = rand_bit(ri >> snzm.depth, maski);
270 unsigned bj = rand_bit(rj >> snzm.depth, maskj);
271 #endif
272
273 i = (bi << snzm.depth) | wdxi;
274 j = (bj << snzm.depth) | wdxj;
275
276 /* paranoid */ assertf(i < numLists, "%u %u", bj, wdxi);
277 /* paranoid */ assertf(j < numLists, "%u %u", bj, wdxj);
278 }
279
280 if(auto node = try_pop(i, j)) return node;
281 }
282 /*/
283 while(snzm.query()) {
284 // Pick two lists at random
285 int i = tls.rng1.next() % numLists;
286 int j = tls.rng1.next() % numLists;
287
288 if(auto node = try_pop(i, j)) return node;
289 }
290 //*/
291 #elif VARIANT == BITMASK
292 int nnempty;
293 while(0 != (nnempty = numNonEmpty)) {
294 tls.pick.pop.mask_attempt++;
295 unsigned i, j;
296 {
297 // Pick two lists at random
298 unsigned num = ((numLists - 1) >> 6) + 1;
299
300 unsigned ri = tls.rng1.next();
301 unsigned rj = tls.rng1.next();
302
303 unsigned wdxi = (ri >> 6u) % num;
304 unsigned wdxj = (rj >> 6u) % num;
305
306 size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
307 size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
308
309 if(maski == 0 && maskj == 0) continue;
310
311 unsigned bi = rand_bit(ri, maski);
312 unsigned bj = rand_bit(rj, maskj);
313
314 assertf(bi < 64, "%zu %u", maski, bi);
315 assertf(bj < 64, "%zu %u", maskj, bj);
316
317 i = bi | (wdxi << 6);
318 j = bj | (wdxj << 6);
319
320 assertf(i < numLists, "%u", wdxi << 6);
321 assertf(j < numLists, "%u", wdxj << 6);
322 }
323
324 if(auto node = try_pop(i, j)) return node;
325 }
326 #else
327 while(numNonEmpty != 0) {
328 // Pick two lists at random
329 int i = tls.rng1.next() % numLists;
330 int j = tls.rng1.next() % numLists;
331
332 if(auto node = try_pop(i, j)) return node;
333 }
334 #endif
335
336 return nullptr;
337 }
338
339private:
340 node_t * try_pop(unsigned i, unsigned j) {
341 #ifndef NO_STATS
342 tls.pick.pop.attempt++;
343 #endif
344
345 #if VARIANT == DISCOVER
346 if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
347 if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
348 #endif
349
350 // Pick the bet list
351 int w = i;
352 if( __builtin_expect(lists[j].ts() != 0, true) ) {
353 w = (lists[i].ts() < lists[j].ts()) ? i : j;
354 }
355
356 auto & list = lists[w];
357 // If list looks empty retry
358 if( list.ts() == 0 ) return nullptr;
359
360 // If we can't get the lock retry
361 if( !list.lock.try_lock() ) return nullptr;
362
363 #if VARIANT == VANILLA || VARIANT == BITMASK
364 __attribute__((unused)) int num = numNonEmpty;
365 #endif
366
367 // If list is empty, unlock and retry
368 if( list.ts() == 0 ) {
369 list.lock.unlock();
370 return nullptr;
371 }
372
373 // Actually pop the list
374 node_t * node;
375 bool emptied;
376 std::tie(node, emptied) = list.pop();
377 assert(node);
378
379 if(emptied) {
380 #if VARIANT == DISCOVER
381 size_t qword = w >> 6ull;
382 size_t bit = w & 63ull;
383 assert(qword == 0);
384 __attribute__((unused)) bool ret = btr(tls.mask, bit);
385 snzm.depart(w);
386 #elif VARIANT == SNZI || VARIANT == BIAS || VARIANT == BACK || VARIANT == BACKBIAS
387 snzi.depart(w);
388 #elif VARIANT == SNZM
389 snzm.depart(w);
390 #elif VARIANT == BITMASK
391 numNonEmpty--;
392 size_t qword = w >> 6ull;
393 size_t bit = w & 63ull;
394 assert((list_mask[qword] & (1ul << bit)) != 0);
395 __attribute__((unused)) bool ret = btr(list_mask[qword], bit);
396 assert(ret);
397 assert((list_mask[qword] & (1ul << bit)) == 0);
398 #else
399 numNonEmpty--;
400 #endif
401 }
402
403 // Unlock and return
404 list.lock.unlock();
405 #if VARIANT == VANILLA || VARIANT == BITMASK
406 assert(numNonEmpty >= 0);
407 #endif
408 #ifndef NO_STATS
409 tls.pick.pop.success++;
410 #if VARIANT == VANILLA || VARIANT == BITMASK
411 tls.empty.pop.value += num;
412 tls.empty.pop.count += 1;
413 #endif
414 #endif
415 return node;
416 }
417
418 inline unsigned idx_from_r(unsigned r, bool bias) {
419 unsigned i;
420 if(bias) {
421 if(0 == (r & 0xF)) {
422 i = r >> 4;
423 } else {
424 i = tls.my_queue + ((r >> 4) % 4);
425 tls.pick.push.local++;
426 }
427 } else {
428 i = r;
429 }
430 return i % numLists;
431 }
432
433public:
434
435 static __attribute__((aligned(128))) thread_local struct TLS {
436 Random rng1 = { int(rdtscl()) };
437 Random rng2 = { int(rdtscl()) };
438 unsigned my_queue = (ticket++) * 4;
439 pick_stat pick;
440 empty_stat empty;
441 __attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
442 } tls;
443
444private:
445 const unsigned numLists;
446 __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t<node_t> []> lists;
447private:
448 #if VARIANT == SNZI || VARIANT == BIAS || VARIANT == BACK || VARIANT == BACKBIAS
449 snzi_t snzi;
450 #elif VARIANT == SNZM || VARIANT == DISCOVER
451 snzm_t snzm;
452 #else
453 std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
454 #endif
455 #if VARIANT == BITMASK
456 std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
457 #endif
458
459public:
460 static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t<node_t>);
461 static std::atomic_uint32_t ticket;
462
463#ifndef NO_STATS
464 static void stats_tls_tally() {
465 global_stats.pick.push.attempt += tls.pick.push.attempt;
466 global_stats.pick.push.success += tls.pick.push.success;
467 global_stats.pick.push.local += tls.pick.push.local;
468 global_stats.pick.pop .attempt += tls.pick.pop.attempt;
469 global_stats.pick.pop .success += tls.pick.pop.success;
470 global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
471 global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
472 global_stats.pick.pop .local += tls.pick.pop.local;
473
474 global_stats.qstat.push.value += tls.empty.push.value;
475 global_stats.qstat.push.count += tls.empty.push.count;
476 global_stats.qstat.pop .value += tls.empty.pop .value;
477 global_stats.qstat.pop .count += tls.empty.pop .count;
478 }
479
480private:
481 static struct GlobalStats {
482 struct {
483 struct {
484 std::atomic_size_t attempt = { 0 };
485 std::atomic_size_t success = { 0 };
486 std::atomic_size_t local = { 0 };
487 } push;
488 struct {
489 std::atomic_size_t attempt = { 0 };
490 std::atomic_size_t success = { 0 };
491 std::atomic_size_t mask_attempt = { 0 };
492 std::atomic_size_t mask_reset = { 0 };
493 std::atomic_size_t local = { 0 };
494 } pop;
495 } pick;
496 struct {
497 struct {
498 std::atomic_size_t value = { 0 };
499 std::atomic_size_t count = { 0 };
500 } push;
501 struct {
502 std::atomic_size_t value = { 0 };
503 std::atomic_size_t count = { 0 };
504 } pop;
505 } qstat;
506 } global_stats;
507
508public:
509 static void stats_print(std::ostream & os ) {
510 std::cout << "----- Relaxed List Stats -----" << std::endl;
511
512 const auto & global = global_stats;
513
514 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
515 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
516 double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
517 double rpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_reset);
518
519 double push_len = double(global.pick.push.attempt ) / global.pick.push.success;
520 double pop_len = double(global.pick.pop .attempt ) / global.pick.pop .success;
521 double mpop_len = double(global.pick.pop .mask_attempt) / global.pick.pop .success;
522 double rpop_len = double(global.pick.pop .mask_reset ) / global.pick.pop .success;
523
524 os << "Push Pick : " << push_sur << " %, len " << push_len << " (" << global.pick.push.attempt << " / " << global.pick.push.success << ")\n";
525 os << "Pop Pick : " << pop_sur << " %, len " << pop_len << " (" << global.pick.pop .attempt << " / " << global.pick.pop .success << ")\n";
526 os << "TryPop Pick : " << mpop_sur << " %, len " << mpop_len << " (" << global.pick.pop .mask_attempt << " / " << global.pick.pop .success << ")\n";
527 os << "Pop M Reset : " << rpop_sur << " %, len " << rpop_len << " (" << global.pick.pop .mask_reset << " / " << global.pick.pop .success << ")\n";
528
529 double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
530 double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count;
531 double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
532 os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
533 os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n";
534 os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
535
536 os << "Local Push : " << global.pick.push.local << "\n";
537 os << "Local Pop : " << global.pick.pop .local << "\n";
538 }
539#endif
540};
Note: See TracBrowser for help on using the repository browser.