source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@ 8291293

ADT arm-eh ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast new-ast-unique-expr pthread-emulation qualifiedEnum
Last change on this file since 8291293 was b232745, checked in by Thierry Delisle <tdelisle@…>, 6 years ago

Several changes to relaxed list prototype and added workstealing for comparison

  • Property mode set to 100644
File size: 14.1 KB
Line 
1#pragma once
2#define LIST_VARIANT relaxed_list
3
4#define VANILLA 0
5#define SNZI 1
6#define BITMASK 2
7#define DISCOVER 3
8#define SNZM 4
9#define BIAS 5
10
11#ifndef VARIANT
12#define VARIANT VANILLA
13#endif
14
15#ifndef NO_STATS
16#include <iostream>
17#endif
18
19#include <cmath>
20#include <memory>
21#include <mutex>
22#include <type_traits>
23
24#include "assert.hpp"
25#include "utils.hpp"
26#include "links.hpp"
27#include "snzi.hpp"
28#include "snzm.hpp"
29
30using namespace std;
31
32struct pick_stat {
33 struct {
34 size_t attempt = 0;
35 size_t success = 0;
36 size_t local = 0;
37 } push;
38 struct {
39 size_t attempt = 0;
40 size_t success = 0;
41 size_t mask_attempt = 0;
42 size_t mask_reset = 0;
43 size_t local = 0;
44 } pop;
45};
46
47struct empty_stat {
48 struct {
49 size_t value = 0;
50 size_t count = 0;
51 } push;
52 struct {
53 size_t value = 0;
54 size_t count = 0;
55 } pop;
56};
57
58template<typename node_t>
59class __attribute__((aligned(128))) relaxed_list {
60 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
61
62public:
63 static const char * name() {
64 const char * names[] = {
65 "RELAXED: VANILLA",
66 "RELAXED: SNZI",
67 "RELAXED: BITMASK",
68 "RELAXED: SNZI + DISCOVERED MASK",
69 "RELAXED: SNZI + MASK",
70 "RELAXED: SNZI + LOCAL BIAS"
71 };
72 return names[VARIANT];
73 }
74
75 relaxed_list(unsigned numThreads, unsigned numQueues)
76 : numLists(numThreads * numQueues)
77 , lists(new intrusive_queue_t<node_t>[numLists])
78 #if VARIANT == SNZI || VARIANT == BIAS
79 , snzi( std::log2( numLists / (2 * numQueues) ), 2 )
80 #elif VARIANT == SNZM || VARIANT == DISCOVER
81 , snzm( numLists )
82 #endif
83 {
84 assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
85 std::cout << "Constructing Relaxed List with " << numLists << std::endl;
86 }
87
88 ~relaxed_list() {
89 std::cout << "Destroying Relaxed List" << std::endl;
90 lists.reset();
91 }
92
93 __attribute__((noinline, hot)) void push(node_t * node) {
94 node->_links.ts = rdtscl();
95
96 while(true) {
97 // Pick a random list
98 #if VARIANT == BIAS
99 unsigned r = tls.rng.next();
100 unsigned i;
101 if(0 == (r & 0xF)) {
102 i = r >> 4;
103 } else {
104 i = tls.my_queue + ((r >> 4) % 4);
105 tls.pick.push.local++;
106 }
107 i %= numLists;
108 #else
109 unsigned i = tls.rng.next() % numLists;
110 #endif
111
112 #ifndef NO_STATS
113 tls.pick.push.attempt++;
114 #endif
115
116 // If we can't lock it retry
117 if( !lists[i].lock.try_lock() ) continue;
118
119 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER && VARIANT != BIAS
120 __attribute__((unused)) int num = numNonEmpty;
121 #endif
122
123 // Actually push it
124 if(lists[i].push(node)) {
125 #if VARIANT == DISCOVER
126 size_t qword = i >> 6ull;
127 size_t bit = i & 63ull;
128 assert(qword == 0);
129 bts(tls.mask, bit);
130 snzm.arrive(i);
131 #elif VARIANT == SNZI || VARIANT == BIAS
132 snzi.arrive(i);
133 #elif VARIANT == SNZM
134 snzm.arrive(i);
135 #elif VARIANT == BITMASK
136 numNonEmpty++;
137 size_t qword = i >> 6ull;
138 size_t bit = i & 63ull;
139 assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
140 __attribute__((unused)) bool ret = bts(list_mask[qword], bit);
141 assert(!ret);
142 assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
143 #else
144 numNonEmpty++;
145 #endif
146 }
147 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER && VARIANT != BIAS
148 assert(numNonEmpty <= (int)numLists);
149 #endif
150
151 // Unlock and return
152 lists[i].lock.unlock();
153
154 #ifndef NO_STATS
155 tls.pick.push.success++;
156 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER && VARIANT != BIAS
157 tls.empty.push.value += num;
158 tls.empty.push.count += 1;
159 #endif
160 #endif
161 return;
162 }
163 }
164
165 __attribute__((noinline, hot)) node_t * pop() {
166 #if VARIANT == DISCOVER
167 assert(numLists <= 64);
168 while(snzm.query()) {
169 tls.pick.pop.mask_attempt++;
170 unsigned i, j;
171 {
172 // Pick first list totally randomly
173 i = tls.rng.next() % numLists;
174
175 // Pick the other according to the bitmask
176 unsigned r = tls.rng.next();
177
178 size_t mask = tls.mask.load(std::memory_order_relaxed);
179 if(mask == 0) {
180 tls.pick.pop.mask_reset++;
181 mask = (1U << numLists) - 1;
182 tls.mask.store(mask, std::memory_order_relaxed);
183 }
184
185 unsigned b = rand_bit(r, mask);
186
187 assertf(b < 64, "%zu %u", mask, b);
188
189 j = b;
190
191 assert(j < numLists);
192 }
193
194 if(auto node = try_pop(i, j)) return node;
195 }
196 #elif VARIANT == SNZI
197 while(snzi.query()) {
198 // Pick two lists at random
199 int i = tls.rng.next() % numLists;
200 // int j = tls.rng.next() % numLists;
201
202 if(auto node = try_pop(i, j)) return node;
203 }
204
205 #elif VARIANT == BIAS
206 while(snzi.query()) {
207 // Pick two lists at random
208 unsigned ri = tls.rng.next();
209 unsigned i;
210 unsigned j = tls.rng.next();
211 if(0 == (ri & 0xF)) {
212 i = (ri >> 4) % numLists;
213 } else {
214 i = tls.my_queue + ((ri >> 4) % 4);
215 j = tls.my_queue + ((j >> 4) % 4);
216 tls.pick.pop.local++;
217 }
218 i %= numLists;
219 j %= numLists;
220
221 if(auto node = try_pop(i, j)) return node;
222 }
223 #elif VARIANT == SNZM
224 //*
225 while(snzm.query()) {
226 tls.pick.pop.mask_attempt++;
227 unsigned i, j;
228 {
229 // Pick two random number
230 unsigned ri = tls.rng.next();
231 unsigned rj = tls.rng.next();
232
233 // Pick two nodes from it
234 unsigned wdxi = ri & snzm.mask;
235 // unsigned wdxj = rj & snzm.mask;
236
237 // Get the masks from the nodes
238 // size_t maski = snzm.masks(wdxi);
239 size_t maskj = snzm.masks(wdxj);
240
241 if(maski == 0 && maskj == 0) continue;
242
243 #if defined(__BMI2__)
244 uint64_t idxsi = _pext_u64(snzm.indexes, maski);
245 // uint64_t idxsj = _pext_u64(snzm.indexes, maskj);
246
247 auto pi = __builtin_popcountll(maski);
248 // auto pj = __builtin_popcountll(maskj);
249
250 ri = pi ? ri & ((pi >> 3) - 1) : 0;
251 rj = pj ? rj & ((pj >> 3) - 1) : 0;
252
253 unsigned bi = (idxsi >> (ri << 3)) & 0xff;
254 unsigned bj = (idxsj >> (rj << 3)) & 0xff;
255 #else
256 unsigned bi = rand_bit(ri >> snzm.depth, maski);
257 unsigned bj = rand_bit(rj >> snzm.depth, maskj);
258 #endif
259
260 i = (bi << snzm.depth) | wdxi;
261 j = (bj << snzm.depth) | wdxj;
262
263 /* paranoid */ assertf(i < numLists, "%u %u", bj, wdxi);
264 /* paranoid */ assertf(j < numLists, "%u %u", bj, wdxj);
265 }
266
267 if(auto node = try_pop(i, j)) return node;
268 }
269 /*/
270 while(snzm.query()) {
271 // Pick two lists at random
272 int i = tls.rng.next() % numLists;
273 int j = tls.rng.next() % numLists;
274
275 if(auto node = try_pop(i, j)) return node;
276 }
277 //*/
278 #elif VARIANT == BITMASK
279 int nnempty;
280 while(0 != (nnempty = numNonEmpty)) {
281 tls.pick.pop.mask_attempt++;
282 unsigned i, j;
283 {
284 // Pick two lists at random
285 unsigned num = ((numLists - 1) >> 6) + 1;
286
287 unsigned ri = tls.rng.next();
288 unsigned rj = tls.rng.next();
289
290 unsigned wdxi = (ri >> 6u) % num;
291 unsigned wdxj = (rj >> 6u) % num;
292
293 size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
294 size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
295
296 if(maski == 0 && maskj == 0) continue;
297
298 unsigned bi = rand_bit(ri, maski);
299 unsigned bj = rand_bit(rj, maskj);
300
301 assertf(bi < 64, "%zu %u", maski, bi);
302 assertf(bj < 64, "%zu %u", maskj, bj);
303
304 i = bi | (wdxi << 6);
305 j = bj | (wdxj << 6);
306
307 assertf(i < numLists, "%u", wdxi << 6);
308 assertf(j < numLists, "%u", wdxj << 6);
309 }
310
311 if(auto node = try_pop(i, j)) return node;
312 }
313 #else
314 while(numNonEmpty != 0) {
315 // Pick two lists at random
316 int i = tls.rng.next() % numLists;
317 int j = tls.rng.next() % numLists;
318
319 if(auto node = try_pop(i, j)) return node;
320 }
321 #endif
322
323 return nullptr;
324 }
325
326private:
327 node_t * try_pop(unsigned i, unsigned j) {
328 #ifndef NO_STATS
329 tls.pick.pop.attempt++;
330 #endif
331
332 #if VARIANT == DISCOVER
333 if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
334 if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
335 #endif
336
337 // Pick the bet list
338 int w = i;
339 if( __builtin_expect(lists[j].ts() != 0, true) ) {
340 w = (lists[i].ts() < lists[j].ts()) ? i : j;
341 }
342
343 auto & list = lists[w];
344 // If list looks empty retry
345 if( list.ts() == 0 ) return nullptr;
346
347 // If we can't get the lock retry
348 if( !list.lock.try_lock() ) return nullptr;
349
350 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER && VARIANT != BIAS
351 __attribute__((unused)) int num = numNonEmpty;
352 #endif
353
354 // If list is empty, unlock and retry
355 if( list.ts() == 0 ) {
356 list.lock.unlock();
357 return nullptr;
358 }
359
360 // Actually pop the list
361 node_t * node;
362 bool emptied;
363 std::tie(node, emptied) = list.pop();
364 assert(node);
365
366 if(emptied) {
367 #if VARIANT == DISCOVER
368 size_t qword = w >> 6ull;
369 size_t bit = w & 63ull;
370 assert(qword == 0);
371 __attribute__((unused)) bool ret = btr(tls.mask, bit);
372 snzm.depart(w);
373 #elif VARIANT == SNZI || VARIANT == BIAS
374 snzi.depart(w);
375 #elif VARIANT == SNZM
376 snzm.depart(w);
377 #elif VARIANT == BITMASK
378 numNonEmpty--;
379 size_t qword = w >> 6ull;
380 size_t bit = w & 63ull;
381 assert((list_mask[qword] & (1ul << bit)) != 0);
382 __attribute__((unused)) bool ret = btr(list_mask[qword], bit);
383 assert(ret);
384 assert((list_mask[qword] & (1ul << bit)) == 0);
385 #else
386 numNonEmpty--;
387 #endif
388 }
389
390 // Unlock and return
391 list.lock.unlock();
392 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER && VARIANT != BIAS
393 assert(numNonEmpty >= 0);
394 #endif
395 #ifndef NO_STATS
396 tls.pick.pop.success++;
397 #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER && VARIANT != BIAS
398 tls.empty.pop.value += num;
399 tls.empty.pop.count += 1;
400 #endif
401 #endif
402 return node;
403 }
404
405
406public:
407
408 static __attribute__((aligned(128))) thread_local struct TLS {
409 Random rng = { int(rdtscl()) };
410 unsigned my_queue = (ticket++) * 4;
411 pick_stat pick;
412 empty_stat empty;
413 __attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
414 } tls;
415
416private:
417 const unsigned numLists;
418 __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t<node_t> []> lists;
419private:
420 #if VARIANT == SNZI || VARIANT == BIAS
421 snzi_t snzi;
422 #elif VARIANT == SNZM || VARIANT == DISCOVER
423 snzm_t snzm;
424 #else
425 std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
426 #endif
427 #if VARIANT == BITMASK
428 std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
429 #endif
430
431public:
432 static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t<node_t>);
433 static std::atomic_uint32_t ticket;
434
435#ifndef NO_STATS
436 static void stats_tls_tally() {
437 global_stats.pick.push.attempt += tls.pick.push.attempt;
438 global_stats.pick.push.success += tls.pick.push.success;
439 global_stats.pick.push.local += tls.pick.push.local;
440 global_stats.pick.pop .attempt += tls.pick.pop.attempt;
441 global_stats.pick.pop .success += tls.pick.pop.success;
442 global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
443 global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
444 global_stats.pick.pop .local += tls.pick.pop.local;
445
446 global_stats.qstat.push.value += tls.empty.push.value;
447 global_stats.qstat.push.count += tls.empty.push.count;
448 global_stats.qstat.pop .value += tls.empty.pop .value;
449 global_stats.qstat.pop .count += tls.empty.pop .count;
450 }
451
452private:
453 static struct GlobalStats {
454 struct {
455 struct {
456 std::atomic_size_t attempt = { 0 };
457 std::atomic_size_t success = { 0 };
458 std::atomic_size_t local = { 0 };
459 } push;
460 struct {
461 std::atomic_size_t attempt = { 0 };
462 std::atomic_size_t success = { 0 };
463 std::atomic_size_t mask_attempt = { 0 };
464 std::atomic_size_t mask_reset = { 0 };
465 std::atomic_size_t local = { 0 };
466 } pop;
467 } pick;
468 struct {
469 struct {
470 std::atomic_size_t value = { 0 };
471 std::atomic_size_t count = { 0 };
472 } push;
473 struct {
474 std::atomic_size_t value = { 0 };
475 std::atomic_size_t count = { 0 };
476 } pop;
477 } qstat;
478 } global_stats;
479
480public:
481 static void stats_print(std::ostream & os ) {
482 std::cout << "----- Relaxed List Stats -----" << std::endl;
483
484 const auto & global = global_stats;
485
486 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
487 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
488 double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
489 double rpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_reset);
490
491 double push_len = double(global.pick.push.attempt ) / global.pick.push.success;
492 double pop_len = double(global.pick.pop .attempt ) / global.pick.pop .success;
493 double mpop_len = double(global.pick.pop .mask_attempt) / global.pick.pop .success;
494 double rpop_len = double(global.pick.pop .mask_reset ) / global.pick.pop .success;
495
496 os << "Push Pick : " << push_sur << " %, len " << push_len << " (" << global.pick.push.attempt << " / " << global.pick.push.success << ")\n";
497 os << "Pop Pick : " << pop_sur << " %, len " << pop_len << " (" << global.pick.pop .attempt << " / " << global.pick.pop .success << ")\n";
498 os << "TryPop Pick : " << mpop_sur << " %, len " << mpop_len << " (" << global.pick.pop .mask_attempt << " / " << global.pick.pop .success << ")\n";
499 os << "Pop M Reset : " << rpop_sur << " %, len " << rpop_len << " (" << global.pick.pop .mask_reset << " / " << global.pick.pop .success << ")\n";
500
501 double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
502 double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count;
503 double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
504 os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
505 os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n";
506 os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
507
508 os << "Local Push : " << global.pick.push.local << "\n";
509 os << "Local Pop : " << global.pick.pop .local << "\n";
510 }
511#endif
512};
Note: See TracBrowser for help on using the repository browser.