source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@ 9da5a50

ADT arm-eh ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast new-ast-unique-expr pthread-emulation qualifiedEnum
Last change on this file since 9da5a50 was 9da5a50, checked in by Thierry Delisle <tdelisle@…>, 6 years ago

Added new DISCOVER_BITMASK algorithm as a potential ready queue sub-algorithm.
It may offer an improvement, but it doesn't reduce the contention on the counter

  • Property mode set to 100644
File size: 14.4 KB
Line 
#pragma once

#ifndef NO_STATS
#include <iostream>
#endif

#include <atomic>
#include <memory>
#include <mutex>
#include <type_traits>

#include "assert.hpp"
#include "utils.hpp"
13
14using namespace std;
15
// Minimal test-and-test-and-set spinlock over a single atomic flag.
// `ll` holds true while the lock is taken.
struct spinlock_t {
	std::atomic_bool ll = { false };

	// Acquire: spin until we are the ones flipping the flag false -> true.
	inline void lock() {
		for(;;) {
			// Attempt the acquire; the uncontended path is the expected one.
			if( !__builtin_expect(ll.exchange(true), false) ) return;
			// Contended: spin on cheap relaxed reads (no cache-line
			// write traffic), yielding the core with `pause`.
			while( ll.load(std::memory_order_relaxed) )
				asm volatile("pause");
		}
	}

	// One acquisition attempt; returns true on success.
	inline bool try_lock() {
		bool was_held = ll.exchange(true);
		return !was_held;
	}

	// Release; release ordering publishes the critical section's writes.
	inline void unlock() {
		ll.store(false, std::memory_order_release);
	}

	// Racy observation of whether the lock is currently held.
	inline explicit operator bool() {
		return ll.load(std::memory_order_relaxed);
	}
};
38
39static inline bool bts(std::atomic_size_t & target, size_t bit ) {
40 //*
41 int result = 0;
42 asm volatile(
43 "LOCK btsq %[bit], %[target]\n\t"
44 :"=@ccc" (result)
45 : [target] "m" (target), [bit] "r" (bit)
46 );
47 return result != 0;
48 /*/
49 size_t mask = 1ul << bit;
50 size_t ret = target.fetch_or(mask, std::memory_order_relaxed);
51 return (ret & mask) != 0;
52 //*/
53}
54
55static inline bool btr(std::atomic_size_t & target, size_t bit ) {
56 //*
57 int result = 0;
58 asm volatile(
59 "LOCK btrq %[bit], %[target]\n\t"
60 :"=@ccc" (result)
61 : [target] "m" (target), [bit] "r" (bit)
62 );
63 return result != 0;
64 /*/
65 size_t mask = 1ul << bit;
66 size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);
67 return (ret & mask) != 0;
68 //*/
69}
70
71extern bool enable_stats;
72
// Per-thread counters for how often random sublist picks succeed.
// Accumulated into the process totals by relaxed_list::stats_tls_tally().
struct pick_stat {
	struct {
		size_t attempt = 0; // push pick rounds (one per try_lock attempt)
		size_t success = 0; // pushes that completed
	} push;
	struct {
		size_t attempt = 0;      // try_pop() invocations
		size_t success = 0;      // pops that returned a node
		size_t mask_attempt = 0; // bitmask-guided pick rounds in pop()
		size_t mask_reset = 0;   // times a thread's discovered mask was empty and reset (DISCOVER_BITMASK path)
	} pop;
};
85
// Per-thread occupancy accumulators: `value` sums the numNonEmpty snapshot
// taken during each operation and `count` counts the operations, so
// value / count yields the average number of non-empty sublists seen
// per push/pop (reported by relaxed_list::stats_print_local()).
struct empty_stat {
	struct {
		size_t value = 0;
		size_t count = 0;
	} push;
	struct {
		size_t value = 0;
		size_t count = 0;
	} pop;
};
96
// Intrusive doubly-linked-list hooks that every node type must embed as a
// field named `_links` (checked by relaxed_list's static_assert), plus the
// timestamp assigned at push time.
template<typename node_t>
struct _LinksFields_t {
	node_t * prev = nullptr;
	node_t * next = nullptr;
	unsigned long long ts = 0; // rdtscl() taken on push; in the head sentinel, 0 marks an empty sublist
};
103
// relaxed_list: a relaxed-FIFO ready-queue made of `numLists` sublists, each
// an intrusive doubly-linked queue guarded by its own spinlock.
//  - push() appends to a randomly chosen sublist (timestamping the node).
//  - pop() picks two sublists and pops from the one whose head timestamp is
//    oldest ("power of two choices"), so ordering is only approximately FIFO.
// Emptiness tracking is selected at compile time:
//  - DISCOVER_BITMASK  : per-thread mask of sublists believed non-empty,
//                        lazily corrected inside try_pop().
//  - default           : shared list_mask bitmask + numNonEmpty counter.
//  - NO_BITMASK        : numNonEmpty counter only.
template<typename node_t>
class __attribute__((aligned(128))) relaxed_list {
	// Nodes must embed their hooks as a field literally named `_links`.
	static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");

public:
	relaxed_list(unsigned numLists)
		: lists(new intrusive_queue_t[numLists])
		, numLists(numLists)
	{
		// list_mask has 7 qwords => at most 7 * 64 = 448 sublists.
		assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
		// assert(sizeof(*this) == 128);
		std::cout << "Constructing Relaxed List with " << numLists << std::endl;

		#ifndef NO_STATS
			// Thread this instance onto the global chain walked by stats_print().
			if(head) this->next = head;
			head = this;
		#endif
	}

	~relaxed_list() {
		std::cout << "Destroying Relaxed List" << std::endl;
		lists.reset();
	}

	// Push `node` onto a randomly chosen sublist. Always succeeds; if the
	// chosen sublist's lock is busy, re-picks and retries.
	__attribute__((noinline, hot)) void push(node_t * node) {
		node->_links.ts = rdtscl();

		while(true) {
			// Pick a random list
			unsigned i = tls.rng.next() % numLists;

			#ifndef NO_STATS
				tls.pick.push.attempt++;
			#endif

			// If we can't lock it retry
			if( !lists[i].lock.try_lock() ) continue;

			// Snapshot of occupancy for the stats tally below.
			__attribute__((unused)) int num = numNonEmpty;

			// Actually push it
			if(lists[i].push(node)) {
				// The sublist just transitioned empty -> non-empty.
				#if defined(DISCOVER_BITMASK)
					size_t qword = i >> 6ull;
					size_t bit = i & 63ull;
					assert(qword == 0);
					// Only record the discovery in this thread's private mask.
					bts(tls.mask, bit);
				#elif !defined(NO_BITMASK)
					numNonEmpty++;
					size_t qword = i >> 6ull;
					size_t bit = i & 63ull;
					assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
					__attribute__((unused)) bool ret = bts(list_mask[qword], bit);
					assert(!ret);
					assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
				#else
					numNonEmpty++;
				#endif
			}
			assert(numNonEmpty <= (int)numLists);

			// Unlock and return
			lists[i].lock.unlock();

			#ifndef NO_STATS
				tls.pick.push.success++;
				tls.empty.push.value += num;
				tls.empty.push.count += 1;
			#endif
			return;
		}
	}

	// Pop the approximately-oldest node, or nullptr when the structure looks
	// empty -- except under DISCOVER_BITMASK, see the NOTE below.
	__attribute__((noinline, hot)) node_t * pop() {
		#if defined(DISCOVER_BITMASK)
			// NOTE(review): this variant loops until try_pop succeeds; it has
			// no empty-exit, so the trailing `return nullptr` is unreachable.
			assert(numLists <= 64);
			while(true) {
				tls.pick.pop.mask_attempt++;
				unsigned i, j;
				{
					// Pick two lists at random
					unsigned num = ((numLists - 1) >> 6) + 1;

					// Pick first list totally randomly
					i = tls.rng.next() % numLists;

					// Pick the other according to the bitmask
					unsigned r = tls.rng.next();

					size_t mask = tls.mask.load(std::memory_order_relaxed);
					if(mask == 0) {
						// This thread believes everything is empty: reset to
						// "all candidates" and let try_pop() re-discover.
						tls.pick.pop.mask_reset++;
						mask = (1U << numLists) - 1;
					}

					unsigned b = rand_bit(r, mask);

					assertf(b < 64, "%zu %u", mask, b);

					j = b;

					assert(j < numLists);
				}

				if(auto node = try_pop(i, j)) return node;
			}
		#elif !defined(NO_BITMASK)
			int nnempty;
			while(0 != (nnempty = numNonEmpty)) {
				tls.pick.pop.mask_attempt++;
				unsigned i, j;
				{
					// Pick two lists at random
					unsigned num = ((numLists - 1) >> 6) + 1;

					unsigned ri = tls.rng.next();
					unsigned rj = tls.rng.next();

					// Pick one random qword of the shared mask per choice...
					unsigned wdxi = (ri >> 6u) % num;
					unsigned wdxj = (rj >> 6u) % num;

					size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
					size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);

					if(maski == 0 && maskj == 0) continue;

					// ...then a random set bit inside that qword.
					unsigned bi = rand_bit(ri, maski);
					unsigned bj = rand_bit(rj, maskj);

					assertf(bi < 64, "%zu %u", maski, bi);
					assertf(bj < 64, "%zu %u", maskj, bj);

					i = bi | (wdxi << 6);
					j = bj | (wdxj << 6);

					assertf(i < numLists, "%u", wdxi << 6);
					assertf(j < numLists, "%u", wdxj << 6);
				}

				if(auto node = try_pop(i, j)) return node;
			}
		#else
			while(numNonEmpty != 0) {
				// Pick two lists at random
				int i = tls.rng.next() % numLists;
				int j = tls.rng.next() % numLists;

				if(auto node = try_pop(i, j)) return node;
			}
		#endif

		return nullptr;
	}

private:
	// Try to pop from the better of sublists i and j (non-zero, older head
	// timestamp wins). Returns nullptr on any failure: both look empty, the
	// chosen lock is busy, or the list drained between the check and the lock.
	node_t * try_pop(unsigned i, unsigned j) {
		#ifndef NO_STATS
			tls.pick.pop.attempt++;
		#endif

		#if defined(DISCOVER_BITMASK)
			// Opportunistically (re)learn the true emptiness of both picks.
			if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
			if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
		#endif

		// Pick the best list
		int w = i;
		if( __builtin_expect(lists[j].ts() != 0, true) ) {
			w = (lists[i].ts() < lists[j].ts()) ? i : j;
		}

		auto & list = lists[w];
		// If list looks empty retry
		if( list.ts() == 0 ) return nullptr;

		// If we can't get the lock retry
		if( !list.lock.try_lock() ) return nullptr;

		// Snapshot of occupancy for the stats tally below.
		__attribute__((unused)) int num = numNonEmpty;

		// If list is empty, unlock and retry
		if( list.ts() == 0 ) {
			list.lock.unlock();
			return nullptr;
		}

		// Actually pop the list
		node_t * node;
		bool emptied;
		std::tie(node, emptied) = list.pop();
		assert(node);

		if(emptied) {
			// The sublist just transitioned non-empty -> empty.
			#if defined(DISCOVER_BITMASK)
				size_t qword = w >> 6ull;
				size_t bit = w & 63ull;
				assert(qword == 0);
				__attribute__((unused)) bool ret = btr(tls.mask, bit);
			#elif !defined(NO_BITMASK)
				numNonEmpty--;
				size_t qword = w >> 6ull;
				size_t bit = w & 63ull;
				assert((list_mask[qword] & (1ul << bit)) != 0);
				__attribute__((unused)) bool ret = btr(list_mask[qword], bit);
				assert(ret);
				assert((list_mask[qword] & (1ul << bit)) == 0);
			#else
				numNonEmpty--;
			#endif
		}

		// Unlock and return
		list.lock.unlock();
		assert(numNonEmpty >= 0);
		#ifndef NO_STATS
			tls.pick.pop.success++;
			tls.empty.pop.value += num;
			tls.empty.pop.count += 1;
		#endif
		return node;
	}

private:

	// One cache-line-aligned sublist: an intrusive doubly-linked queue framed
	// by two sentinels, its spinlock, and per-sublist stats.
	class __attribute__((aligned(128))) intrusive_queue_t {
	public:
		typedef spinlock_t lock_t;

		friend class relaxed_list<node_t>;

		struct stat {
			ssize_t diff = 0; // pushes minus pops on this sublist
			size_t push = 0;
			size_t pop = 0;
			// size_t value = 0;
			// size_t count = 0;
		};

	private:
		// Sentinels carry only the links; head()/tail() reconstruct fake
		// node_t pointers from them via fields_offset.
		struct sentinel_t {
			_LinksFields_t<node_t> _links;
		};

		lock_t lock;
		sentinel_t before; // before._links.ts doubles as the head timestamp; 0 == empty
		sentinel_t after;
		#ifndef NO_STATS
			stat s;
		#endif

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winvalid-offsetof"
		static constexpr auto fields_offset = offsetof( node_t, _links );
#pragma GCC diagnostic pop
	public:
		intrusive_queue_t()
			: before{{ nullptr, tail() }}
			, after {{ head(), nullptr }}
		{
			/* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
			/* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
			/* paranoid */ assert(head()->_links.prev == nullptr);
			/* paranoid */ assert(head()->_links.next == tail() );
			/* paranoid */ assert(tail()->_links.next == nullptr);
			/* paranoid */ assert(tail()->_links.prev == head() );
			/* paranoid */ assert(sizeof(*this) == 128);
			/* paranoid */ assert((intptr_t(this) % 128) == 0);
		}

		~intrusive_queue_t() = default;

		// Fake node_t* whose _links field aliases `before` (head sentinel).
		inline node_t * head() const {
			node_t * rhead = reinterpret_cast<node_t *>(
				reinterpret_cast<uintptr_t>( &before ) - fields_offset
			);
			assert(rhead);
			return rhead;
		}

		// Fake node_t* whose _links field aliases `after` (tail sentinel).
		inline node_t * tail() const {
			node_t * rtail = reinterpret_cast<node_t *>(
				reinterpret_cast<uintptr_t>( &after ) - fields_offset
			);
			assert(rtail);
			return rtail;
		}

		// Append `node` just before the tail sentinel. Caller must hold `lock`.
		// Returns true iff the list transitioned empty -> non-empty.
		inline bool push(node_t * node) {
			assert(lock);
			assert(node->_links.ts != 0);
			node_t * tail = this->tail();

			node_t * prev = tail->_links.prev;
			// assertf(node->_links.ts >= prev->_links.ts,
			// "New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
			node->_links.next = tail;
			node->_links.prev = prev;
			prev->_links.next = node;
			tail->_links.prev = node;
			#ifndef NO_STATS
				if(enable_stats) {
					s.diff++;
					s.push++;
				}
			#endif
			// before.ts == 0 means the list was empty: publish the new head
			// timestamp and report the transition to the caller.
			if(before._links.ts == 0l) {
				before._links.ts = node->_links.ts;
				assert(node->_links.prev == this->head());
				return true;
			}
			return false;
		}

		// Remove the node right after the head sentinel. Caller must hold
		// `lock`. Returns {node, emptied}; {nullptr, false} if already empty.
		inline std::pair<node_t *, bool> pop() {
			assert(lock);
			node_t * head = this->head();
			node_t * tail = this->tail();

			node_t * node = head->_links.next;
			node_t * next = node->_links.next;
			if(node == tail) return {nullptr, false};

			head->_links.next = next;
			next->_links.prev = head;

			#ifndef NO_STATS
				if(enable_stats) {
					s.diff--;
					s.pop ++;
				}
			#endif
			if(next == tail) {
				// List is now empty: clear the head timestamp.
				before._links.ts = 0l;
				return {node, true};
			}
			else {
				// Advance the head timestamp to the new oldest node.
				assert(next->_links.ts != 0);
				before._links.ts = next->_links.ts;
				assert(before._links.ts != 0);
				return {node, false};
			}
		}

		// Timestamp of the oldest node, or 0 if the sublist is empty.
		long long ts() const {
			return before._links.ts;
		}
	};


public:

	// Per-thread state: RNG for picks, stats, and (DISCOVER_BITMASK) the
	// thread's private discovered mask.
	static __attribute__((aligned(128))) thread_local struct TLS {
		Random rng = { int(rdtscl()) };
		pick_stat pick;
		empty_stat empty;
		__attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
	} tls;

public:
	std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
	std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // bit set = corresponding sublist is non-empty
private:
	__attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
	const unsigned numLists;

public:
	static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t);

#ifndef NO_STATS
	// Print stats for every relaxed_list instance on the global chain.
	static void stats_print(std::ostream & os) {
		auto it = head;
		while(it) {
			it->stats_print_local(os);
			it = it->next;
		}
	}

	// Fold the calling thread's tls counters into the process-wide totals.
	static void stats_tls_tally() {
		global_stats.pick.push.attempt += tls.pick.push.attempt;
		global_stats.pick.push.success += tls.pick.push.success;
		global_stats.pick.pop .attempt += tls.pick.pop.attempt;
		global_stats.pick.pop .success += tls.pick.pop.success;
		global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
		global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;

		global_stats.qstat.push.value += tls.empty.push.value;
		global_stats.qstat.push.count += tls.empty.push.count;
		global_stats.qstat.pop .value += tls.empty.pop .value;
		global_stats.qstat.pop .count += tls.empty.pop .count;
	}

private:
	// Process-wide tallies, accumulated per-thread via stats_tls_tally().
	static struct GlobalStats {
		struct {
			struct {
				std::atomic_size_t attempt = { 0 };
				std::atomic_size_t success = { 0 };
			} push;
			struct {
				std::atomic_size_t attempt = { 0 };
				std::atomic_size_t success = { 0 };
				std::atomic_size_t mask_attempt = { 0 };
				std::atomic_size_t mask_reset = { 0 };
			} pop;
		} pick;
		struct {
			struct {
				std::atomic_size_t value = { 0 };
				std::atomic_size_t count = { 0 };
			} push;
			struct {
				std::atomic_size_t value = { 0 };
				std::atomic_size_t count = { 0 };
			} pop;
		} qstat;
	} global_stats;

	// Link list of all lists for stats
	__attribute__((aligned(64))) relaxed_list<node_t> * next = nullptr;

	static relaxed_list<node_t> * head;

	// Dump per-instance and global statistics.
	// NOTE(review): the banner goes to std::cout while the data goes to `os`.
	void stats_print_local(std::ostream & os ) {
		std::cout << "----- Relaxed List Stats -----" << std::endl;
		{
			ssize_t diff = 0;
			size_t num = 0;
			ssize_t max = 0;

			for(size_t i = 0; i < numLists; i++) {
				const auto & list = lists[i];
				diff+= list.s.diff;
				num ++;
				// Keep the per-sublist imbalance with the largest magnitude.
				max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff;
				os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n";
			}

			os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl;
		}

		const auto & global = global_stats;

		double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
		double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
		double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
		double rpop_sur = (100.0 * double(global.pick.pop .mask_reset) / global.pick.pop .mask_attempt);

		os << "Push Pick % : " << push_sur << "(" << global.pick.push.success << " / " << global.pick.push.attempt << ")\n";
		os << "Pop Pick % : " << pop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .attempt << ")\n";
		os << "TryPop Pick % : " << mpop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .mask_attempt << ")\n";
		os << "Pop M Reset % : " << rpop_sur << "(" << global.pick.pop .mask_reset << " / " << global.pick.pop .mask_attempt << ")\n";

		double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
		double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count;
		double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
		os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
		os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n";
		os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
	}
#endif
};
Note: See TracBrowser for help on using the repository browser.