source: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp@ 5569a31

ADT arm-eh ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast new-ast-unique-expr pthread-emulation qualifiedEnum
Last change on this file since 5569a31 was 807a632, checked in by Thierry Delisle <tdelisle@…>, 6 years ago

Adding current version of the C++ relaxed_list code and benchmark

  • Property mode set to 100644
File size: 13.1 KB
Line 
1#pragma once
2
3#ifndef NO_STATS
4#include <iostream>
5#endif
6
7#include <memory>
8#include <mutex>
9#include <type_traits>
10
11#include "assert.hpp"
12#include "utils.hpp"
13
14using namespace std;
15
16struct spinlock_t {
17 std::atomic_bool ll = { false };
18
19 inline void lock() {
20 while( __builtin_expect(ll.exchange(true),false) ) {
21 while(ll.load(std::memory_order_relaxed))
22 asm volatile("pause");
23 }
24 }
25
26 inline bool try_lock() {
27 return false == ll.exchange(true);
28 }
29
30 inline void unlock() {
31 ll.store(false, std::memory_order_release);
32 }
33
34 inline explicit operator bool() {
35 return ll.load(std::memory_order_relaxed);
36 }
37};
38
39static inline bool bts(std::atomic_size_t & target, size_t bit ) {
40 //*
41 int result = 0;
42 asm volatile(
43 "LOCK btsq %[bit], %[target]\n\t"
44 :"=@ccc" (result)
45 : [target] "m" (target), [bit] "r" (bit)
46 );
47 return result != 0;
48 /*/
49 size_t mask = 1ul << bit;
50 size_t ret = target.fetch_or(mask, std::memory_order_relaxed);
51 return (ret & mask) != 0;
52 //*/
53}
54
55static inline bool btr(std::atomic_size_t & target, size_t bit ) {
56 //*
57 int result = 0;
58 asm volatile(
59 "LOCK btrq %[bit], %[target]\n\t"
60 :"=@ccc" (result)
61 : [target] "m" (target), [bit] "r" (bit)
62 );
63 return result != 0;
64 /*/
65 size_t mask = 1ul << bit;
66 size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);
67 return (ret & mask) != 0;
68 //*/
69}
70
71extern bool enable_stats;
72
73struct pick_stat {
74 struct {
75 size_t attempt = 0;
76 size_t success = 0;
77 } push;
78 struct {
79 size_t attempt = 0;
80 size_t success = 0;
81 size_t mask_attempt = 0;
82 } pop;
83};
84
85struct empty_stat {
86 struct {
87 size_t value = 0;
88 size_t count = 0;
89 } push;
90 struct {
91 size_t value = 0;
92 size_t count = 0;
93 } pop;
94};
95
96template<typename node_t>
97struct _LinksFields_t {
98 node_t * prev = nullptr;
99 node_t * next = nullptr;
100 unsigned long long ts = 0;
101};
102
103template<typename node_t>
104class __attribute__((aligned(128))) relaxed_list {
105 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
106
107public:
108 relaxed_list(unsigned numLists)
109 : lists(new intrusive_queue_t[numLists])
110 , numLists(numLists)
111 {
112 assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
113 // assert(sizeof(*this) == 128);
114 std::cout << "Constructing Relaxed List with " << numLists << std::endl;
115
116 #ifndef NO_STATS
117 if(head) this->next = head;
118 head = this;
119 #endif
120 }
121
122 ~relaxed_list() {
123 std::cout << "Destroying Relaxed List" << std::endl;
124 lists.reset();
125 }
126
127 __attribute__((noinline, hot)) void push(node_t * node) {
128 node->_links.ts = rdtscl();
129
130 while(true) {
131 // Pick a random list
132 unsigned i = tls.rng.next() % numLists;
133
134 #ifndef NO_STATS
135 tls.pick.push.attempt++;
136 #endif
137
138 // If we can't lock it retry
139 if( !lists[i].lock.try_lock() ) continue;
140
141 __attribute__((unused)) int num = numNonEmpty;
142
143 // Actually push it
144 if(lists[i].push(node)) {
145 numNonEmpty++;
146 size_t qword = i >> 6ull;
147 size_t bit = i & 63ull;
148 assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
149 __attribute__((unused)) bool ret = bts(list_mask[qword], bit);
150 assert(!ret);
151 assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
152 }
153 assert(numNonEmpty <= (int)numLists);
154
155 // Unlock and return
156 lists[i].lock.unlock();
157
158 #ifndef NO_STATS
159 tls.pick.push.success++;
160 tls.empty.push.value += num;
161 tls.empty.push.count += 1;
162 #endif
163 return;
164 }
165 }
166
167 __attribute__((noinline, hot)) node_t * pop() {
168 #if !defined(NO_BITMASK)
169 // for(int r = 0; r < 10 && numNonEmpty != 0; r++) {
170 // // Pick two lists at random
171 // unsigned i = tls.rng.next() % numLists;
172 // unsigned j = tls.rng.next() % numLists;
173
174 // if(auto node = try_pop(i, j)) return node;
175 // }
176 int nnempty;
177 while(0 != (nnempty = numNonEmpty)) {
178 tls.pick.pop.mask_attempt++;
179 unsigned i, j;
180 // if( numLists < 4 || (numLists / nnempty) < 4 ) {
181 // // Pick two lists at random
182 // i = tls.rng.next() % numLists;
183 // j = tls.rng.next() % numLists;
184 // } else
185 {
186 #ifndef NO_STATS
187 // tls.pick.push.mask_attempt++;
188 #endif
189
190 // Pick two lists at random
191 unsigned num = ((numLists - 1) >> 6) + 1;
192
193 unsigned ri = tls.rng.next();
194 unsigned rj = tls.rng.next();
195
196 unsigned wdxi = (ri >> 6u) % num;
197 unsigned wdxj = (rj >> 6u) % num;
198
199 size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
200 size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
201
202 if(maski == 0 && maskj == 0) continue;
203
204 unsigned bi = rand_bit(ri, maski);
205 unsigned bj = rand_bit(rj, maskj);
206
207 assertf(bi < 64, "%zu %u", maski, bi);
208 assertf(bj < 64, "%zu %u", maskj, bj);
209
210 i = bi | (wdxi << 6);
211 j = bj | (wdxj << 6);
212
213 assertf(i < numLists, "%u", wdxi << 6);
214 assertf(j < numLists, "%u", wdxj << 6);
215 }
216
217 if(auto node = try_pop(i, j)) return node;
218 }
219 #else
220 while(numNonEmpty != 0) {
221 // Pick two lists at random
222 int i = tls.rng.next() % numLists;
223 int j = tls.rng.next() % numLists;
224
225 if(auto node = try_pop(i, j)) return node;
226 }
227 #endif
228
229 return nullptr;
230 }
231
232private:
233 node_t * try_pop(unsigned i, unsigned j) {
234 #ifndef NO_STATS
235 tls.pick.pop.attempt++;
236 #endif
237
238 // Pick the bet list
239 int w = i;
240 if( __builtin_expect(lists[j].ts() != 0, true) ) {
241 w = (lists[i].ts() < lists[j].ts()) ? i : j;
242 }
243
244 auto & list = lists[w];
245 // If list looks empty retry
246 if( list.ts() == 0 ) return nullptr;
247
248 // If we can't get the lock retry
249 if( !list.lock.try_lock() ) return nullptr;
250
251 __attribute__((unused)) int num = numNonEmpty;
252
253 // If list is empty, unlock and retry
254 if( list.ts() == 0 ) {
255 list.lock.unlock();
256 return nullptr;
257 }
258
259 // Actually pop the list
260 node_t * node;
261 bool emptied;
262 std::tie(node, emptied) = list.pop();
263 assert(node);
264
265 if(emptied) {
266 numNonEmpty--;
267 size_t qword = w >> 6ull;
268 size_t bit = w & 63ull;
269 assert((list_mask[qword] & (1ul << bit)) != 0);
270 __attribute__((unused)) bool ret = btr(list_mask[qword], bit);
271 assert(ret);
272 assert((list_mask[qword] & (1ul << bit)) == 0);
273 }
274
275 // Unlock and return
276 list.lock.unlock();
277 assert(numNonEmpty >= 0);
278 #ifndef NO_STATS
279 tls.pick.pop.success++;
280 tls.empty.pop.value += num;
281 tls.empty.pop.count += 1;
282 #endif
283 return node;
284 }
285
286private:
287
288 class __attribute__((aligned(128))) intrusive_queue_t {
289 public:
290 typedef spinlock_t lock_t;
291
292 friend class relaxed_list<node_t>;
293
294 struct stat {
295 ssize_t diff = 0;
296 size_t push = 0;
297 size_t pop = 0;
298 // size_t value = 0;
299 // size_t count = 0;
300 };
301
302 private:
303 struct sentinel_t {
304 _LinksFields_t<node_t> _links;
305 };
306
307 lock_t lock;
308 sentinel_t before;
309 sentinel_t after;
310 #ifndef NO_STATS
311 stat s;
312 #endif
313
314#pragma GCC diagnostic push
315#pragma GCC diagnostic ignored "-Winvalid-offsetof"
316 static constexpr auto fields_offset = offsetof( node_t, _links );
317#pragma GCC diagnostic pop
318 public:
319 intrusive_queue_t()
320 : before{{ nullptr, tail() }}
321 , after {{ head(), nullptr }}
322 {
323 /* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
324 /* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
325 /* paranoid */ assert(head()->_links.prev == nullptr);
326 /* paranoid */ assert(head()->_links.next == tail() );
327 /* paranoid */ assert(tail()->_links.next == nullptr);
328 /* paranoid */ assert(tail()->_links.prev == head() );
329 /* paranoid */ assert(sizeof(*this) == 128);
330 /* paranoid */ assert((intptr_t(this) % 128) == 0);
331 }
332
333 ~intrusive_queue_t() = default;
334
335 inline node_t * head() const {
336 node_t * rhead = reinterpret_cast<node_t *>(
337 reinterpret_cast<uintptr_t>( &before ) - fields_offset
338 );
339 assert(rhead);
340 return rhead;
341 }
342
343 inline node_t * tail() const {
344 node_t * rtail = reinterpret_cast<node_t *>(
345 reinterpret_cast<uintptr_t>( &after ) - fields_offset
346 );
347 assert(rtail);
348 return rtail;
349 }
350
351 inline bool push(node_t * node) {
352 assert(lock);
353 assert(node->_links.ts != 0);
354 node_t * tail = this->tail();
355
356 node_t * prev = tail->_links.prev;
357 // assertf(node->_links.ts >= prev->_links.ts,
358 // "New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
359 node->_links.next = tail;
360 node->_links.prev = prev;
361 prev->_links.next = node;
362 tail->_links.prev = node;
363 #ifndef NO_STATS
364 if(enable_stats) {
365 s.diff++;
366 s.push++;
367 }
368 #endif
369 if(before._links.ts == 0l) {
370 before._links.ts = node->_links.ts;
371 assert(node->_links.prev == this->head());
372 return true;
373 }
374 return false;
375 }
376
377 inline std::pair<node_t *, bool> pop() {
378 assert(lock);
379 node_t * head = this->head();
380 node_t * tail = this->tail();
381
382 node_t * node = head->_links.next;
383 node_t * next = node->_links.next;
384 if(node == tail) return {nullptr, false};
385
386 head->_links.next = next;
387 next->_links.prev = head;
388
389 #ifndef NO_STATS
390 if(enable_stats) {
391 s.diff--;
392 s.pop ++;
393 }
394 #endif
395 if(next == tail) {
396 before._links.ts = 0l;
397 return {node, true};
398 }
399 else {
400 assert(next->_links.ts != 0);
401 before._links.ts = next->_links.ts;
402 assert(before._links.ts != 0);
403 return {node, false};
404 }
405 }
406
407 long long ts() const {
408 return before._links.ts;
409 }
410 };
411
412
413public:
414
415 static __attribute__((aligned(128))) thread_local struct TLS {
416 Random rng = { int(rdtscl()) };
417 pick_stat pick;
418 empty_stat empty;
419 } tls;
420
421public:
422 std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
423 std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
424private:
425 __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
426 const unsigned numLists;
427
428public:
429 static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t);
430
431#ifndef NO_STATS
432 static void stats_print(std::ostream & os) {
433 auto it = head;
434 while(it) {
435 it->stats_print_local(os);
436 it = it->next;
437 }
438 }
439
440 static void stats_tls_tally() {
441 global_stats.pick.push.attempt += tls.pick.push.attempt;
442 global_stats.pick.push.success += tls.pick.push.success;
443 global_stats.pick.pop .attempt += tls.pick.pop.attempt;
444 global_stats.pick.pop .success += tls.pick.pop.success;
445 global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
446
447 global_stats.qstat.push.value += tls.empty.push.value;
448 global_stats.qstat.push.count += tls.empty.push.count;
449 global_stats.qstat.pop .value += tls.empty.pop .value;
450 global_stats.qstat.pop .count += tls.empty.pop .count;
451 }
452
453private:
454 static struct GlobalStats {
455 struct {
456 struct {
457 std::atomic_size_t attempt = { 0 };
458 std::atomic_size_t success = { 0 };
459 } push;
460 struct {
461 std::atomic_size_t attempt = { 0 };
462 std::atomic_size_t success = { 0 };
463 std::atomic_size_t mask_attempt = { 0 };
464 } pop;
465 } pick;
466 struct {
467 struct {
468 std::atomic_size_t value = { 0 };
469 std::atomic_size_t count = { 0 };
470 } push;
471 struct {
472 std::atomic_size_t value = { 0 };
473 std::atomic_size_t count = { 0 };
474 } pop;
475 } qstat;
476 } global_stats;
477
478 // Link list of all lists for stats
479 __attribute__((aligned(64))) relaxed_list<node_t> * next = nullptr;
480
481 static relaxed_list<node_t> * head;
482
483 void stats_print_local(std::ostream & os ) {
484 std::cout << "----- Relaxed List Stats -----" << std::endl;
485 {
486 ssize_t diff = 0;
487 size_t num = 0;
488 ssize_t max = 0;
489
490 for(size_t i = 0; i < numLists; i++) {
491 const auto & list = lists[i];
492 diff+= list.s.diff;
493 num ++;
494 max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff;
495 os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n";
496 }
497
498 os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl;
499 }
500
501 const auto & global = global_stats;
502
503 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
504 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
505 double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
506
507 os << "Push Pick % : " << push_sur << "(" << global.pick.push.success << " / " << global.pick.push.attempt << ")\n";
508 os << "Pop Pick % : " << pop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .attempt << ")\n";
509 os << "TryPop Pick % : " << mpop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .mask_attempt << ")\n";
510
511 double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
512 double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count;
513 double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
514 os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
515 os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n";
516 os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
517 }
518#endif
519};
Note: See TracBrowser for help on using the repository browser.