source: doc/theses/thierry_delisle_PhD/code/work_stealing.hpp@ 433d352

ADT arm-eh ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast-unique-expr pthread-emulation qualifiedEnum
Last change on this file since 433d352 was b232745, checked in by Thierry Delisle <tdelisle@…>, 5 years ago

Several changes to relaxed list prototype and added workstealing for comparison

  • Property mode set to 100644
File size: 5.1 KB
Line 
1#pragma once
2#define LIST_VARIANT work_stealing
3
4#include <cmath>
5#include <iomanip>
6#include <memory>
7#include <mutex>
8#include <type_traits>
9
10#include "assert.hpp"
11#include "utils.hpp"
12#include "links.hpp"
13#include "snzi.hpp"
14
15using namespace std;
16
17template<typename node_t>
18class __attribute__((aligned(128))) work_stealing {
19 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
20
21public:
22 static const char * name() {
23 return "Work Stealing";
24 }
25
26 work_stealing(unsigned _numThreads, unsigned)
27 : numThreads(_numThreads)
28 , lists(new intrusive_queue_t<node_t>[numThreads])
29 , snzi( std::log2( numThreads / 2 ), 2 )
30
31 {
32 std::cout << "Constructing Work Stealer with " << numThreads << std::endl;
33 }
34
35 ~work_stealing() {
36 std::cout << "Destroying Work Stealer" << std::endl;
37 lists.reset();
38 }
39
40 __attribute__((noinline, hot)) void push(node_t * node) {
41 node->_links.ts = rdtscl();
42 if( node->_links.hint > numThreads ) {
43 node->_links.hint = tls.rng.next() % numThreads;
44 tls.stat.push.nhint++;
45 }
46
47 unsigned i = node->_links.hint;
48 auto & list = lists[i];
49 list.lock.lock();
50
51 if(list.push( node )) {
52 snzi.arrive(i);
53 }
54
55 list.lock.unlock();
56 }
57
58 __attribute__((noinline, hot)) node_t * pop() {
59 node_t * node;
60 while(true) {
61 if(!snzi.query()) {
62 return nullptr;
63 }
64
65 {
66 unsigned i = tls.my_queue;
67 auto & list = lists[i];
68 if( list.ts() != 0 ) {
69 list.lock.lock();
70 if((node = try_pop(i))) {
71 tls.stat.pop.local.success++;
72 break;
73 }
74 else {
75 tls.stat.pop.local.elock++;
76 }
77 }
78 else {
79 tls.stat.pop.local.espec++;
80 }
81 }
82
83 tls.stat.pop.steal.tried++;
84
85 int i = tls.rng.next() % numThreads;
86 auto & list = lists[i];
87 if( list.ts() == 0 ) {
88 tls.stat.pop.steal.empty++;
89 continue;
90 }
91
92 if( !list.lock.try_lock() ) {
93 tls.stat.pop.steal.locked++;
94 continue;
95 }
96
97 if((node = try_pop(i))) {
98 tls.stat.pop.steal.success++;
99 break;
100 }
101 }
102
103 #if defined(READ)
104 const unsigned f = READ;
105 if(0 == (tls.it % f)) {
106 unsigned i = tls.it / f;
107 lists[i % numThreads].ts();
108 }
109 // lists[tls.it].ts();
110 tls.it++;
111 #endif
112
113
114 return node;
115 }
116
117private:
118 node_t * try_pop(unsigned i) {
119 auto & list = lists[i];
120
121 // If list is empty, unlock and retry
122 if( list.ts() == 0 ) {
123 list.lock.unlock();
124 return nullptr;
125 }
126
127 // Actually pop the list
128 node_t * node;
129 bool emptied;
130 std::tie(node, emptied) = list.pop();
131 assert(node);
132
133 if(emptied) {
134 snzi.depart(i);
135 }
136
137 // Unlock and return
138 list.lock.unlock();
139 return node;
140 }
141
142
143public:
144
145 static std::atomic_uint32_t ticket;
146 static __attribute__((aligned(128))) thread_local struct TLS {
147 Random rng = { int(rdtscl()) };
148 unsigned my_queue = ticket++;
149 #if defined(READ)
150 unsigned it = 0;
151 #endif
152 struct {
153 struct {
154 std::size_t nhint = { 0 };
155 } push;
156 struct {
157 struct {
158 std::size_t success = { 0 };
159 std::size_t espec = { 0 };
160 std::size_t elock = { 0 };
161 } local;
162 struct {
163 std::size_t tried = { 0 };
164 std::size_t locked = { 0 };
165 std::size_t empty = { 0 };
166 std::size_t success = { 0 };
167 } steal;
168 } pop;
169 } stat;
170 } tls;
171
172private:
173 const unsigned numThreads;
174 std::unique_ptr<intrusive_queue_t<node_t> []> lists;
175 __attribute__((aligned(64))) snzi_t snzi;
176
177#ifndef NO_STATS
178private:
179 static struct GlobalStats {
180 struct {
181 std::atomic_size_t nhint = { 0 };
182 } push;
183 struct {
184 struct {
185 std::atomic_size_t success = { 0 };
186 std::atomic_size_t espec = { 0 };
187 std::atomic_size_t elock = { 0 };
188 } local;
189 struct {
190 std::atomic_size_t tried = { 0 };
191 std::atomic_size_t locked = { 0 };
192 std::atomic_size_t empty = { 0 };
193 std::atomic_size_t success = { 0 };
194 } steal;
195 } pop;
196 } global_stats;
197
198public:
199 static void stats_tls_tally() {
200 global_stats.push.nhint += tls.stat.push.nhint;
201 global_stats.pop.local.success += tls.stat.pop.local.success;
202 global_stats.pop.local.espec += tls.stat.pop.local.espec ;
203 global_stats.pop.local.elock += tls.stat.pop.local.elock ;
204 global_stats.pop.steal.tried += tls.stat.pop.steal.tried ;
205 global_stats.pop.steal.locked += tls.stat.pop.steal.locked ;
206 global_stats.pop.steal.empty += tls.stat.pop.steal.empty ;
207 global_stats.pop.steal.success += tls.stat.pop.steal.success;
208 }
209
210 static void stats_print(std::ostream & os ) {
211 std::cout << "----- Work Stealing Stats -----" << std::endl;
212
213 double stealSucc = double(global_stats.pop.steal.success) / global_stats.pop.steal.tried;
214 os << "Push to new Q : " << std::setw(15) << global_stats.push.nhint << "\n";
215 os << "Local Pop : " << std::setw(15) << global_stats.pop.local.success << "\n";
216 os << "Steal Pop : " << std::setw(15) << global_stats.pop.steal.success << "(" << global_stats.pop.local.espec << "s, " << global_stats.pop.local.elock << "l)\n";
217 os << "Steal Success : " << std::setw(15) << stealSucc << "(" << global_stats.pop.steal.tried << " tries)\n";
218 os << "Steal Fails : " << std::setw(15) << global_stats.pop.steal.empty << "e, " << global_stats.pop.steal.locked << "l\n";
219 }
220private:
221#endif
222};
Note: See TracBrowser for help on using the repository browser.