1 |
|
---|
2 | Scheduling Multithreaded Computations by Work Stealing
|
---|
3 |
|
---|
4 | % ===============================================================================
|
---|
5 | % Work stealing examples
|
---|
6 | % ===============================================================================
|
---|
7 |
|
---|
8 | % Cilk [4],
|
---|
9 | @article{frigo1998implementation,
|
---|
10 | title={The implementation of the Cilk-5 multithreaded language},
|
---|
11 | author={Frigo, Matteo and Leiserson, Charles E and Randall, Keith H},
|
---|
12 | journal={ACM Sigplan Notices},
|
---|
13 | volume={33},
|
---|
14 | number={5},
|
---|
15 | pages={212--223},
|
---|
16 | year={1998},
|
---|
17 | publisher={ACM}
|
---|
18 | }
|
---|
19 |
|
---|
20 | % Cilk-NOW
|
---|
21 | @inproceedings{blumofe1997adaptive,
|
---|
22 | title={Adaptive and reliable parallel computing on networks of workstations},
|
---|
23 | author={Blumofe, Robert D and Lisiecki, Philip A and others},
|
---|
24 | booktitle={USENIX 1997 Annual Technical Conference on UNIX and Advanced Computing Systems},
|
---|
25 | pages={133--147},
|
---|
26 | year={1997}
|
---|
27 | }
|
---|
28 |
|
---|
29 | % Intel Threading Building Blocks (TBB)
|
---|
30 | @article{intelTBB,
|
---|
31 | title={Intel R Threading Building Blocks Documentation. Sept. 2013},
|
---|
32 | author={Intel, R},
|
---|
33 | journal={URL: https://www.threadingbuildingblocks.org/docs/help/index.htm}
|
---|
34 | }
|
---|
35 |
|
---|
36 | % Java's Fork-join Framework
|
---|
37 | @inproceedings{lea2000java,
|
---|
38 | title={A Java fork/join framework},
|
---|
39 | author={Lea, Doug},
|
---|
40 | booktitle={Proceedings of the ACM 2000 conference on Java Grande},
|
---|
41 | pages={36--43},
|
---|
42 | year={2000},
|
---|
43 | organization={ACM}
|
---|
44 | }
|
---|
45 |
|
---|
46 | % Microsoft Task Parallel Library
|
---|
47 | @article{leijen2009design,
|
---|
48 | title={The design of a task parallel library},
|
---|
49 | author={Leijen, Daan and Schulte, Wolfram and Burckhardt, Sebastian},
|
---|
50 | journal={Acm Sigplan Notices},
|
---|
51 | volume={44},
|
---|
52 | number={10},
|
---|
53 | pages={227--242},
|
---|
54 | year={2009},
|
---|
55 | publisher={ACM}
|
---|
56 | }
|
---|
57 |
|
---|
58 | % StackThreads/MP
|
---|
59 | @book{taura1999stackthreads,
|
---|
60 | title={Stackthreads/mp: integrating futures into calling standards},
|
---|
61 | author={Taura, Kenjiro and Tabata, Kunio and Yonezawa, Akinori},
|
---|
62 | volume={34},
|
---|
63 | number={8},
|
---|
64 | year={1999},
|
---|
65 | publisher={ACM}
|
---|
66 | }
|
---|
67 |
|
---|
68 | @article{feldmann1993game,
|
---|
69 | title={Game tree search on massively parallel systems},
|
---|
70 | author={Feldmann, Rainer and Mysliwietz, P and Monien, B},
|
---|
71 | journal={Advances in Computer Chess},
|
---|
72 | volume={7},
|
---|
73 | year={1993},
|
---|
74 | publisher={Citeseer}
|
---|
75 | }
|
---|
76 |
|
---|
77 | @article{finkel1987dib,
|
---|
78 | title={DIB-a distributed implementation of backtracking},
|
---|
79 | author={Finkel, Raphael and Manber, Udi},
|
---|
80 | journal={ACM Transactions on Programming Languages and Systems (TOPLAS)},
|
---|
81 | volume={9},
|
---|
82 | number={2},
|
---|
83 | pages={235--256},
|
---|
84 | year={1987},
|
---|
85 | publisher={ACM}
|
---|
86 | }
|
---|
87 |
|
---|
88 | @phdthesis{halbherr1994mimd,
|
---|
89 | title={MIMD-style parallel programming based on continuation-passing threads},
|
---|
90 | author={Halbherr, Michael Roland Sven},
|
---|
91 | year={1994},
|
---|
92 | school={ETH Zurich}
|
---|
93 | }
|
---|
94 |
|
---|
95 | @phdthesis{kuszmaul1994synchronized,
|
---|
96 | title={Synchronized MIMD computing},
|
---|
97 | author={Kuszmaul, Bradley Clair},
|
---|
98 | year={1994},
|
---|
99 | school={Massachusetts Institute of Technology}
|
---|
100 | }
|
---|
101 |
|
---|
102 | @article{mohr1991lazy,
|
---|
103 | title={Lazy task creation: A technique for increasing the granularity of parallel programs},
|
---|
104 | author={Mohr, Eric and Kranz, David A and Halstead, Robert H},
|
---|
105 | journal={IEEE transactions on Parallel and Distributed Systems},
|
---|
106 | volume={2},
|
---|
107 | number={3},
|
---|
108 | pages={264--280},
|
---|
109 | year={1991},
|
---|
110 | publisher={IEEE}
|
---|
111 | }
|
---|
112 |
|
---|
113 | @article{vandevoorde1988workcrews,
|
---|
114 | title={WorkCrews: An abstraction for controlling parallelism},
|
---|
115 | author={Vandevoorde, Mark T and Roberts, Eric S},
|
---|
116 | journal={International Journal of Parallel Programming},
|
---|
117 | volume={17},
|
---|
118 | number={4},
|
---|
119 | pages={347--366},
|
---|
120 | year={1988},
|
---|
121 | publisher={Springer}
|
---|
122 | }
|
---|
123 |
|
---|
124 |
|
---|
125 | % ===============================================================================
|
---|
126 | % Work stealing examples
|
---|
127 | % ===============================================================================
|
---|
128 |
|
---|
129 | %----------
|
---|
130 | % discussion about best case and worst case for full computation
|
---|
131 | % cited by 1720
|
---|
132 | @article{blumofe1999scheduling,
|
---|
133 | title={Scheduling multithreaded computations by work stealing},
|
---|
134 | author={Blumofe, Robert D and Leiserson, Charles E},
|
---|
135 | journal={Journal of the ACM (JACM)},
|
---|
136 | volume={46},
|
---|
137 | number={5},
|
---|
138 | pages={720--748},
|
---|
139 | year={1999},
|
---|
140 | publisher={ACM}
|
---|
141 | }
|
---|
142 |
|
---|
143 | %----------
|
---|
144 | % executing process trees in parallel
|
---|
145 | % mostly used with lazy evaluation and dependency graphs
|
---|
146 | % cited by 267
|
---|
147 | % cited by blumofe1999scheduling as first 'work-stealing idea'
|
---|
148 | @inproceedings{burton1981executing,
|
---|
149 | title={Executing functional programs on a virtual tree of processors},
|
---|
150 | author={Burton, F Warren and Sleep, M Ronan},
|
---|
151 | booktitle={Proceedings of the 1981 conference on Functional programming languages and computer architecture},
|
---|
152 | pages={187--194},
|
---|
153 | year={1981},
|
---|
154 | organization={ACM}
|
---|
155 | }
|
---|
156 |
|
---|
157 |
|
---|
158 | %----------
|
---|
159 | % work-stealing used for multilisp.
|
---|
160 | % "unfair scheduling policy"
|
---|
161 | % cited by 285
|
---|
162 | % cited by blumofe1999scheduling as first 'work-stealing idea' implementation
|
---|
163 | @inproceedings{halstead1984implementation,
|
---|
164 | title={Implementation of Multilisp: Lisp on a multiprocessor},
|
---|
165 | author={Halstead Jr, Robert H},
|
---|
166 | booktitle={Proceedings of the 1984 ACM Symposium on LISP and functional programming},
|
---|
167 | pages={9--17},
|
---|
168 | year={1984},
|
---|
169 | organization={ACM}
|
---|
170 | }
|
---|
171 |
|
---|
172 |
|
---|
173 | % ===============================================================================
|
---|
174 | % HPC schedulers with bounds
|
---|
175 | % ===============================================================================
|
---|
176 |
|
---|
177 | %----------
|
---|
178 | % discussion about bounds
|
---|
179 | % cited by blumofe1999scheduling
|
---|
180 | % mentionned as not practical
|
---|
181 | % CAN'T find it
|
---|
182 | @article{burton1996guaranteeing,
|
---|
183 | title={Guaranteeing good memory bounds for parallel programs},
|
---|
184 | author={Burton, F Warren},
|
---|
185 | journal={IEEE Transactions on software engineering},
|
---|
186 | number={10},
|
---|
187 | pages={762--773},
|
---|
188 | year={1996},
|
---|
189 | publisher={IEEE}
|
---|
190 | }
|
---|
191 |
|
---|
192 | %----------
|
---|
193 | % discussion about bounds
|
---|
194 | % cited by blumofe1999scheduling
|
---|
195 | % mentionned as not practical
|
---|
196 | % - from abstract
|
---|
197 | % talks about implicit concurrency
|
---|
198 | % does not work online or for something other than HPC
|
---|
199 | @inproceedings{blelloch1995provably,
|
---|
200 | title={Provably efficient scheduling for languages with fine-grained parallelism},
|
---|
201 | author={Blelloch, Guy E and Gibbons, Phillip B and Matias, Yossi},
|
---|
202 | booktitle={Proceedings of the seventh annual ACM symposium on Parallel algorithms and architectures},
|
---|
203 | pages={1--12},
|
---|
204 | year={1995},
|
---|
205 | organization={ACM}
|
---|
206 | }
|
---|
207 |
|
---|
208 | %----------
|
---|
209 | % discussion about bounds
|
---|
210 | % cited by blumofe1999scheduling
|
---|
211 | % mentionned as not practical
|
---|
212 | % - from abstract
|
---|
213 | % talks about bound on total time and space
|
---|
214 | % no concept of fairness or latency among tasks
|
---|
215 | @inproceedings{blelloch1997space,
|
---|
216 | title={Space-efficient scheduling of parallelism with synchronization variables},
|
---|
217 | author={Blelloch, Guy E and Gibbons, Phillip B and Matias, Yossi and Narlikar, Girija J},
|
---|
218 | booktitle={Proceedings of the ninth annual ACM symposium on Parallel algorithms and architectures},
|
---|
219 | pages={12--23},
|
---|
220 | year={1997},
|
---|
221 | organization={ACM}
|
---|
222 | }
|
---|
223 |
|
---|
224 | % ===============================================================================
|
---|
225 | % Algorithms
|
---|
226 | % ===============================================================================
|
---|
227 | @article{michael2004hazard,
|
---|
228 | title={Hazard pointers: Safe memory reclamation for lock-free objects},
|
---|
229 | author={Michael, Maged M},
|
---|
230 | journal={IEEE Transactions on Parallel and Distributed Systems},
|
---|
231 | volume={15},
|
---|
232 | number={6},
|
---|
233 | pages={491--504},
|
---|
234 | year={2004},
|
---|
235 | publisher={IEEE}
|
---|
236 | }
|
---|
237 |
|
---|
238 | @inproceedings{brown2015reclaiming,
|
---|
239 | title={Reclaiming memory for lock-free data structures: There has to be a better way},
|
---|
240 | author={Brown, Trevor Alexander},
|
---|
241 | booktitle={Proceedings of the 2015 ACM Symposium on Principles of Distributed Computing},
|
---|
242 | pages={261--270},
|
---|
243 | year={2015}
|
---|
244 | }
|
---|
245 |
|
---|
246 | % Trevor's relaxed FIFO list
|
---|
247 | @inproceedings{alistarh2018relaxed,
|
---|
248 | title={Relaxed schedulers can efficiently parallelize iterative algorithms},
|
---|
249 | author={Alistarh, Dan and Brown, Trevor and Kopinsky, Justin and Nadiradze, Giorgi},
|
---|
250 | booktitle={Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing},
|
---|
251 | pages={377--386},
|
---|
252 | year={2018}
|
---|
253 | }
|
---|
254 |
|
---|
255 | % Scalable counters which only support is !0
|
---|
256 | @inproceedings{ellen2007snzi,
|
---|
257 | title={SNZI: Scalable nonzero indicators},
|
---|
258 | author={Ellen, Faith and Lev, Yossi and Luchangco, Victor and Moir, Mark},
|
---|
259 | booktitle={Proceedings of the twenty-sixth annual ACM symposium on Principles of distributed computing},
|
---|
260 | pages={13--22},
|
---|
261 | year={2007}
|
---|
262 | }
|
---|
263 |
|
---|
264 | % ===============================================================================
|
---|
265 | % Linux Man Pages
|
---|
266 | % ===============================================================================
|
---|
267 | @manual{open,
|
---|
268 | key = "open",
|
---|
269 | title = "open(2) Linux User's Manual",
|
---|
270 | year = "2020",
|
---|
271 | month = "February",
|
---|
272 | }
|
---|
273 |
|
---|
274 | @manual{epoll,
|
---|
275 | key = "epoll",
|
---|
276 | title = "epoll(7) Linux User's Manual",
|
---|
277 | year = "2019",
|
---|
278 | month = "March",
|
---|
279 | }
|
---|
280 |
|
---|
281 | @manual{select,
|
---|
282 | key = "select",
|
---|
283 | title = "select(7) Linux User's Manual",
|
---|
284 | year = "2019",
|
---|
285 | month = "March",
|
---|
286 | }
|
---|
287 |
|
---|
288 | @misc{io_uring,
|
---|
289 | title = {Efficient IO with io\_uring},
|
---|
290 | author = {Axboe, Jens},
|
---|
291 | year = "2019",
|
---|
292 | month = "March",
|
---|
293 | version = {0,4},
|
---|
294 | howpublished = {\url{https://kernel.dk/io_uring.pdf}}
|
---|
295 | }
|
---|
296 |
|
---|
297 | @misc{libuv,
|
---|
298 | key = "libuv",
|
---|
299 | title = {libuv},
|
---|
300 | howpublished = {\url{https://github.com/libuv/libuv}}
|
---|
301 | }
|
---|
302 |
|
---|
303 | % ===============================================================================
|
---|
304 | % MISC
|
---|
305 | % ===============================================================================
|
---|
306 |
|
---|
307 | @misc{nginx-design,
|
---|
308 | key = "nginx",
|
---|
309 | title={Inside {NGINX}: How We Designed for Performance \& Scale},
|
---|
310 | howpublished= {\href{https://www.nginx.com/blog/inside-nginx-how-we-designed-for-performance-scale}
|
---|
311 | {https://\-www.nginx.com/\-blog/\-inside\--nginx\--how\--we\--designed\--for\--performance\--scale}},
|
---|
312 | }
|
---|
313 |
|
---|
314 | @article{schillings1996engineering,
|
---|
315 | title={Be engineering insights: Benaphores},
|
---|
316 | author={Schillings, Benoit},
|
---|
317 | journal={Be Newsletters},
|
---|
318 | volume={1},
|
---|
319 | number={26},
|
---|
320 | year={1996}
|
---|
321 | }
|
---|
322 |
|
---|
323 | @misc{wiki:thunderherd,
|
---|
324 | author = "{Wikipedia contributors}",
|
---|
325 | title = "Thundering herd problem --- {W}ikipedia{,} The Free Encyclopedia",
|
---|
326 | year = "2020",
|
---|
327 | howpublished = {\href{https://en.wikipedia.org/wiki/Thundering_herd_problem}
|
---|
328 | {https://\-en.wikipedia.org/\-wiki/\-Thundering\_herd\_problem}},},
|
---|
329 | note = "[Online; accessed 14-April-2020]"
|
---|
330 | }
|
---|
331 |
|
---|
332 | @misc{wiki:ubuntu-linux,
|
---|
333 | author = "{Wikipedia contributors}",
|
---|
334 | title = "Ubuntu version history : Table of versions --- {W}ikipedia{,} The Free Encyclopedia",
|
---|
335 | year = "2020",
|
---|
336 | howpublished = {\href{https://en.wikipedia.org/wiki/Ubuntu_version_history\#Table_of_versions}
|
---|
337 | {https://\-en.wikipedia.org/\-wiki/\-Ubuntu\_version\_history\#Table\_of\_versions}},
|
---|
338 | note = "[Online; accessed 15-April-2020]"
|
---|
339 | }
|
---|