| 1 |
|
|---|
| 2 | Scheduling Multithreaded Computations by Work Stealing
|
|---|
| 3 |
|
|---|
| 4 | % ===============================================================================
|
|---|
| 5 | % Work stealing examples
|
|---|
| 6 | % ===============================================================================
|
|---|
| 7 |
|
|---|
| 8 | % Cilk [4],
|
|---|
| 9 | @article{frigo1998implementation,
|
|---|
| 10 | title={The implementation of the Cilk-5 multithreaded language},
|
|---|
| 11 | author={Frigo, Matteo and Leiserson, Charles E and Randall, Keith H},
|
|---|
| 12 | journal={ACM Sigplan Notices},
|
|---|
| 13 | volume={33},
|
|---|
| 14 | number={5},
|
|---|
| 15 | pages={212--223},
|
|---|
| 16 | year={1998},
|
|---|
| 17 | publisher={ACM}
|
|---|
| 18 | }
|
|---|
| 19 |
|
|---|
| 20 | % Cilk-NOW
|
|---|
| 21 | @inproceedings{blumofe1997adaptive,
|
|---|
| 22 | title={Adaptive and reliable parallel computing on networks of workstations},
|
|---|
| 23 | author={Blumofe, Robert D and Lisiecki, Philip A and others},
|
|---|
| 24 | booktitle={USENIX 1997 Annual Technical Conference on UNIX and Advanced Computing Systems},
|
|---|
| 25 | pages={133--147},
|
|---|
| 26 | year={1997}
|
|---|
| 27 | }
|
|---|
| 28 |
|
|---|
| 29 | % Intel Threading Building Blocks (TBB)
|
|---|
| 30 | @article{intelTBB,
|
|---|
| 31 | title={Intel R Threading Building Blocks Documentation. Sept. 2013},
|
|---|
| 32 | author={Intel, R},
|
|---|
| 33 | journal={URL: https://www.threadingbuildingblocks.org/docs/help/index.htm}
|
|---|
| 34 | }
|
|---|
| 35 |
|
|---|
| 36 | % Java's Fork-join Framework
|
|---|
| 37 | @inproceedings{lea2000java,
|
|---|
| 38 | title={A Java fork/join framework},
|
|---|
| 39 | author={Lea, Doug},
|
|---|
| 40 | booktitle={Proceedings of the ACM 2000 conference on Java Grande},
|
|---|
| 41 | pages={36--43},
|
|---|
| 42 | year={2000},
|
|---|
| 43 | organization={ACM}
|
|---|
| 44 | }
|
|---|
| 45 |
|
|---|
| 46 | % Microsoft Task Parallel Library
|
|---|
| 47 | @article{leijen2009design,
|
|---|
| 48 | title={The design of a task parallel library},
|
|---|
| 49 | author={Leijen, Daan and Schulte, Wolfram and Burckhardt, Sebastian},
|
|---|
| 50 | journal={Acm Sigplan Notices},
|
|---|
| 51 | volume={44},
|
|---|
| 52 | number={10},
|
|---|
| 53 | pages={227--242},
|
|---|
| 54 | year={2009},
|
|---|
| 55 | publisher={ACM}
|
|---|
| 56 | }
|
|---|
| 57 |
|
|---|
| 58 | % StackThreads/MP
|
|---|
| 59 | @book{taura1999stackthreads,
|
|---|
| 60 | title={Stackthreads/mp: integrating futures into calling standards},
|
|---|
| 61 | author={Taura, Kenjiro and Tabata, Kunio and Yonezawa, Akinori},
|
|---|
| 62 | volume={34},
|
|---|
| 63 | number={8},
|
|---|
| 64 | year={1999},
|
|---|
| 65 | publisher={ACM}
|
|---|
| 66 | }
|
|---|
| 67 |
|
|---|
| 68 | @article{feldmann1993game,
|
|---|
| 69 | title={Game tree search on massively parallel systems},
|
|---|
| 70 | author={Feldmann, Rainer and Mysliwietz, P and Monien, B},
|
|---|
| 71 | journal={Advances in Computer Chess},
|
|---|
| 72 | volume={7},
|
|---|
| 73 | year={1993},
|
|---|
| 74 | publisher={Citeseer}
|
|---|
| 75 | }
|
|---|
| 76 |
|
|---|
| 77 | @article{finkel1987dib,
|
|---|
| 78 | title={DIB—a distributed implementation of backtracking},
|
|---|
| 79 | author={Finkel, Raphael and Manber, Udi},
|
|---|
| 80 | journal={ACM Transactions on Programming Languages and Systems (TOPLAS)},
|
|---|
| 81 | volume={9},
|
|---|
| 82 | number={2},
|
|---|
| 83 | pages={235--256},
|
|---|
| 84 | year={1987},
|
|---|
| 85 | publisher={ACM}
|
|---|
| 86 | }
|
|---|
| 87 |
|
|---|
| 88 | @phdthesis{halbherr1994mimd,
|
|---|
| 89 | title={MIMD-style parallel programming based on continuation-passing threads},
|
|---|
| 90 | author={Halbherr, Michael Roland Sven},
|
|---|
| 91 | year={1994},
|
|---|
| 92 | school={ETH Zurich}
|
|---|
| 93 | }
|
|---|
| 94 |
|
|---|
| 95 | @phdthesis{kuszmaul1994synchronized,
|
|---|
| 96 | title={Synchronized MIMD computing},
|
|---|
| 97 | author={Kuszmaul, Bradley Clair},
|
|---|
| 98 | year={1994},
|
|---|
| 99 | school={Massachusetts Institute of Technology}
|
|---|
| 100 | }
|
|---|
| 101 |
|
|---|
| 102 | @article{mohr1991lazy,
|
|---|
| 103 | title={Lazy task creation: A technique for increasing the granularity of parallel programs},
|
|---|
| 104 | author={Mohr, Eric and Kranz, David A and Halstead, Robert H},
|
|---|
| 105 | journal={IEEE transactions on Parallel and Distributed Systems},
|
|---|
| 106 | volume={2},
|
|---|
| 107 | number={3},
|
|---|
| 108 | pages={264--280},
|
|---|
| 109 | year={1991},
|
|---|
| 110 | publisher={IEEE}
|
|---|
| 111 | }
|
|---|
| 112 |
|
|---|
| 113 | @article{vandevoorde1988workcrews,
|
|---|
| 114 | title={WorkCrews: An abstraction for controlling parallelism},
|
|---|
| 115 | author={Vandevoorde, Mark T and Roberts, Eric S},
|
|---|
| 116 | journal={International Journal of Parallel Programming},
|
|---|
| 117 | volume={17},
|
|---|
| 118 | number={4},
|
|---|
| 119 | pages={347--366},
|
|---|
| 120 | year={1988},
|
|---|
| 121 | publisher={Springer}
|
|---|
| 122 | }
|
|---|
| 123 |
|
|---|
| 124 |
|
|---|
| 125 | % ===============================================================================
|
|---|
| 126 | % Work stealing examples
|
|---|
| 127 | % ===============================================================================
|
|---|
| 128 |
|
|---|
| 129 | %----------
|
|---|
| 130 | % discussion about best case and worst case for full computation
|
|---|
| 131 | % cited by 1720
|
|---|
| 132 | @article{blumofe1999scheduling,
|
|---|
| 133 | title={Scheduling multithreaded computations by work stealing},
|
|---|
| 134 | author={Blumofe, Robert D and Leiserson, Charles E},
|
|---|
| 135 | journal={Journal of the ACM (JACM)},
|
|---|
| 136 | volume={46},
|
|---|
| 137 | number={5},
|
|---|
| 138 | pages={720--748},
|
|---|
| 139 | year={1999},
|
|---|
| 140 | publisher={ACM}
|
|---|
| 141 | }
|
|---|
| 142 |
|
|---|
| 143 | %----------
|
|---|
| 144 | % executing process trees in parallel
|
|---|
| 145 | % mostly used with lazy evaluation and dependency graphs
|
|---|
| 146 | % cited by 267
|
|---|
| 147 | % cited by blumofe1999scheduling as first 'work-stealing idea'
|
|---|
| 148 | @inproceedings{burton1981executing,
|
|---|
| 149 | title={Executing functional programs on a virtual tree of processors},
|
|---|
| 150 | author={Burton, F Warren and Sleep, M Ronan},
|
|---|
| 151 | booktitle={Proceedings of the 1981 conference on Functional programming languages and computer architecture},
|
|---|
| 152 | pages={187--194},
|
|---|
| 153 | year={1981},
|
|---|
| 154 | organization={ACM}
|
|---|
| 155 | }
|
|---|
| 156 |
|
|---|
| 157 |
|
|---|
| 158 | %----------
|
|---|
| 159 | % work-stealing used for multilisp.
|
|---|
| 160 | % "unfair scheduling policy"
|
|---|
| 161 | % cited by 285
|
|---|
| 162 | % cited by blumofe1999scheduling as first 'work-stealing idea' implementation
|
|---|
| 163 | @inproceedings{halstead1984implementation,
|
|---|
| 164 | title={Implementation of Multilisp: Lisp on a multiprocessor},
|
|---|
| 165 | author={Halstead Jr, Robert H},
|
|---|
| 166 | booktitle={Proceedings of the 1984 ACM Symposium on LISP and functional programming},
|
|---|
| 167 | pages={9--17},
|
|---|
| 168 | year={1984},
|
|---|
| 169 | organization={ACM}
|
|---|
| 170 | }
|
|---|
| 171 |
|
|---|
| 172 |
|
|---|
| 173 | % ===============================================================================
|
|---|
| 174 | % HPC schedulers with bounds
|
|---|
| 175 | % ===============================================================================
|
|---|
| 176 |
|
|---|
| 177 | %----------
|
|---|
| 178 | % discussion about bounds
|
|---|
| 179 | % cited by blumofe1999scheduling
|
|---|
| 180 | % mentionned as not practical
|
|---|
| 181 | % CAN'T find it
|
|---|
| 182 | @article{burton1996guaranteeing,
|
|---|
| 183 | title={Guaranteeing good memory bounds for parallel programs},
|
|---|
| 184 | author={Burton, F Warren},
|
|---|
| 185 | journal={IEEE Transactions on software engineering},
|
|---|
| 186 | number={10},
|
|---|
| 187 | pages={762--773},
|
|---|
| 188 | year={1996},
|
|---|
| 189 | publisher={IEEE}
|
|---|
| 190 | }
|
|---|
| 191 |
|
|---|
| 192 | %----------
|
|---|
| 193 | % discussion about bounds
|
|---|
| 194 | % cited by blumofe1999scheduling
|
|---|
| 195 | % mentionned as not practical
|
|---|
| 196 | % - from abstract
|
|---|
| 197 | % talks about implicit concurrency
|
|---|
| 198 | % does not work online or for something other than HPC
|
|---|
| 199 | @inproceedings{blelloch1995provably,
|
|---|
| 200 | title={Provably efficient scheduling for languages with fine-grained parallelism},
|
|---|
| 201 | author={Blelloch, Guy E and Gibbons, Phillip B and Matias, Yossi},
|
|---|
| 202 | booktitle={Proceedings of the seventh annual ACM symposium on Parallel algorithms and architectures},
|
|---|
| 203 | pages={1--12},
|
|---|
| 204 | year={1995},
|
|---|
| 205 | organization={ACM}
|
|---|
| 206 | }
|
|---|
| 207 |
|
|---|
| 208 | %----------
|
|---|
| 209 | % discussion about bounds
|
|---|
| 210 | % cited by blumofe1999scheduling
|
|---|
| 211 | % mentionned as not practical
|
|---|
| 212 | % - from abstract
|
|---|
| 213 | % talks about bound on total time and space
|
|---|
| 214 | % no concept of fairness or latency among tasks
|
|---|
| 215 | @inproceedings{blelloch1997space,
|
|---|
| 216 | title={Space-efficient scheduling of parallelism with synchronization variables},
|
|---|
| 217 | author={Blelloch, Guy E and Gibbons, Phillip B and Matias, Yossi and Narlikar, Girija J},
|
|---|
| 218 | booktitle={Proceedings of the ninth annual ACM symposium on Parallel algorithms and architectures},
|
|---|
| 219 | pages={12--23},
|
|---|
| 220 | year={1997},
|
|---|
| 221 | organization={ACM}
|
|---|
| 222 | }
|
|---|
| 223 |
|
|---|
| 224 | % ===============================================================================
|
|---|
| 225 | % MISC
|
|---|
| 226 | % ===============================================================================
|
|---|
| 227 | % Trevor's relaxed FIFO list
|
|---|
| 228 | @inproceedings{alistarh2018relaxed,
|
|---|
| 229 | title={Relaxed schedulers can efficiently parallelize iterative algorithms},
|
|---|
| 230 | author={Alistarh, Dan and Brown, Trevor and Kopinsky, Justin and Nadiradze, Giorgi},
|
|---|
| 231 | booktitle={Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing},
|
|---|
| 232 | pages={377--386},
|
|---|
| 233 | year={2018}
|
|---|
| 234 | }
|
|---|
| 235 |
|
|---|
| 236 | % Scalable counters which only support is !0
|
|---|
| 237 | @inproceedings{ellen2007snzi,
|
|---|
| 238 | title={SNZI: Scalable nonzero indicators},
|
|---|
| 239 | author={Ellen, Faith and Lev, Yossi and Luchangco, Victor and Moir, Mark},
|
|---|
| 240 | booktitle={Proceedings of the twenty-sixth annual ACM symposium on Principles of distributed computing},
|
|---|
| 241 | pages={13--22},
|
|---|
| 242 | year={2007}
|
|---|
| 243 | }
|
|---|