Changeset 5407cdc for doc/theses/thierry_delisle_PhD
- Timestamp:
- Apr 28, 2021, 4:56:50 PM (5 years ago)
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children:
- 8d66610
- Parents:
- feacef9 (diff), b7fd2db6 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)links above to see all the changes relative to each parent. - Location:
- doc/theses/thierry_delisle_PhD/code/readyQ_proto
- Files:
-
- 3 added
- 9 edited
-
dynamic_entropy.hpp (added)
-
links.hpp (modified) (1 diff)
-
links2.hpp (added)
-
ntmove.cpp (added)
-
processor_list.hpp (modified) (5 diffs)
-
processor_list_good.cpp (modified) (1 diff)
-
randbit.cpp (modified) (1 diff)
-
relaxed_list.cpp (modified) (11 diffs)
-
snzi-packed.hpp (modified) (2 diffs)
-
snzi.hpp (modified) (1 diff)
-
utils.hpp (modified) (4 diffs)
-
work_stealing.hpp (modified) (7 diffs)
Legend:
- Unmodified
- Added
- Removed
-
doc/theses/thierry_delisle_PhD/code/readyQ_proto/links.hpp
rfeacef9 r5407cdc 117 117 } 118 118 119 long long ts() const {119 unsigned long long ts() const { 120 120 return before._links.ts; 121 121 } -
doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor_list.hpp
rfeacef9 r5407cdc 39 39 while( __builtin_expect(ll.exchange(true),false) ) { 40 40 while(ll.load(std::memory_order_relaxed)) 41 asm volatile("pause");41 Pause(); 42 42 } 43 43 /* paranoid */ assert(ll); … … 93 93 && ready.compare_exchange_weak(copy, n + 1) ) 94 94 break; 95 asm volatile("pause");95 Pause(); 96 96 } 97 97 … … 133 133 // Step 1 : make sure no writer are in the middle of the critical section 134 134 while(lock.load(std::memory_order_relaxed)) 135 asm volatile("pause");135 Pause(); 136 136 137 137 // Fence needed because we don't want to start trying to acquire the lock … … 195 195 // to simply lock their own lock and enter. 196 196 while(lock.load(std::memory_order_relaxed)) 197 asm volatile("pause");197 Pause(); 198 198 199 199 // Step 2 : lock per-proc lock … … 204 204 for(uint_fast32_t i = 0; i < s; i++) { 205 205 while(data[i].lock.load(std::memory_order_relaxed)) 206 asm volatile("pause");206 Pause(); 207 207 } 208 208 -
doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor_list_good.cpp
rfeacef9 r5407cdc 21 21 target = (target - (target % total)) + total; 22 22 while(waiting < target) 23 asm volatile("pause");23 Pause(); 24 24 25 25 assert(waiting < (1ul << 60)); -
doc/theses/thierry_delisle_PhD/code/readyQ_proto/randbit.cpp
rfeacef9 r5407cdc 123 123 target = (target - (target % total)) + total; 124 124 while(waiting < target) 125 asm volatile("pause");125 Pause(); 126 126 127 127 assert(waiting < (1ul << 60)); -
doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list.cpp
rfeacef9 r5407cdc 206 206 std::cout << "Total ops : " << ops << "(" << global.in << "i, " << global.out << "o, " << global.empty << "e)\n"; 207 207 #ifndef NO_STATS 208 LIST_VARIANT<Node>::stats_print(std::cout );208 LIST_VARIANT<Node>::stats_print(std::cout, duration); 209 209 #endif 210 210 } … … 368 368 369 369 for(Node * & node : nodes) { 370 node = list.pop(); 371 assert(node); 370 node = nullptr; 371 while(!node) { 372 node = list.pop(); 373 } 372 374 local.crc_out += node->value; 373 375 local.out++; … … 691 693 692 694 for(const auto & n : nodes) { 693 local.valmax = max(local.valmax, size_t(n.value));694 local.valmin = min(local.valmin, size_t(n.value));695 local.valmax = std::max(local.valmax, size_t(n.value)); 696 local.valmin = std::min(local.valmin, size_t(n.value)); 695 697 } 696 698 … … 773 775 try { 774 776 arg = optarg = argv[optind]; 775 nnodes = st oul(optarg, &len);777 nnodes = std::stoul(optarg, &len); 776 778 if(len != arg.size()) { throw std::invalid_argument(""); } 777 779 } catch(std::invalid_argument &) { … … 792 794 try { 793 795 arg = optarg = argv[optind]; 794 nnodes = st oul(optarg, &len);796 nnodes = std::stoul(optarg, &len); 795 797 if(len != arg.size()) { throw std::invalid_argument(""); } 796 798 } catch(std::invalid_argument &) { … … 812 814 try { 813 815 arg = optarg = argv[optind]; 814 nnodes = st oul(optarg, &len);816 nnodes = std::stoul(optarg, &len); 815 817 if(len != arg.size()) { throw std::invalid_argument(""); } 816 818 nslots = nnodes; … … 823 825 try { 824 826 arg = optarg = argv[optind]; 825 nnodes = st oul(optarg, &len);827 nnodes = std::stoul(optarg, &len); 826 828 if(len != arg.size()) { throw std::invalid_argument(""); } 827 829 } catch(std::invalid_argument &) { … … 831 833 try { 832 834 arg = optarg = argv[optind + 1]; 833 nslots = st oul(optarg, &len);835 nslots = std::stoul(optarg, &len); 834 836 if(len != arg.size()) { throw std::invalid_argument(""); } 835 837 } catch(std::invalid_argument &) { … … 884 886 case 'd': 885 887 try { 886 duration = st od(optarg, &len);888 duration = std::stod(optarg, &len); 887 889 if(len != arg.size()) { throw std::invalid_argument(""); } 888 890 } catch(std::invalid_argument &) { … … 893 895 case 't': 894 896 try { 895 nthreads = st oul(optarg, &len);897 nthreads = std::stoul(optarg, &len); 896 898 if(len != arg.size()) { throw std::invalid_argument(""); } 897 899 } catch(std::invalid_argument &) { … … 902 904 case 'q': 903 905 try { 904 nqueues = st oul(optarg, &len);906 nqueues = std::stoul(optarg, &len); 905 907 if(len != arg.size()) { throw std::invalid_argument(""); } 906 908 } catch(std::invalid_argument &) { -
doc/theses/thierry_delisle_PhD/code/readyQ_proto/snzi-packed.hpp
rfeacef9 r5407cdc 168 168 for(int i = 0; i < width; i++) { 169 169 int idx = i % hwdith; 170 std::cout << i << " -> " << idx + width << std::endl;171 170 leafs[i].parent = &nodes[ idx ]; 172 171 } … … 174 173 for(int i = 0; i < root; i++) { 175 174 int idx = (i / 2) + hwdith; 176 std::cout << i + width << " -> " << idx + width << std::endl;177 175 nodes[i].parent = &nodes[ idx ]; 178 176 } -
doc/theses/thierry_delisle_PhD/code/readyQ_proto/snzi.hpp
rfeacef9 r5407cdc 159 159 std::cout << "SNZI: " << depth << "x" << width << "(" << mask - 1 << ") " << (sizeof(snzi_t::node) * (root + 1)) << " bytes" << std::endl; 160 160 for(int i = 0; i < root; i++) { 161 std::cout << i << " -> " << (i / base) + width << std::endl;162 161 nodes[i].parent = &nodes[(i / base) + width]; 163 162 } -
doc/theses/thierry_delisle_PhD/code/readyQ_proto/utils.hpp
rfeacef9 r5407cdc 11 11 #include <sys/sysinfo.h> 12 12 13 #include <x86intrin.h> 14 15 // Barrier from 16 class barrier_t { 17 public: 18 barrier_t(size_t total) 19 : waiting(0) 20 , total(total) 21 {} 22 23 void wait(unsigned) { 24 size_t target = waiting++; 25 target = (target - (target % total)) + total; 26 while(waiting < target) 27 asm volatile("pause"); 28 29 assert(waiting < (1ul << 60)); 30 } 31 32 private: 33 std::atomic<size_t> waiting; 34 size_t total; 35 }; 13 // #include <x86intrin.h> 36 14 37 15 // class Random { … … 102 80 }; 103 81 104 static inline long long rdtscl(void) { 105 unsigned int lo, hi; 106 __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); 107 return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 108 } 82 static inline long long int rdtscl(void) { 83 #if defined( __i386 ) || defined( __x86_64 ) 84 unsigned int lo, hi; 85 __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); 86 return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 87 #elif defined( __aarch64__ ) || defined( __arm__ ) 88 // https://github.com/google/benchmark/blob/v1.1.0/src/cycleclock.h#L116 89 long long int virtual_timer_value; 90 asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); 91 return virtual_timer_value; 92 #else 93 #error unsupported hardware architecture 94 #endif 95 } 96 97 #if defined( __i386 ) || defined( __x86_64 ) 98 #define Pause() __asm__ __volatile__ ( "pause" : : : ) 99 #elif defined( __ARM_ARCH ) 100 #define Pause() __asm__ __volatile__ ( "YIELD" : : : ) 101 #else 102 #error unsupported architecture 103 #endif 109 104 110 105 static inline void affinity(int tid) { … … 195 190 } 196 191 192 // Barrier from 193 class barrier_t { 194 public: 195 barrier_t(size_t total) 196 : waiting(0) 197 , total(total) 198 {} 199 200 void wait(unsigned) { 201 size_t target = waiting++; 202 target = (target - (target % total)) + total; 203 while(waiting < target) 204 Pause(); 205 206 assert(waiting < (1ul << 60)); 207 } 208 209 private: 210 std::atomic<size_t> waiting; 211 size_t total; 212 }; 213 197 214 struct spinlock_t { 198 215 std::atomic_bool ll = { false }; … … 201 218 while( __builtin_expect(ll.exchange(true),false) ) { 202 219 while(ll.load(std::memory_order_relaxed)) 203 asm volatile("pause");220 Pause(); 204 221 } 205 222 } -
doc/theses/thierry_delisle_PhD/code/readyQ_proto/work_stealing.hpp
rfeacef9 r5407cdc 6 6 #include <memory> 7 7 #include <mutex> 8 #include <thread> 8 9 #include <type_traits> 9 10 … … 11 12 #include "utils.hpp" 12 13 #include "links.hpp" 14 #include "links2.hpp" 13 15 #include "snzi.hpp" 14 16 17 // #include <x86intrin.h> 18 15 19 using namespace std; 20 21 static const long long lim = 2000; 22 static const unsigned nqueues = 2; 23 24 struct __attribute__((aligned(128))) timestamp_t { 25 volatile unsigned long long val = 0; 26 }; 27 28 template<typename node_t> 29 struct __attribute__((aligned(128))) localQ_t { 30 #ifdef NO_MPSC 31 intrusive_queue_t<node_t> list; 32 33 inline auto ts() { return list.ts(); } 34 inline auto lock() { return list.lock.lock(); } 35 inline auto try_lock() { return list.lock.try_lock(); } 36 inline auto unlock() { return list.lock.unlock(); } 37 38 inline auto push( node_t * node ) { return list.push( node ); } 39 inline auto pop() { return list.pop(); } 40 #else 41 mpsc_queue<node_t> queue = {}; 42 spinlock_t _lock = {}; 43 44 inline auto ts() { auto h = queue.head(); return h ? h->_links.ts : 0ull; } 45 inline auto lock() { return _lock.lock(); } 46 inline auto try_lock() { return _lock.try_lock(); } 47 inline auto unlock() { return _lock.unlock(); } 48 49 inline auto push( node_t * node ) { return queue.push( node ); } 50 inline auto pop() { return queue.pop(); } 51 #endif 52 53 54 }; 16 55 17 56 template<typename node_t> … … 25 64 26 65 work_stealing(unsigned _numThreads, unsigned) 27 : numThreads(_numThreads) 28 , lists(new intrusive_queue_t<node_t>[numThreads]) 29 , snzi( std::log2( numThreads / 2 ), 2 ) 66 : numThreads(_numThreads * nqueues) 67 , lists(new localQ_t<node_t>[numThreads]) 68 // , lists(new intrusive_queue_t<node_t>[numThreads]) 69 , times(new timestamp_t[numThreads]) 70 // , snzi( std::log2( numThreads / 2 ), 2 ) 30 71 31 72 { … … 40 81 __attribute__((noinline, hot)) void push(node_t * node) { 41 82 node->_links.ts = rdtscl(); 42 if( node->_links.hint > numThreads ) { 43 node->_links.hint = tls.rng.next() % numThreads; 44 tls.stat.push.nhint++; 83 // node->_links.ts = 1; 84 85 auto & list = *({ 86 unsigned i; 87 #ifdef NO_MPSC 88 do { 89 #endif 90 tls.stats.push.attempt++; 91 // unsigned r = tls.rng1.next(); 92 unsigned r = tls.it++; 93 if(tls.my_queue == outside) { 94 i = r % numThreads; 95 } else { 96 i = tls.my_queue + (r % nqueues); 97 } 98 #ifdef NO_MPSC 99 } while(!lists[i].try_lock()); 100 #endif 101 &lists[i]; 102 }); 103 104 list.push( node ); 105 #ifdef NO_MPSC 106 list.unlock(); 107 #endif 108 // tls.rng2.set_raw_state( tls.rng1.get_raw_state()); 109 // count++; 110 tls.stats.push.success++; 111 } 112 113 __attribute__((noinline, hot)) node_t * pop() { 114 if(tls.my_queue != outside) { 115 // if( tls.myfriend == outside ) { 116 // auto r = tls.rng1.next(); 117 // tls.myfriend = r % numThreads; 118 // // assert(lists[(tls.it % nqueues) + tls.my_queue].ts() >= lists[((tls.it + 1) % nqueues) + tls.my_queue].ts()); 119 // tls.mytime = std::min(lists[(tls.it % nqueues) + tls.my_queue].ts(), lists[((tls.it + 1) % nqueues) + tls.my_queue].ts()); 120 // // times[tls.myfriend].val = 0; 121 // // lists[tls.myfriend].val = 0; 122 // } 123 // // else if(times[tls.myfriend].val == 0) { 124 // // else if(lists[tls.myfriend].val == 0) { 125 // else if(times[tls.myfriend].val < tls.mytime) { 126 // // else if(times[tls.myfriend].val < lists[(tls.it % nqueues) + tls.my_queue].ts()) { 127 // node_t * n = try_pop(tls.myfriend, tls.stats.pop.help); 128 // tls.stats.help++; 129 // tls.myfriend = outside; 130 // if(n) return n; 131 // } 132 // if( tls.myfriend == outside ) { 133 // auto r = tls.rng1.next(); 134 // tls.myfriend = r % numThreads; 135 // tls.mytime = lists[((tls.it + 1) % nqueues) + tls.my_queue].ts(); 136 // } 137 // else { 138 // if(times[tls.myfriend].val + 1000 < tls.mytime) { 139 // node_t * n = try_pop(tls.myfriend, tls.stats.pop.help); 140 // tls.stats.help++; 141 // if(n) return n; 142 // } 143 // tls.myfriend = outside; 144 // } 145 146 node_t * n = local(); 147 if(n) return n; 45 148 } 46 149 47 unsigned i = node->_links.hint; 48 auto & list = lists[i]; 49 list.lock.lock(); 50 51 if(list.push( node )) { 52 snzi.arrive(i); 150 // try steal 151 for(int i = 0; i < 25; i++) { 152 node_t * n = steal(); 153 if(n) return n; 53 154 } 54 155 55 list.lock.unlock(); 56 } 57 58 __attribute__((noinline, hot)) node_t * pop() { 59 node_t * node; 60 while(true) { 61 if(!snzi.query()) { 62 return nullptr; 63 } 64 65 { 66 unsigned i = tls.my_queue; 67 auto & list = lists[i]; 68 if( list.ts() != 0 ) { 69 list.lock.lock(); 70 if((node = try_pop(i))) { 71 tls.stat.pop.local.success++; 72 break; 73 } 74 else { 75 tls.stat.pop.local.elock++; 76 } 77 } 78 else { 79 tls.stat.pop.local.espec++; 80 } 81 } 82 83 tls.stat.pop.steal.tried++; 84 85 int i = tls.rng.next() % numThreads; 86 auto & list = lists[i]; 87 if( list.ts() == 0 ) { 88 tls.stat.pop.steal.empty++; 89 continue; 90 } 91 92 if( !list.lock.try_lock() ) { 93 tls.stat.pop.steal.locked++; 94 continue; 95 } 96 97 if((node = try_pop(i))) { 98 tls.stat.pop.steal.success++; 99 break; 156 return search(); 157 } 158 159 private: 160 inline node_t * local() { 161 unsigned i = (--tls.it % nqueues) + tls.my_queue; 162 node_t * n = try_pop(i, tls.stats.pop.local); 163 if(n) return n; 164 i = (--tls.it % nqueues) + tls.my_queue; 165 return try_pop(i, tls.stats.pop.local); 166 } 167 168 inline node_t * steal() { 169 unsigned i = tls.rng2.prev() % numThreads; 170 return try_pop(i, tls.stats.pop.steal); 171 } 172 173 inline node_t * search() { 174 unsigned offset = tls.rng2.prev(); 175 for(unsigned i = 0; i < numThreads; i++) { 176 unsigned idx = (offset + i) % numThreads; 177 node_t * thrd = try_pop(idx, tls.stats.pop.search); 178 if(thrd) { 179 return thrd; 100 180 } 101 181 } 102 182 103 #if defined(READ) 104 const unsigned f = READ; 105 if(0 == (tls.it % f)) { 106 unsigned i = tls.it / f; 107 lists[i % numThreads].ts(); 108 } 109 // lists[tls.it].ts(); 110 tls.it++; 111 #endif 112 113 114 return node; 115 } 116 117 private: 118 node_t * try_pop(unsigned i) { 183 return nullptr; 184 } 185 186 private: 187 struct attempt_stat_t { 188 std::size_t attempt = { 0 }; 189 std::size_t elock = { 0 }; 190 std::size_t eempty = { 0 }; 191 std::size_t espec = { 0 }; 192 std::size_t success = { 0 }; 193 }; 194 195 node_t * try_pop(unsigned i, attempt_stat_t & stat) { 196 assert(i < numThreads); 119 197 auto & list = lists[i]; 198 stat.attempt++; 199 200 // If the list is empty, don't try 201 if(list.ts() == 0) { stat.espec++; return nullptr; } 202 203 // If we can't get the lock, move on 204 if( !list.try_lock() ) { stat.elock++; return nullptr; } 120 205 121 206 // If list is empty, unlock and retry 122 207 if( list.ts() == 0 ) { 123 list.lock.unlock(); 208 list.unlock(); 209 stat.eempty++; 124 210 return nullptr; 125 211 } 126 212 127 // Actually pop the list 128 node_t * node; 129 bool emptied; 130 std::tie(node, emptied) = list.pop(); 131 assert(node); 132 133 if(emptied) { 134 snzi.depart(i); 135 } 136 137 // Unlock and return 138 list.lock.unlock(); 139 return node; 213 auto node = list.pop(); 214 list.unlock(); 215 stat.success++; 216 #ifdef NO_MPSC 217 // times[i].val = 1; 218 times[i].val = node.first->_links.ts; 219 // lists[i].val = node.first->_links.ts; 220 return node.first; 221 #else 222 times[i].val = node->_links.ts; 223 return node; 224 #endif 140 225 } 141 226 … … 144 229 145 230 static std::atomic_uint32_t ticket; 231 static const unsigned outside = 0xFFFFFFFF; 232 233 static inline unsigned calc_preferred() { 234 unsigned t = ticket++; 235 if(t == 0) return outside; 236 unsigned i = (t - 1) * nqueues; 237 return i; 238 } 239 146 240 static __attribute__((aligned(128))) thread_local struct TLS { 147 Random rng = { int(rdtscl()) }; 148 unsigned my_queue = ticket++; 241 Random rng1 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) }; 242 Random rng2 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) }; 243 unsigned it = 0; 244 unsigned my_queue = calc_preferred(); 245 unsigned myfriend = outside; 246 unsigned long long int mytime = 0; 149 247 #if defined(READ) 150 248 unsigned it = 0; … … 152 250 struct { 153 251 struct { 154 std::size_t nhint = { 0 }; 252 std::size_t attempt = { 0 }; 253 std::size_t success = { 0 }; 155 254 } push; 156 255 struct { 157 struct { 158 std::size_t success = { 0 }; 159 std::size_t espec = { 0 }; 160 std::size_t elock = { 0 }; 161 } local; 162 struct { 163 std::size_t tried = { 0 }; 164 std::size_t locked = { 0 }; 165 std::size_t empty = { 0 }; 166 std::size_t success = { 0 }; 167 } steal; 256 attempt_stat_t help; 257 attempt_stat_t local; 258 attempt_stat_t steal; 259 attempt_stat_t search; 168 260 } pop; 169 } stat; 261 std::size_t help = { 0 }; 262 } stats; 170 263 } tls; 171 264 172 265 private: 173 266 const unsigned numThreads; 174 std::unique_ptr<intrusive_queue_t<node_t> []> lists; 175 __attribute__((aligned(64))) snzi_t snzi; 267 std::unique_ptr<localQ_t<node_t> []> lists; 268 // std::unique_ptr<intrusive_queue_t<node_t> []> lists; 269 std::unique_ptr<timestamp_t []> times; 270 __attribute__((aligned(128))) std::atomic_size_t count; 176 271 177 272 #ifndef NO_STATS … … 179 274 static struct GlobalStats { 180 275 struct { 181 std::atomic_size_t nhint = { 0 }; 276 std::atomic_size_t attempt = { 0 }; 277 std::atomic_size_t success = { 0 }; 182 278 } push; 183 279 struct { 184 280 struct { 281 std::atomic_size_t attempt = { 0 }; 282 std::atomic_size_t elock = { 0 }; 283 std::atomic_size_t eempty = { 0 }; 284 std::atomic_size_t espec = { 0 }; 185 285 std::atomic_size_t success = { 0 }; 186 std::atomic_size_t espec = { 0 }; 187 std::atomic_size_t elock = { 0 }; 286 } help; 287 struct { 288 std::atomic_size_t attempt = { 0 }; 289 std::atomic_size_t elock = { 0 }; 290 std::atomic_size_t eempty = { 0 }; 291 std::atomic_size_t espec = { 0 }; 292 std::atomic_size_t success = { 0 }; 188 293 } local; 189 294 struct { 190 std::atomic_size_t tried = { 0 }; 191 std::atomic_size_t locked = { 0 }; 192 std::atomic_size_t empty = { 0 }; 295 std::atomic_size_t attempt = { 0 }; 296 std::atomic_size_t elock = { 0 }; 297 std::atomic_size_t eempty = { 0 }; 298 std::atomic_size_t espec = { 0 }; 193 299 std::atomic_size_t success = { 0 }; 194 300 } steal; 301 struct { 302 std::atomic_size_t attempt = { 0 }; 303 std::atomic_size_t elock = { 0 }; 304 std::atomic_size_t eempty = { 0 }; 305 std::atomic_size_t espec = { 0 }; 306 std::atomic_size_t success = { 0 }; 307 } search; 195 308 } pop; 309 std::atomic_size_t help = { 0 }; 196 310 } global_stats; 197 311 198 312 public: 199 313 static void stats_tls_tally() { 200 global_stats.push.nhint += tls.stat.push.nhint; 201 global_stats.pop.local.success += tls.stat.pop.local.success; 202 global_stats.pop.local.espec += tls.stat.pop.local.espec ; 203 global_stats.pop.local.elock += tls.stat.pop.local.elock ; 204 global_stats.pop.steal.tried += tls.stat.pop.steal.tried ; 205 global_stats.pop.steal.locked += tls.stat.pop.steal.locked ; 206 global_stats.pop.steal.empty += tls.stat.pop.steal.empty ; 207 global_stats.pop.steal.success += tls.stat.pop.steal.success; 208 } 209 210 static void stats_print(std::ostream & os ) { 314 global_stats.push.attempt += tls.stats.push.attempt; 315 global_stats.push.success += tls.stats.push.success; 316 global_stats.pop.help .attempt += tls.stats.pop.help .attempt; 317 global_stats.pop.help .elock += tls.stats.pop.help .elock ; 318 global_stats.pop.help .eempty += tls.stats.pop.help .eempty ; 319 global_stats.pop.help .espec += tls.stats.pop.help .espec ; 320 global_stats.pop.help .success += tls.stats.pop.help .success; 321 global_stats.pop.local .attempt += tls.stats.pop.local .attempt; 322 global_stats.pop.local .elock += tls.stats.pop.local .elock ; 323 global_stats.pop.local .eempty += tls.stats.pop.local .eempty ; 324 global_stats.pop.local .espec += tls.stats.pop.local .espec ; 325 global_stats.pop.local .success += tls.stats.pop.local .success; 326 global_stats.pop.steal .attempt += tls.stats.pop.steal .attempt; 327 global_stats.pop.steal .elock += tls.stats.pop.steal .elock ; 328 global_stats.pop.steal .eempty += tls.stats.pop.steal .eempty ; 329 global_stats.pop.steal .espec += tls.stats.pop.steal .espec ; 330 global_stats.pop.steal .success += tls.stats.pop.steal .success; 331 global_stats.pop.search.attempt += tls.stats.pop.search.attempt; 332 global_stats.pop.search.elock += tls.stats.pop.search.elock ; 333 global_stats.pop.search.eempty += tls.stats.pop.search.eempty ; 334 global_stats.pop.search.espec += tls.stats.pop.search.espec ; 335 global_stats.pop.search.success += tls.stats.pop.search.success; 336 global_stats.help += tls.stats.help; 337 } 338 339 static void stats_print(std::ostream & os, double duration ) { 211 340 std::cout << "----- Work Stealing Stats -----" << std::endl; 212 341 213 double stealSucc = double(global_stats.pop.steal.success) / global_stats.pop.steal.tried; 214 os << "Push to new Q : " << std::setw(15) << global_stats.push.nhint << "\n"; 215 os << "Local Pop : " << std::setw(15) << global_stats.pop.local.success << "\n"; 216 os << "Steal Pop : " << std::setw(15) << global_stats.pop.steal.success << "(" << global_stats.pop.local.espec << "s, " << global_stats.pop.local.elock << "l)\n"; 217 os << "Steal Success : " << std::setw(15) << stealSucc << "(" << global_stats.pop.steal.tried << " tries)\n"; 218 os << "Steal Fails : " << std::setw(15) << global_stats.pop.steal.empty << "e, " << global_stats.pop.steal.locked << "l\n"; 342 double push_suc = (100.0 * double(global_stats.push.success) / global_stats.push.attempt); 343 double push_len = double(global_stats.push.attempt ) / global_stats.push.success; 344 os << "Push Pick : " << push_suc << " %, len " << push_len << " (" << global_stats.push.attempt << " / " << global_stats.push.success << ")\n"; 345 346 double hlp_suc = (100.0 * double(global_stats.pop.help.success) / global_stats.pop.help.attempt); 347 double hlp_len = double(global_stats.pop.help.attempt ) / global_stats.pop.help.success; 348 os << "Help : " << hlp_suc << " %, len " << hlp_len << " (" << global_stats.pop.help.attempt << " / " << global_stats.pop.help.success << ")\n"; 349 os << "Help Fail : " << global_stats.pop.help.espec << "s, " << global_stats.pop.help.eempty << "e, " << global_stats.pop.help.elock << "l\n"; 350 351 double pop_suc = (100.0 * double(global_stats.pop.local.success) / global_stats.pop.local.attempt); 352 double pop_len = double(global_stats.pop.local.attempt ) / global_stats.pop.local.success; 353 os << "Local : " << pop_suc << " %, len " << pop_len << " (" << global_stats.pop.local.attempt << " / " << global_stats.pop.local.success << ")\n"; 354 os << "Local Fail : " << global_stats.pop.local.espec << "s, " << global_stats.pop.local.eempty << "e, " << global_stats.pop.local.elock << "l\n"; 355 356 double stl_suc = (100.0 * double(global_stats.pop.steal.success) / global_stats.pop.steal.attempt); 357 double stl_len = double(global_stats.pop.steal.attempt ) / global_stats.pop.steal.success; 358 os << "Steal : " << stl_suc << " %, len " << stl_len << " (" << global_stats.pop.steal.attempt << " / " << global_stats.pop.steal.success << ")\n"; 359 os << "Steal Fail : " << global_stats.pop.steal.espec << "s, " << global_stats.pop.steal.eempty << "e, " << global_stats.pop.steal.elock << "l\n"; 360 361 double srh_suc = (100.0 * double(global_stats.pop.search.success) / global_stats.pop.search.attempt); 362 double srh_len = double(global_stats.pop.search.attempt ) / global_stats.pop.search.success; 363 os << "Search : " << srh_suc << " %, len " << srh_len << " (" << global_stats.pop.search.attempt << " / " << global_stats.pop.search.success << ")\n"; 364 os << "Search Fail : " << global_stats.pop.search.espec << "s, " << global_stats.pop.search.eempty << "e, " << global_stats.pop.search.elock << "l\n"; 365 os << "Helps : " << std::setw(15) << std::scientific << global_stats.help / duration << "/sec (" << global_stats.help << ")\n"; 219 366 } 220 367 private:
Note:
See TracChangeset
for help on using the changeset viewer.