Changeset 3966d9a for doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp
- Timestamp:
- Feb 10, 2020, 11:58:55 AM (6 years ago)
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children:
- 686cb63, 7102540
- Parents:
- 92a9768 (diff), 3b56166 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)links above to see all the changes relative to each parent. - File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp
r92a9768 r3966d9a 37 37 }; 38 38 39 static inline bool bts(std::atomic_size_t & target, size_t bit ) { 40 //* 41 int result = 0; 42 asm volatile( 43 "LOCK btsq %[bit], %[target]\n\t" 44 :"=@ccc" (result) 45 : [target] "m" (target), [bit] "r" (bit) 46 ); 47 return result != 0; 48 /*/ 49 size_t mask = 1ul << bit; 50 size_t ret = target.fetch_or(mask, std::memory_order_relaxed); 51 return (ret & mask) != 0; 52 //*/ 53 } 54 55 static inline bool btr(std::atomic_size_t & target, size_t bit ) { 56 //* 57 int result = 0; 58 asm volatile( 59 "LOCK btrq %[bit], %[target]\n\t" 60 :"=@ccc" (result) 61 : [target] "m" (target), [bit] "r" (bit) 62 ); 63 return result != 0; 64 /*/ 65 size_t mask = 1ul << bit; 66 size_t ret = target.fetch_and(~mask, std::memory_order_relaxed); 67 return (ret & mask) != 0; 68 //*/ 69 } 39 70 40 71 extern bool enable_stats; … … 48 79 size_t attempt = 0; 49 80 size_t success = 0; 81 size_t mask_attempt = 0; 82 } pop; 83 }; 84 85 struct empty_stat { 86 struct { 87 size_t value = 0; 88 size_t count = 0; 89 } push; 90 struct { 91 size_t value = 0; 92 size_t count = 0; 50 93 } pop; 51 94 }; … … 62 105 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field"); 63 106 64 65 107 public: 66 108 relaxed_list(unsigned numLists) 67 : numNonEmpty{0} 68 , lists(new intrusive_queue_t[numLists]) 109 : lists(new intrusive_queue_t[numLists]) 69 110 , numLists(numLists) 70 {} 111 { 112 assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists"); 113 // assert(sizeof(*this) == 128); 114 std::cout << "Constructing Relaxed List with " << numLists << std::endl; 115 116 #ifndef NO_STATS 117 if(head) this->next = head; 118 head = this; 119 #endif 120 } 71 121 72 122 ~relaxed_list() { 123 std::cout << "Destroying Relaxed List" << std::endl; 73 124 lists.reset(); 74 #ifndef NO_STATS75 std::cout << "Difference : "76 << ssize_t(double(intrusive_queue_t::stat::dif.value) / intrusive_queue_t::stat::dif.num ) << " avg\t"77 << intrusive_queue_t::stat::dif.max << "max" << std::endl;78 #endif79 125 } 80 126 … … 84 130 while(true) { 85 131 // Pick a random list 86 inti = tls.rng.next() % numLists;132 unsigned i = tls.rng.next() % numLists; 87 133 88 134 #ifndef NO_STATS … … 93 139 if( !lists[i].lock.try_lock() ) continue; 94 140 141 __attribute__((unused)) int num = numNonEmpty; 142 95 143 // Actually push it 96 lists[i].push(node, numNonEmpty); 144 if(lists[i].push(node)) { 145 numNonEmpty++; 146 size_t qword = i >> 6ull; 147 size_t bit = i & 63ull; 148 assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit)); 149 __attribute__((unused)) bool ret = bts(list_mask[qword], bit); 150 assert(!ret); 151 assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit)); 152 } 97 153 assert(numNonEmpty <= (int)numLists); 98 154 … … 102 158 #ifndef NO_STATS 103 159 tls.pick.push.success++; 160 tls.empty.push.value += num; 161 tls.empty.push.count += 1; 104 162 #endif 105 163 return; … … 108 166 109 167 __attribute__((noinline, hot)) node_t * pop() { 110 while(numNonEmpty != 0) { 111 // Pick two lists at random 112 int i = tls.rng.next() % numLists; 113 int j = tls.rng.next() % numLists; 114 115 #ifndef NO_STATS 116 tls.pick.pop.attempt++; 117 #endif 118 119 // Pick the bet list 120 int w = i; 121 if( __builtin_expect(lists[j].ts() != 0, true) ) { 122 w = (lists[i].ts() < lists[j].ts()) ? i : j; 123 } 124 125 auto & list = lists[w]; 126 // If list looks empty retry 127 if( list.ts() == 0 ) continue; 128 129 // If we can't get the lock retry 130 if( !list.lock.try_lock() ) continue; 131 132 // If list is empty, unlock and retry 133 if( list.ts() == 0 ) { 134 list.lock.unlock(); 135 continue; 136 } 137 138 // Actually pop the list 139 auto node = list.pop(numNonEmpty); 140 assert(node); 141 142 // Unlock and return 143 list.lock.unlock(); 144 assert(numNonEmpty >= 0); 145 #ifndef NO_STATS 146 tls.pick.pop.success++; 147 #endif 148 return node; 149 } 168 #if !defined(NO_BITMASK) 169 // for(int r = 0; r < 10 && numNonEmpty != 0; r++) { 170 // // Pick two lists at random 171 // unsigned i = tls.rng.next() % numLists; 172 // unsigned j = tls.rng.next() % numLists; 173 174 // if(auto node = try_pop(i, j)) return node; 175 // } 176 int nnempty; 177 while(0 != (nnempty = numNonEmpty)) { 178 tls.pick.pop.mask_attempt++; 179 unsigned i, j; 180 // if( numLists < 4 || (numLists / nnempty) < 4 ) { 181 // // Pick two lists at random 182 // i = tls.rng.next() % numLists; 183 // j = tls.rng.next() % numLists; 184 // } else 185 { 186 #ifndef NO_STATS 187 // tls.pick.push.mask_attempt++; 188 #endif 189 190 // Pick two lists at random 191 unsigned num = ((numLists - 1) >> 6) + 1; 192 193 unsigned ri = tls.rng.next(); 194 unsigned rj = tls.rng.next(); 195 196 unsigned wdxi = (ri >> 6u) % num; 197 unsigned wdxj = (rj >> 6u) % num; 198 199 size_t maski = list_mask[wdxi].load(std::memory_order_relaxed); 200 size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed); 201 202 if(maski == 0 && maskj == 0) continue; 203 204 unsigned bi = rand_bit(ri, maski); 205 unsigned bj = rand_bit(rj, maskj); 206 207 assertf(bi < 64, "%zu %u", maski, bi); 208 assertf(bj < 64, "%zu %u", maskj, bj); 209 210 i = bi | (wdxi << 6); 211 j = bj | (wdxj << 6); 212 213 assertf(i < numLists, "%u", wdxi << 6); 214 assertf(j < numLists, "%u", wdxj << 6); 215 } 216 217 if(auto node = try_pop(i, j)) return node; 218 } 219 #else 220 while(numNonEmpty != 0) { 221 // Pick two lists at random 222 int i = tls.rng.next() % numLists; 223 int j = tls.rng.next() % numLists; 224 225 if(auto node = try_pop(i, j)) return node; 226 } 227 #endif 150 228 151 229 return nullptr; 152 230 } 231 232 private: 233 node_t * try_pop(unsigned i, unsigned j) { 234 #ifndef NO_STATS 235 tls.pick.pop.attempt++; 236 #endif 237 238 // Pick the bet list 239 int w = i; 240 if( __builtin_expect(lists[j].ts() != 0, true) ) { 241 w = (lists[i].ts() < lists[j].ts()) ? i : j; 242 } 243 244 auto & list = lists[w]; 245 // If list looks empty retry 246 if( list.ts() == 0 ) return nullptr; 247 248 // If we can't get the lock retry 249 if( !list.lock.try_lock() ) return nullptr; 250 251 __attribute__((unused)) int num = numNonEmpty; 252 253 // If list is empty, unlock and retry 254 if( list.ts() == 0 ) { 255 list.lock.unlock(); 256 return nullptr; 257 } 258 259 // Actually pop the list 260 node_t * node; 261 bool emptied; 262 std::tie(node, emptied) = list.pop(); 263 assert(node); 264 265 if(emptied) { 266 numNonEmpty--; 267 size_t qword = w >> 6ull; 268 size_t bit = w & 63ull; 269 assert((list_mask[qword] & (1ul << bit)) != 0); 270 __attribute__((unused)) bool ret = btr(list_mask[qword], bit); 271 assert(ret); 272 assert((list_mask[qword] & (1ul << bit)) == 0); 273 } 274 275 // Unlock and return 276 list.lock.unlock(); 277 assert(numNonEmpty >= 0); 278 #ifndef NO_STATS 279 tls.pick.pop.success++; 280 tls.empty.pop.value += num; 281 tls.empty.pop.count += 1; 282 #endif 283 return node; 284 } 153 285 154 286 private: … … 162 294 struct stat { 163 295 ssize_t diff = 0; 164 165 static struct Dif { 166 ssize_t value = 0; 167 size_t num = 0; 168 ssize_t max = 0; 169 } dif; 296 size_t push = 0; 297 size_t pop = 0; 298 // size_t value = 0; 299 // size_t count = 0; 170 300 }; 171 301 … … 178 308 sentinel_t before; 179 309 sentinel_t after; 180 stat s; 181 310 #ifndef NO_STATS 311 stat s; 312 #endif 313 314 #pragma GCC diagnostic push 315 #pragma GCC diagnostic ignored "-Winvalid-offsetof" 182 316 static constexpr auto fields_offset = offsetof( node_t, _links ); 317 #pragma GCC diagnostic pop 183 318 public: 184 319 intrusive_queue_t() … … 186 321 , after {{ head(), nullptr }} 187 322 { 188 assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before)); 189 assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after )); 190 assert(head()->_links.prev == nullptr); 191 assert(head()->_links.next == tail() ); 192 assert(tail()->_links.next == nullptr); 193 assert(tail()->_links.prev == head() ); 194 assert(sizeof(*this) == 128); 195 assert((intptr_t(this) % 128) == 0); 196 } 197 198 ~intrusive_queue_t() { 199 #ifndef NO_STATS 200 stat::dif.value+= s.diff; 201 stat::dif.num ++; 202 stat::dif.max = std::abs(stat::dif.max) > std::abs(s.diff) ? stat::dif.max : s.diff; 203 #endif 204 } 323 /* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before)); 324 /* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after )); 325 /* paranoid */ assert(head()->_links.prev == nullptr); 326 /* paranoid */ assert(head()->_links.next == tail() ); 327 /* paranoid */ assert(tail()->_links.next == nullptr); 328 /* paranoid */ assert(tail()->_links.prev == head() ); 329 /* paranoid */ assert(sizeof(*this) == 128); 330 /* paranoid */ assert((intptr_t(this) % 128) == 0); 331 } 332 333 ~intrusive_queue_t() = default; 205 334 206 335 inline node_t * head() const { … … 220 349 } 221 350 222 inline void push(node_t * node, std::atomic_int & nonEmpty) {351 inline bool push(node_t * node) { 223 352 assert(lock); 224 353 assert(node->_links.ts != 0); … … 232 361 prev->_links.next = node; 233 362 tail->_links.prev = node; 363 #ifndef NO_STATS 364 if(enable_stats) { 365 s.diff++; 366 s.push++; 367 } 368 #endif 234 369 if(before._links.ts == 0l) { 235 nonEmpty += 1;236 370 before._links.ts = node->_links.ts; 237 }238 #ifndef NO_STATS239 if(enable_stats) s.diff++;240 #endif241 } 242 243 inline node_t * pop(std::atomic_int & nonEmpty) {371 assert(node->_links.prev == this->head()); 372 return true; 373 } 374 return false; 375 } 376 377 inline std::pair<node_t *, bool> pop() { 244 378 assert(lock); 245 379 node_t * head = this->head(); … … 248 382 node_t * node = head->_links.next; 249 383 node_t * next = node->_links.next; 250 if(node == tail) return nullptr;384 if(node == tail) return {nullptr, false}; 251 385 252 386 head->_links.next = next; 253 387 next->_links.prev = head; 254 388 389 #ifndef NO_STATS 390 if(enable_stats) { 391 s.diff--; 392 s.pop ++; 393 } 394 #endif 255 395 if(next == tail) { 256 396 before._links.ts = 0l; 257 nonEmpty -= 1;397 return {node, true}; 258 398 } 259 399 else { … … 261 401 before._links.ts = next->_links.ts; 262 402 assert(before._links.ts != 0); 263 } 264 #ifndef NO_STATS 265 if(enable_stats) s.diff--; 266 #endif 267 return node; 403 return {node, false}; 404 } 268 405 } 269 406 … … 277 414 278 415 static __attribute__((aligned(128))) thread_local struct TLS { 279 Random rng = { int(rdtscl()) }; 280 pick_stat pick; 416 Random rng = { int(rdtscl()) }; 417 pick_stat pick; 418 empty_stat empty; 281 419 } tls; 282 420 421 public: 422 std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists 423 std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty 283 424 private: 284 std::atomic_int numNonEmpty; // number of non-empty lists285 425 __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists; 286 426 const unsigned numLists; … … 288 428 public: 289 429 static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t); 430 431 #ifndef NO_STATS 432 static void stats_print(std::ostream & os) { 433 auto it = head; 434 while(it) { 435 it->stats_print_local(os); 436 it = it->next; 437 } 438 } 439 440 static void stats_tls_tally() { 441 global_stats.pick.push.attempt += tls.pick.push.attempt; 442 global_stats.pick.push.success += tls.pick.push.success; 443 global_stats.pick.pop .attempt += tls.pick.pop.attempt; 444 global_stats.pick.pop .success += tls.pick.pop.success; 445 global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt; 446 447 global_stats.qstat.push.value += tls.empty.push.value; 448 global_stats.qstat.push.count += tls.empty.push.count; 449 global_stats.qstat.pop .value += tls.empty.pop .value; 450 global_stats.qstat.pop .count += tls.empty.pop .count; 451 } 452 453 private: 454 static struct GlobalStats { 455 struct { 456 struct { 457 std::atomic_size_t attempt = { 0 }; 458 std::atomic_size_t success = { 0 }; 459 } push; 460 struct { 461 std::atomic_size_t attempt = { 0 }; 462 std::atomic_size_t success = { 0 }; 463 std::atomic_size_t mask_attempt = { 0 }; 464 } pop; 465 } pick; 466 struct { 467 struct { 468 std::atomic_size_t value = { 0 }; 469 std::atomic_size_t count = { 0 }; 470 } push; 471 struct { 472 std::atomic_size_t value = { 0 }; 473 std::atomic_size_t count = { 0 }; 474 } pop; 475 } qstat; 476 } global_stats; 477 478 // Link list of all lists for stats 479 __attribute__((aligned(64))) relaxed_list<node_t> * next = nullptr; 480 481 static relaxed_list<node_t> * head; 482 483 void stats_print_local(std::ostream & os ) { 484 std::cout << "----- Relaxed List Stats -----" << std::endl; 485 { 486 ssize_t diff = 0; 487 size_t num = 0; 488 ssize_t max = 0; 489 490 for(size_t i = 0; i < numLists; i++) { 491 const auto & list = lists[i]; 492 diff+= list.s.diff; 493 num ++; 494 max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff; 495 os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n"; 496 } 497 498 os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl; 499 } 500 501 const auto & global = global_stats; 502 503 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt); 504 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt); 505 double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt); 506 507 os << "Push Pick % : " << push_sur << "(" << global.pick.push.success << " / " << global.pick.push.attempt << ")\n"; 508 os << "Pop Pick % : " << pop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .attempt << ")\n"; 509 os << "TryPop Pick % : " << mpop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .mask_attempt << ")\n"; 510 511 double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count; 512 double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count; 513 double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count); 514 os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n"; 515 os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n"; 516 os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n"; 517 } 518 #endif 290 519 };
Note:
See TracChangeset
for help on using the changeset viewer.