Changes in / [3b56166:d231700]
- Location:
- doc
- Files:
-
- 18 deleted
- 3 edited
-
papers/ibm_CASCON19/ThreadingModels.fig (deleted)
-
papers/ibm_CASCON19/ThreadingModels.png (deleted)
-
papers/ibm_CASCON19/ThreadingModels.svg (deleted)
-
papers/ibm_CASCON19/abstract.txt (deleted)
-
papers/ibm_CASCON19/client.cfa (deleted)
-
papers/ibm_CASCON19/server.cfa (deleted)
-
papers/ibm_CASCON19/slides.pdf (deleted)
-
theses/thierry_delisle_PhD/.gitignore (deleted)
-
theses/thierry_delisle_PhD/code/Makefile (deleted)
-
theses/thierry_delisle_PhD/code/bts_test.cpp (deleted)
-
theses/thierry_delisle_PhD/code/randbit.cpp (deleted)
-
theses/thierry_delisle_PhD/code/relaxed_list.cpp (modified) (7 diffs)
-
theses/thierry_delisle_PhD/code/relaxed_list.hpp (modified) (16 diffs)
-
theses/thierry_delisle_PhD/code/relaxed_list_layout.cpp (deleted)
-
theses/thierry_delisle_PhD/code/scale.sh (deleted)
-
theses/thierry_delisle_PhD/code/utils.hpp (modified) (4 diffs)
-
theses/thierry_delisle_PhD/comp_II/Makefile (deleted)
-
theses/thierry_delisle_PhD/comp_II/comp_II.tex (deleted)
-
theses/thierry_delisle_PhD/comp_II/comp_II_too_big.tex (deleted)
-
theses/thierry_delisle_PhD/comp_II/glossary.tex (deleted)
-
theses/thierry_delisle_PhD/comp_II/local.bib (deleted)
Legend:
- Unmodified
- Added
- Removed
-
doc/theses/thierry_delisle_PhD/code/relaxed_list.cpp
r3b56166 rd231700 9 9 #include <vector> 10 10 11 #include <getopt.h>12 11 #include <unistd.h> 13 12 #include <sys/sysinfo.h> … … 22 21 23 22 int value; 24 int id; 25 26 Node() { creates++; } 27 Node(int value): value(value) { creates++; } 28 ~Node() { destroys++; } 23 Node(int value): value(value) { 24 creates++; 25 } 26 27 ~Node() { 28 destroys++; 29 } 29 30 }; 30 31 … … 32 33 std::atomic_size_t Node::destroys = { 0 }; 33 34 35 static const constexpr int nodes_per_threads = 128; 36 struct NodeArray { 37 __attribute__((aligned(64))) Node * array[nodes_per_threads]; 38 __attribute__((aligned(64))) char pad; 39 }; 40 34 41 bool enable_stats = false; 35 36 template<>37 thread_local relaxed_list<Node>::TLS relaxed_list<Node>::tls = {};38 39 template<>40 relaxed_list<Node> * relaxed_list<Node>::head = nullptr;41 42 #ifndef NO_STATS43 template<>44 relaxed_list<Node>::GlobalStats relaxed_list<Node>::global_stats = {};45 #endif46 47 // ================================================================================================48 // UTILS49 // ================================================================================================50 42 51 43 struct local_stat_t { … … 55 47 size_t crc_in = 0; 56 48 size_t crc_out = 0; 57 size_t valmax = 0;58 size_t valmin = 100000000ul;59 49 }; 60 50 61 struct global_stat_t { 62 std::atomic_size_t in = { 0 }; 63 std::atomic_size_t out = { 0 }; 64 std::atomic_size_t empty = { 0 }; 65 std::atomic_size_t crc_in = { 0 }; 66 std::atomic_size_t crc_out = { 0 }; 67 std::atomic_size_t valmax = { 0 }; 68 std::atomic_size_t valmin = { 100000000ul }; 69 }; 70 71 void atomic_max(std::atomic_size_t & target, size_t value) { 72 for(;;) { 73 size_t expect = target.load(std::memory_order_relaxed); 74 if(value <= expect) return; 75 bool success = target.compare_exchange_strong(expect, value); 76 if(success) return; 77 } 78 } 79 80 void atomic_min(std::atomic_size_t & target, size_t value) { 81 for(;;) { 82 size_t expect = target.load(std::memory_order_relaxed); 83 if(value >= expect) return; 84 bool success = target.compare_exchange_strong(expect, value); 85 if(success) return; 86 } 87 } 88 89 void tally_stats(global_stat_t & global, local_stat_t & local) { 90 91 global.in += local.in; 92 global.out += local.out; 93 global.empty += local.empty; 94 95 global.crc_in += local.crc_in; 96 global.crc_out += local.crc_out; 97 98 atomic_max(global.valmax, local.valmax); 99 atomic_min(global.valmin, local.valmin); 100 101 relaxed_list<Node>::stats_tls_tally(); 102 } 103 104 void waitfor(double & duration, barrier_t & barrier, std::atomic_bool & done) { 51 __attribute__((noinline)) void run_body( 52 std::atomic<bool>& done, 53 Random & rand, 54 Node * (&my_nodes)[128], 55 local_stat_t & local, 56 relaxed_list<Node> & list 57 ) { 58 while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) { 59 int idx = rand.next() % nodes_per_threads; 60 if (auto node = my_nodes[idx]) { 61 local.crc_in += node->value; 62 list.push(node); 63 my_nodes[idx] = nullptr; 64 local.in++; 65 } 66 else if(auto node = list.pop()) { 67 local.crc_out += node->value; 68 my_nodes[idx] = node; 69 local.out++; 70 } 71 else { 72 local.empty++; 73 } 74 } 75 } 76 77 void run(unsigned nthread, unsigned nqueues, unsigned fill, double duration) { 78 // List being tested 79 relaxed_list<Node> list = { nthread * nqueues }; 80 81 // Barrier for synchronization 82 barrier_t barrier(nthread + 1); 83 84 // Data to check everything is OK 85 struct { 86 std::atomic_size_t in = { 0 }; 87 std::atomic_size_t out = { 0 }; 88 std::atomic_size_t empty = { 0 }; 89 std::atomic_size_t crc_in = { 0 }; 90 std::atomic_size_t crc_out = { 0 }; 91 struct { 92 struct { 93 std::atomic_size_t attempt = { 0 }; 94 std::atomic_size_t success = { 0 }; 95 } push; 96 struct { 97 std::atomic_size_t attempt = { 0 }; 98 std::atomic_size_t success = { 0 }; 99 } pop; 100 } pick; 101 } global; 102 103 // Flag to signal termination 104 std::atomic_bool done = { false }; 105 106 // Prep nodes 107 std::cout << "Initializing "; 108 size_t nnodes = 0; 109 size_t npushed = 0; 110 NodeArray all_nodes[nthread]; 111 for(auto & nodes : all_nodes) { 112 Random rand(rdtscl()); 113 for(auto & node : nodes.array) { 114 auto r = rand.next() % 100; 115 if(r < fill) { 116 node = new Node(rand.next() % 100); 117 nnodes++; 118 } else { 119 node = nullptr; 120 } 121 } 122 123 for(int i = 0; i < 10; i++) { 124 int idx = rand.next() % nodes_per_threads; 125 if (auto node = nodes.array[idx]) { 126 global.crc_in += node->value; 127 list.push(node); 128 npushed++; 129 nodes.array[idx] = nullptr; 130 } 131 } 132 } 133 134 std::cout << nnodes << " nodes " << fill << "% (" << npushed << " pushed)" << std::endl; 135 136 enable_stats = true; 137 138 std::thread * threads[nthread]; 139 unsigned i = 1; 140 for(auto & t : threads) { 141 auto & my_nodes = all_nodes[i - 1].array; 142 t = new std::thread([&done, &list, &barrier, &global, &my_nodes](unsigned tid) { 143 Random rand(tid + rdtscl()); 144 145 local_stat_t local; 146 147 // affinity(tid); 148 149 barrier.wait(tid); 150 151 // EXPERIMENT START 152 153 run_body(done, rand, my_nodes, local, list); 154 155 // EXPERIMENT END 156 157 barrier.wait(tid); 158 159 global.in += local.in; 160 global.out += local.out; 161 global.empty += local.empty; 162 163 for(auto node : my_nodes) { 164 delete node; 165 } 166 167 global.crc_in += local.crc_in; 168 global.crc_out += local.crc_out; 169 170 global.pick.push.attempt += relaxed_list<Node>::tls.pick.push.attempt; 171 global.pick.push.success += relaxed_list<Node>::tls.pick.push.success; 172 global.pick.pop .attempt += relaxed_list<Node>::tls.pick.pop.attempt; 173 global.pick.pop .success += relaxed_list<Node>::tls.pick.pop.success; 174 }, i++); 175 } 176 105 177 std::cout << "Starting" << std::endl; 106 178 auto before = Clock::now(); … … 124 196 duration = durr.count(); 125 197 std::cout << "\rClosing down" << std::endl; 126 } 127 128 void waitfor(double & duration, barrier_t & barrier, const std::atomic_size_t & count) { 129 std::cout << "Starting" << std::endl; 130 auto before = Clock::now(); 131 barrier.wait(0); 132 133 while(true) { 134 usleep(100000); 135 size_t c = count.load(); 136 if( c == 0 ) { 137 break; 138 } 139 std::cout << "\r" << c; 140 std::cout.flush(); 141 } 142 143 barrier.wait(0); 144 auto after = Clock::now(); 145 duration_t durr = after - before; 146 duration = durr.count(); 147 std::cout << "\rClosing down" << std::endl; 148 } 149 150 void print_stats(double duration, unsigned nthread, global_stat_t & global) { 198 199 for(auto t : threads) { 200 t->join(); 201 delete t; 202 } 203 204 enable_stats = false; 205 206 while(auto node = list.pop()) { 207 global.crc_out += node->value; 208 delete node; 209 } 210 151 211 assert(Node::creates == Node::destroys); 152 212 assert(global.crc_in == global.crc_out); … … 164 224 std::cout << "Ops/sec : " << ops_sec << "\n"; 165 225 std::cout << "Total ops : " << ops << "(" << global.in << "i, " << global.out << "o, " << global.empty << "e)\n"; 166 if(global.valmax != 0) {167 std::cout << "Max runs : " << global.valmax << "\n";168 std::cout << "Min runs : " << global.valmin << "\n";169 }170 226 #ifndef NO_STATS 171 relaxed_list<Node>::stats_print(std::cout); 227 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt); 228 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt); 229 std::cout << "Push Pick % : " << push_sur << "(" << global.pick.push.success << " / " << global.pick.push.attempt << ")\n"; 230 std::cout << "Pop Pick % : " << pop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .attempt << ")\n"; 172 231 #endif 173 232 } 174 233 175 void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output); 176 177 // ================================================================================================ 178 // EXPERIMENTS 179 // ================================================================================================ 180 181 // ================================================================================================ 182 __attribute__((noinline)) void runChurn_body( 183 std::atomic<bool>& done, 184 Random & rand, 185 Node * my_nodes[], 186 unsigned nslots, 187 local_stat_t & local, 188 relaxed_list<Node> & list 189 ) { 190 while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) { 191 int idx = rand.next() % nslots; 192 if (auto node = my_nodes[idx]) { 193 local.crc_in += node->value; 194 list.push(node); 195 my_nodes[idx] = nullptr; 196 local.in++; 197 } 198 else if(auto node = list.pop()) { 199 local.crc_out += node->value; 200 my_nodes[idx] = node; 201 local.out++; 202 } 203 else { 204 local.empty++; 205 } 206 } 207 } 208 209 void runChurn(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes, const unsigned nslots) { 210 std::cout << "Churn Benchmark" << std::endl; 211 assert(nnodes <= nslots); 212 // List being tested 213 214 // Barrier for synchronization 215 barrier_t barrier(nthread + 1); 216 217 // Data to check everything is OK 218 global_stat_t global; 219 220 // Flag to signal termination 221 std::atomic_bool done = { false }; 222 223 // Prep nodes 224 std::cout << "Initializing "; 225 size_t npushed = 0; 226 relaxed_list<Node> list = { nthread * nqueues }; 227 { 228 Node** all_nodes[nthread]; 229 for(auto & nodes : all_nodes) { 230 nodes = new __attribute__((aligned(64))) Node*[nslots + 8]; 231 Random rand(rdtscl()); 232 for(unsigned i = 0; i < nnodes; i++) { 233 nodes[i] = new Node(rand.next() % 100); 234 } 235 236 for(unsigned i = nnodes; i < nslots; i++) { 237 nodes[i] = nullptr; 238 } 239 240 for(int i = 0; i < 10 && i < (int)nslots; i++) { 241 int idx = rand.next() % nslots; 242 if (auto node = nodes[idx]) { 243 global.crc_in += node->value; 244 list.push(node); 245 npushed++; 246 nodes[idx] = nullptr; 247 } 248 } 249 } 250 251 std::cout << nnodes << " nodes (" << nslots << " slots)" << std::endl; 252 253 enable_stats = true; 254 255 std::thread * threads[nthread]; 256 unsigned i = 1; 257 for(auto & t : threads) { 258 auto & my_nodes = all_nodes[i - 1]; 259 t = new std::thread([&done, &list, &barrier, &global, &my_nodes, nslots](unsigned tid) { 260 Random rand(tid + rdtscl()); 261 262 local_stat_t local; 263 264 // affinity(tid); 265 266 barrier.wait(tid); 267 268 // EXPERIMENT START 269 270 runChurn_body(done, rand, my_nodes, nslots, local, list); 271 272 // EXPERIMENT END 273 274 barrier.wait(tid); 275 276 tally_stats(global, local); 277 278 for(unsigned i = 0; i < nslots; i++) { 279 delete my_nodes[i]; 280 } 281 }, i++); 282 } 283 284 waitfor(duration, barrier, done); 285 286 for(auto t : threads) { 287 t->join(); 288 delete t; 289 } 290 291 enable_stats = false; 292 293 while(auto node = list.pop()) { 294 global.crc_out += node->value; 295 delete node; 296 } 297 298 for(auto nodes : all_nodes) { 299 delete[] nodes; 300 } 301 } 302 303 print_stats(duration, nthread, global); 304 } 305 306 // ================================================================================================ 307 __attribute__((noinline)) void runPingPong_body( 308 std::atomic<bool>& done, 309 Node initial_nodes[], 310 unsigned nnodes, 311 local_stat_t & local, 312 relaxed_list<Node> & list 313 ) { 314 Node * nodes[nnodes]; 315 { 316 unsigned i = 0; 317 for(auto & n : nodes) { 318 n = &initial_nodes[i++]; 319 } 320 } 321 322 while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) { 323 324 for(Node * & node : nodes) { 325 local.crc_in += node->value; 326 list.push(node); 327 local.in++; 328 } 329 330 // ----- 331 332 for(Node * & node : nodes) { 333 node = list.pop(); 334 assert(node); 335 local.crc_out += node->value; 336 local.out++; 337 } 338 } 339 } 340 341 void runPingPong(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes) { 342 std::cout << "PingPong Benchmark" << std::endl; 343 344 345 // Barrier for synchronization 346 barrier_t barrier(nthread + 1); 347 348 // Data to check everything is OK 349 global_stat_t global; 350 351 // Flag to signal termination 352 std::atomic_bool done = { false }; 353 354 std::cout << "Initializing "; 355 // List being tested 356 relaxed_list<Node> list = { nthread * nqueues }; 357 { 358 enable_stats = true; 359 360 std::thread * threads[nthread]; 361 unsigned i = 1; 362 for(auto & t : threads) { 363 t = new std::thread([&done, &list, &barrier, &global, nnodes](unsigned tid) { 364 Random rand(tid + rdtscl()); 365 366 Node nodes[nnodes]; 367 for(auto & n : nodes) { 368 n.value = (int)rand.next() % 100; 369 } 370 371 local_stat_t local; 372 373 // affinity(tid); 374 375 barrier.wait(tid); 376 377 // EXPERIMENT START 378 379 runPingPong_body(done, nodes, nnodes, local, list); 380 381 // EXPERIMENT END 382 383 barrier.wait(tid); 384 385 tally_stats(global, local); 386 }, i++); 387 } 388 389 waitfor(duration, barrier, done); 390 391 for(auto t : threads) { 392 t->join(); 393 delete t; 394 } 395 396 enable_stats = false; 397 } 398 399 print_stats(duration, nthread, global); 400 } 401 402 // ================================================================================================ 403 __attribute__((noinline)) void runFairness_body( 404 unsigned tid, 405 size_t width, 406 size_t length, 407 int output[], 408 std::atomic_size_t & count, 409 Node initial_nodes[], 410 unsigned nnodes, 411 local_stat_t & local, 412 relaxed_list<Node> & list 413 ) { 414 Node * nodes[nnodes]; 415 { 416 unsigned i = 0; 417 for(auto & n : nodes) { 418 n = &initial_nodes[i++]; 419 } 420 } 421 422 while(__builtin_expect(0 != count.load(std::memory_order_relaxed), true)) { 423 424 for(Node * & node : nodes) { 425 local.crc_in += node->id; 426 list.push(node); 427 local.in++; 428 } 429 430 // ----- 431 432 for(Node * & node : nodes) { 433 node = list.pop(); 434 assert(node); 435 436 if (unsigned(node->value) < length) { 437 size_t idx = (node->value * width) + node->id; 438 assert(idx < (width * length)); 439 output[idx] = tid; 440 } 441 442 node->value++; 443 if(unsigned(node->value) == length) count--; 444 445 local.crc_out += node->id; 446 local.out++; 447 } 448 } 449 } 450 451 void runFairness(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes, const std::string & output) { 452 std::cout << "Fairness Benchmark, outputing to : " << output << std::endl; 453 454 // Barrier for synchronization 455 barrier_t barrier(nthread + 1); 456 457 // Data to check everything is OK 458 global_stat_t global; 459 460 std::cout << "Initializing "; 461 462 // Check fairness by creating a png of where the threads ran 463 size_t width = nthread * nnodes; 464 size_t length = 100000; 465 466 std::unique_ptr<int[]> data_out { new int[width * length] }; 467 468 // Flag to signal termination 469 std::atomic_size_t count = width; 470 471 // List being tested 472 relaxed_list<Node> list = { nthread * nqueues }; 473 { 474 enable_stats = true; 475 476 std::thread * threads[nthread]; 477 unsigned i = 1; 478 for(auto & t : threads) { 479 t = new std::thread([&count, &list, &barrier, &global, nnodes, width, length, data_out = data_out.get()](unsigned tid) { 480 unsigned int start = (tid - 1) * nnodes; 481 Node nodes[nnodes]; 482 for(auto & n : nodes) { 483 n.id = start; 484 n.value = 0; 485 start++; 486 } 487 488 local_stat_t local; 489 490 // affinity(tid); 491 492 barrier.wait(tid); 493 494 // EXPERIMENT START 495 496 runFairness_body(tid, width, length, data_out, count, nodes, nnodes, local, list); 497 498 // EXPERIMENT END 499 500 barrier.wait(tid); 501 502 for(const auto & n : nodes) { 503 local.valmax = max(local.valmax, size_t(n.value)); 504 local.valmin = min(local.valmin, size_t(n.value)); 505 } 506 507 tally_stats(global, local); 508 }, i++); 509 } 510 511 waitfor(duration, barrier, count); 512 513 for(auto t : threads) { 514 t->join(); 515 delete t; 516 } 517 518 enable_stats = false; 519 } 520 521 print_stats(duration, nthread, global); 522 523 save_fairness(data_out.get(), 100, nthread, width, length, output); 524 } 525 526 // ================================================================================================ 527 528 bool iequals(const std::string& a, const std::string& b) 529 { 530 return std::equal(a.begin(), a.end(), 531 b.begin(), b.end(), 532 [](char a, char b) { 533 return std::tolower(a) == std::tolower(b); 534 }); 234 void usage(char * argv[]) { 235 std::cerr << argv[0] << ": [DURATION (FLOAT:SEC)] [NTHREADS] [NQUEUES] [FILL]" << std::endl;; 236 std::exit(1); 535 237 } 536 238 … … 539 241 double duration = 5.0; 540 242 unsigned nthreads = 2; 541 unsigned nqueues = 4; 542 unsigned nnodes = 100; 543 unsigned nslots = 100; 544 std::string out = "fairness.png"; 545 546 enum { 547 Churn, 548 PingPong, 549 Fairness, 550 NONE 551 } benchmark = NONE; 243 unsigned nqueues = 2; 244 unsigned fill = 100; 552 245 553 246 std::cout.imbue(std::locale("")); 554 247 555 for(;;) { 556 static struct option options[] = { 557 {"duration", required_argument, 0, 'd'}, 558 {"nthreads", required_argument, 0, 't'}, 559 {"nqueues", required_argument, 0, 'q'}, 560 {"benchmark", required_argument, 0, 'b'}, 561 {0, 0, 0, 0} 562 }; 563 564 int idx = 0; 565 int opt = getopt_long(argc, argv, "d:t:q:b:", options, &idx); 566 567 std::string arg = optarg ? optarg : ""; 568 size_t len = 0; 569 switch(opt) { 570 // Exit Case 571 case -1: 572 /* paranoid */ assert(optind <= argc); 573 switch(benchmark) { 574 case NONE: 575 std::cerr << "Must specify a benchmark" << std::endl; 576 goto usage; 577 case PingPong: 578 nnodes = 1; 579 nslots = 1; 580 switch(argc - optind) { 581 case 0: break; 582 case 1: 583 try { 584 arg = optarg = argv[optind]; 585 nnodes = stoul(optarg, &len); 586 if(len != arg.size()) { throw std::invalid_argument(""); } 587 } catch(std::invalid_argument &) { 588 std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl; 589 goto usage; 590 } 591 break; 592 default: 593 std::cerr << "'PingPong' benchmark doesn't accept more than 2 extra arguments" << std::endl; 594 goto usage; 595 } 596 break; 597 case Churn: 598 nnodes = 100; 599 nslots = 100; 600 switch(argc - optind) { 601 case 0: break; 602 case 1: 603 try { 604 arg = optarg = argv[optind]; 605 nnodes = stoul(optarg, &len); 606 if(len != arg.size()) { throw std::invalid_argument(""); } 607 nslots = nnodes; 608 } catch(std::invalid_argument &) { 609 std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl; 610 goto usage; 611 } 612 break; 613 case 2: 614 try { 615 arg = optarg = argv[optind]; 616 nnodes = stoul(optarg, &len); 617 if(len != arg.size()) { throw std::invalid_argument(""); } 618 } catch(std::invalid_argument &) { 619 std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl; 620 goto usage; 621 } 622 try { 623 arg = optarg = argv[optind + 1]; 624 nslots = stoul(optarg, &len); 625 if(len != arg.size()) { throw std::invalid_argument(""); } 626 } catch(std::invalid_argument &) { 627 std::cerr << "Number of slots must be a positive integer, was " << arg << std::endl; 628 goto usage; 629 } 630 break; 631 default: 632 std::cerr << "'Churn' benchmark doesn't accept more than 2 extra arguments" << std::endl; 633 goto usage; 634 } 635 break; 636 case Fairness: 637 nnodes = 1; 638 switch(argc - optind) { 639 case 0: break; 640 case 1: 641 arg = optarg = argv[optind]; 642 out = arg; 643 break; 644 default: 645 std::cerr << "'Churn' benchmark doesn't accept more than 2 extra arguments" << std::endl; 646 goto usage; 647 } 648 } 649 goto run; 650 // Benchmarks 651 case 'b': 652 if(benchmark != NONE) { 653 std::cerr << "Only when benchmark can be run" << std::endl; 654 goto usage; 655 } 656 if(iequals(arg, "churn")) { 657 benchmark = Churn; 658 break; 659 } 660 if(iequals(arg, "pingpong")) { 661 benchmark = PingPong; 662 break; 663 } 664 if(iequals(arg, "fairness")) { 665 benchmark = Fairness; 666 break; 667 } 668 std::cerr << "Unkown benchmark " << arg << std::endl; 669 goto usage; 670 // Numeric Arguments 671 case 'd': 672 try { 673 duration = stod(optarg, &len); 674 if(len != arg.size()) { throw std::invalid_argument(""); } 675 } catch(std::invalid_argument &) { 676 std::cerr << "Duration must be a valid double, was " << arg << std::endl; 677 goto usage; 678 } 679 break; 680 case 't': 681 try { 682 nthreads = stoul(optarg, &len); 683 if(len != arg.size()) { throw std::invalid_argument(""); } 684 } catch(std::invalid_argument &) { 685 std::cerr << "Number of threads must be a positive integer, was " << arg << std::endl; 686 goto usage; 687 } 688 break; 689 case 'q': 690 try { 691 nqueues = stoul(optarg, &len); 692 if(len != arg.size()) { throw std::invalid_argument(""); } 693 } catch(std::invalid_argument &) { 694 std::cerr << "Number of queues must be a positive integer, was " << arg << std::endl; 695 goto usage; 696 } 697 break; 698 // Other cases 699 default: /* ? */ 700 std::cerr << opt << std::endl; 701 usage: 702 std::cerr << "Usage: " << argv[0] << ": [options] -b churn [NNODES] [NSLOTS = NNODES]" << std::endl; 703 std::cerr << " or: " << argv[0] << ": [options] -b pingpong [NNODES]" << std::endl; 704 std::cerr << std::endl; 705 std::cerr << " -d, --duration=DURATION Duration of the experiment, in seconds" << std::endl; 706 std::cerr << " -t, --nthreads=NTHREADS Number of kernel threads" << std::endl; 707 std::cerr << " -q, --nqueues=NQUEUES Number of queues per threads" << std::endl; 708 std::exit(1); 709 } 710 } 711 run: 248 switch (argc) 249 { 250 case 5: 251 fill = std::stoul(argv[4]); 252 [[fallthrough]]; 253 case 4: 254 nqueues = std::stoul(argv[3]); 255 [[fallthrough]]; 256 case 3: 257 nthreads = std::stoul(argv[2]); 258 [[fallthrough]]; 259 case 2: 260 duration = std::stod(argv[1]); 261 if( duration <= 0.0 ) { 262 std::cerr << "Duration must be positive, was " << argv[1] << "(" << duration << ")" << std::endl; 263 usage(argv); 264 } 265 [[fallthrough]]; 266 case 1: 267 break; 268 default: 269 usage(argv); 270 break; 271 } 712 272 713 273 check_cache_line_size(); 714 274 715 275 std::cout << "Running " << nthreads << " threads (" << (nthreads * nqueues) << " queues) for " << duration << " seconds" << std::endl; 716 switch(benchmark) { 717 case Churn: 718 runChurn(nthreads, nqueues, duration, nnodes, nslots); 719 break; 720 case PingPong: 721 runPingPong(nthreads, nqueues, duration, nnodes); 722 break; 723 case Fairness: 724 runFairness(nthreads, nqueues, duration, nnodes, out); 725 break; 726 default: 727 abort(); 728 } 276 run(nthreads, nqueues, fill, duration); 277 729 278 return 0; 730 279 } 731 280 281 template<> 282 thread_local relaxed_list<Node>::TLS relaxed_list<Node>::tls = {}; 283 284 template<> 285 relaxed_list<Node>::intrusive_queue_t::stat::Dif relaxed_list<Node>::intrusive_queue_t::stat::dif = {}; 286 732 287 const char * __my_progname = "Relaxed List"; 733 734 struct rgb_t {735 double r; // a fraction between 0 and 1736 double g; // a fraction between 0 and 1737 double b; // a fraction between 0 and 1738 };739 740 struct hsv_t {741 double h; // angle in degrees742 double s; // a fraction between 0 and 1743 double v; // a fraction between 0 and 1744 };745 746 rgb_t hsv2rgb(hsv_t in) {747 double hh, p, q, t, ff;748 long i;749 rgb_t out;750 751 if(in.s <= 0.0) { // < is bogus, just shuts up warnings752 out.r = in.v;753 out.g = in.v;754 out.b = in.v;755 return out;756 }757 hh = in.h;758 if(hh >= 360.0) hh = 0.0;759 hh /= 60.0;760 i = (long)hh;761 ff = hh - i;762 p = in.v * (1.0 - in.s);763 q = in.v * (1.0 - (in.s * ff));764 t = in.v * (1.0 - (in.s * (1.0 - ff)));765 766 switch(i) {767 case 0:768 out.r = in.v;769 out.g = t;770 out.b = p;771 break;772 case 1:773 out.r = q;774 out.g = in.v;775 out.b = p;776 break;777 case 2:778 out.r = p;779 out.g = in.v;780 out.b = t;781 break;782 783 case 3:784 out.r = p;785 out.g = q;786 out.b = in.v;787 break;788 case 4:789 out.r = t;790 out.g = p;791 out.b = in.v;792 break;793 case 5:794 default:795 out.r = in.v;796 out.g = p;797 out.b = q;798 break;799 }800 return out;801 }802 803 void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) {804 std::ofstream os(output);805 os << "<html>\n";806 os << "<head>\n";807 os << "<style>\n";808 os << "</style>\n";809 os << "</head>\n";810 os << "<body>\n";811 os << "<table style=\"width=100%\">\n";812 813 size_t idx = 0;814 for(size_t r = 0ul; r < rows; r++) {815 os << "<tr>\n";816 for(size_t c = 0ul; c < columns; c++) {817 os << "<td class=\"custom custom" << data[idx] << "\"></td>\n";818 idx++;819 }820 os << "</tr>\n";821 }822 823 os << "</table>\n";824 os << "</body>\n";825 os << "</html>\n";826 os << std::endl;827 }828 829 #include <png.h>830 #include <setjmp.h>831 832 /*833 void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) {834 int width = columns * factor;835 int height = rows / factor;836 837 int code = 0;838 int idx = 0;839 FILE *fp = NULL;840 png_structp png_ptr = NULL;841 png_infop info_ptr = NULL;842 png_bytep row = NULL;843 844 // Open file for writing (binary mode)845 fp = fopen(output.c_str(), "wb");846 if (fp == NULL) {847 fprintf(stderr, "Could not open file %s for writing\n", output.c_str());848 code = 1;849 goto finalise;850 }851 852 // Initialize write structure853 png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);854 if (png_ptr == NULL) {855 fprintf(stderr, "Could not allocate write struct\n");856 code = 1;857 goto finalise;858 }859 860 // Initialize info structure861 info_ptr = png_create_info_struct(png_ptr);862 if (info_ptr == NULL) {863 fprintf(stderr, "Could not allocate info struct\n");864 code = 1;865 goto finalise;866 }867 868 // Setup Exception handling869 if (setjmp(png_jmpbuf(png_ptr))) {870 fprintf(stderr, "Error during png creation\n");871 code = 1;872 goto finalise;873 }874 875 png_init_io(png_ptr, fp);876 877 // Write header (8 bit colour depth)878 png_set_IHDR(png_ptr, info_ptr, width, height,879 8, PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE,880 PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);881 882 png_write_info(png_ptr, info_ptr);883 884 // Allocate memory for one row (3 bytes per pixel - RGB)885 row = (png_bytep) malloc(3 * width * sizeof(png_byte));886 887 // Write image data888 int x, y;889 for (y=0 ; y<height ; y++) {890 for (x=0 ; x<width ; x++) {891 auto & r = row[(x * 3) + 0];892 auto & g = row[(x * 3) + 1];893 auto & b = row[(x * 3) + 2];894 assert(idx < (rows * columns));895 int color = data[idx] - 1;896 assert(color < nthreads);897 assert(color >= 0);898 idx++;899 900 double angle = double(color) / double(nthreads);901 902 auto c = hsv2rgb({ 360.0 * angle, 0.8, 0.8 });903 904 r = char(c.r * 255.0);905 g = char(c.g * 255.0);906 b = char(c.b * 255.0);907 908 }909 png_write_row(png_ptr, row);910 }911 912 assert(idx == (rows * columns));913 914 // End write915 png_write_end(png_ptr, NULL);916 917 finalise:918 if (fp != NULL) fclose(fp);919 if (info_ptr != NULL) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);920 if (png_ptr != NULL) png_destroy_write_struct(&png_ptr, (png_infopp)NULL);921 if (row != NULL) free(row);922 }923 */ -
doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp
r3b56166 rd231700 37 37 }; 38 38 39 static inline bool bts(std::atomic_size_t & target, size_t bit ) {40 //*41 int result = 0;42 asm volatile(43 "LOCK btsq %[bit], %[target]\n\t"44 :"=@ccc" (result)45 : [target] "m" (target), [bit] "r" (bit)46 );47 return result != 0;48 /*/49 size_t mask = 1ul << bit;50 size_t ret = target.fetch_or(mask, std::memory_order_relaxed);51 return (ret & mask) != 0;52 //*/53 }54 55 static inline bool btr(std::atomic_size_t & target, size_t bit ) {56 //*57 int result = 0;58 asm volatile(59 "LOCK btrq %[bit], %[target]\n\t"60 :"=@ccc" (result)61 : [target] "m" (target), [bit] "r" (bit)62 );63 return result != 0;64 /*/65 size_t mask = 1ul << bit;66 size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);67 return (ret & mask) != 0;68 //*/69 }70 39 71 40 extern bool enable_stats; … … 79 48 size_t attempt = 0; 80 49 size_t success = 0; 81 size_t mask_attempt = 0;82 } pop;83 };84 85 struct empty_stat {86 struct {87 size_t value = 0;88 size_t count = 0;89 } push;90 struct {91 size_t value = 0;92 size_t count = 0;93 50 } pop; 94 51 }; … … 105 62 static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field"); 106 63 64 107 65 public: 108 66 relaxed_list(unsigned numLists) 109 : lists(new intrusive_queue_t[numLists]) 67 : numNonEmpty{0} 68 , lists(new intrusive_queue_t[numLists]) 110 69 , numLists(numLists) 111 { 112 assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists"); 113 // assert(sizeof(*this) == 128); 114 std::cout << "Constructing Relaxed List with " << numLists << std::endl; 115 70 {} 71 72 ~relaxed_list() { 73 lists.reset(); 116 74 #ifndef NO_STATS 117 if(head) this->next = head; 118 head = this; 75 std::cout << "Difference : " 76 << ssize_t(double(intrusive_queue_t::stat::dif.value) / intrusive_queue_t::stat::dif.num ) << " avg\t" 77 << intrusive_queue_t::stat::dif.max << "max" << std::endl; 119 78 #endif 120 }121 122 ~relaxed_list() {123 std::cout << "Destroying Relaxed List" << std::endl;124 lists.reset();125 79 } 126 80 … … 130 84 while(true) { 131 85 // Pick a random list 132 unsignedi = tls.rng.next() % numLists;86 int i = tls.rng.next() % numLists; 133 87 134 88 #ifndef NO_STATS … … 139 93 if( !lists[i].lock.try_lock() ) continue; 140 94 141 __attribute__((unused)) int num = numNonEmpty;142 143 95 // Actually push it 144 if(lists[i].push(node)) { 145 numNonEmpty++; 146 size_t qword = i >> 6ull; 147 size_t bit = i & 63ull; 148 assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit)); 149 __attribute__((unused)) bool ret = bts(list_mask[qword], bit); 150 assert(!ret); 151 assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit)); 152 } 96 lists[i].push(node, numNonEmpty); 153 97 assert(numNonEmpty <= (int)numLists); 154 98 … … 158 102 #ifndef NO_STATS 159 103 tls.pick.push.success++; 160 tls.empty.push.value += num;161 tls.empty.push.count += 1;162 104 #endif 163 105 return; … … 166 108 167 109 __attribute__((noinline, hot)) node_t * pop() { 168 #if !defined(NO_BITMASK) 169 // for(int r = 0; r < 10 && numNonEmpty != 0; r++) { 170 // // Pick two lists at random 171 // unsigned i = tls.rng.next() % numLists; 172 // unsigned j = tls.rng.next() % numLists; 173 174 // if(auto node = try_pop(i, j)) return node; 175 // } 176 int nnempty; 177 while(0 != (nnempty = numNonEmpty)) { 178 tls.pick.pop.mask_attempt++; 179 unsigned i, j; 180 // if( numLists < 4 || (numLists / nnempty) < 4 ) { 181 // // Pick two lists at random 182 // i = tls.rng.next() % numLists; 183 // j = tls.rng.next() % numLists; 184 // } else 185 { 186 #ifndef NO_STATS 187 // tls.pick.push.mask_attempt++; 188 #endif 189 190 // Pick two lists at random 191 unsigned num = ((numLists - 1) >> 6) + 1; 192 193 unsigned ri = tls.rng.next(); 194 unsigned rj = tls.rng.next(); 195 196 unsigned wdxi = (ri >> 6u) % num; 197 unsigned wdxj = (rj >> 6u) % num; 198 199 size_t maski = list_mask[wdxi].load(std::memory_order_relaxed); 200 size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed); 201 202 if(maski == 0 && maskj == 0) continue; 203 204 unsigned bi = rand_bit(ri, maski); 205 unsigned bj = rand_bit(rj, maskj); 206 207 assertf(bi < 64, "%zu %u", maski, bi); 208 assertf(bj < 64, "%zu %u", maskj, bj); 209 210 i = bi | (wdxi << 6); 211 j = bj | (wdxj << 6); 212 213 assertf(i < numLists, "%u", wdxi << 6); 214 assertf(j < numLists, "%u", wdxj << 6); 215 } 216 217 if(auto node = try_pop(i, j)) return node; 218 } 219 #else 220 while(numNonEmpty != 0) { 221 // Pick two lists at random 222 int i = tls.rng.next() % numLists; 223 int j = tls.rng.next() % numLists; 224 225 if(auto node = try_pop(i, j)) return node; 226 } 227 #endif 110 while(numNonEmpty != 0) { 111 // Pick two lists at random 112 int i = tls.rng.next() % numLists; 113 int j = tls.rng.next() % numLists; 114 115 #ifndef NO_STATS 116 tls.pick.pop.attempt++; 117 #endif 118 119 // Pick the bet list 120 int w = i; 121 if( __builtin_expect(lists[j].ts() != 0, true) ) { 122 w = (lists[i].ts() < lists[j].ts()) ? i : j; 123 } 124 125 auto & list = lists[w]; 126 // If list looks empty retry 127 if( list.ts() == 0 ) continue; 128 129 // If we can't get the lock retry 130 if( !list.lock.try_lock() ) continue; 131 132 // If list is empty, unlock and retry 133 if( list.ts() == 0 ) { 134 list.lock.unlock(); 135 continue; 136 } 137 138 // Actually pop the list 139 auto node = list.pop(numNonEmpty); 140 assert(node); 141 142 // Unlock and return 143 list.lock.unlock(); 144 assert(numNonEmpty >= 0); 145 #ifndef NO_STATS 146 tls.pick.pop.success++; 147 #endif 148 return node; 149 } 228 150 229 151 return nullptr; 230 152 } 231 232 private:233 node_t * try_pop(unsigned i, unsigned j) {234 #ifndef NO_STATS235 tls.pick.pop.attempt++;236 #endif237 238 // Pick the bet list239 int w = i;240 if( __builtin_expect(lists[j].ts() != 0, true) ) {241 w = (lists[i].ts() < lists[j].ts()) ? i : j;242 }243 244 auto & list = lists[w];245 // If list looks empty retry246 if( list.ts() == 0 ) return nullptr;247 248 // If we can't get the lock retry249 if( !list.lock.try_lock() ) return nullptr;250 251 __attribute__((unused)) int num = numNonEmpty;252 253 // If list is empty, unlock and retry254 if( list.ts() == 0 ) {255 list.lock.unlock();256 return nullptr;257 }258 259 // Actually pop the list260 node_t * node;261 bool emptied;262 std::tie(node, emptied) = list.pop();263 assert(node);264 265 if(emptied) {266 numNonEmpty--;267 size_t qword = w >> 6ull;268 size_t bit = w & 63ull;269 assert((list_mask[qword] & (1ul << bit)) != 0);270 __attribute__((unused)) bool ret = btr(list_mask[qword], bit);271 assert(ret);272 assert((list_mask[qword] & (1ul << bit)) == 0);273 }274 275 // Unlock and return276 list.lock.unlock();277 assert(numNonEmpty >= 0);278 #ifndef NO_STATS279 tls.pick.pop.success++;280 tls.empty.pop.value += num;281 tls.empty.pop.count += 1;282 #endif283 return node;284 }285 153 286 154 private: … … 294 162 struct stat { 295 163 ssize_t diff = 0; 296 size_t push = 0; 297 size_t pop = 0; 298 // size_t value = 0; 299 // size_t count = 0; 164 165 static struct Dif { 166 ssize_t value = 0; 167 size_t num = 0; 168 ssize_t max = 0; 169 } dif; 300 170 }; 301 171 … … 308 178 sentinel_t before; 309 179 sentinel_t after; 310 #ifndef NO_STATS 311 stat s; 312 #endif 313 314 #pragma GCC diagnostic push 315 #pragma GCC diagnostic ignored "-Winvalid-offsetof" 180 stat s; 181 316 182 static constexpr auto fields_offset = offsetof( node_t, _links ); 317 #pragma GCC diagnostic pop318 183 public: 319 184 intrusive_queue_t() … … 321 186 , after {{ head(), nullptr }} 322 187 { 323 /* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before)); 324 /* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after )); 325 /* paranoid */ assert(head()->_links.prev == nullptr); 326 /* paranoid */ assert(head()->_links.next == tail() ); 327 /* paranoid */ assert(tail()->_links.next == nullptr); 328 /* paranoid */ assert(tail()->_links.prev == head() ); 329 /* paranoid */ assert(sizeof(*this) == 128); 330 /* paranoid */ assert((intptr_t(this) % 128) == 0); 331 } 332 333 ~intrusive_queue_t() = default; 188 assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before)); 189 assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after )); 190 assert(head()->_links.prev == nullptr); 191 assert(head()->_links.next == tail() ); 192 assert(tail()->_links.next == nullptr); 193 assert(tail()->_links.prev == head() ); 194 assert(sizeof(*this) == 128); 195 assert((intptr_t(this) % 128) == 0); 196 } 197 198 ~intrusive_queue_t() { 199 #ifndef NO_STATS 200 stat::dif.value+= s.diff; 201 stat::dif.num ++; 202 stat::dif.max = std::abs(stat::dif.max) > std::abs(s.diff) ? stat::dif.max : s.diff; 203 #endif 204 } 334 205 335 206 inline node_t * head() const { … … 349 220 } 350 221 351 inline bool push(node_t * node) {222 inline void push(node_t * node, std::atomic_int & nonEmpty) { 352 223 assert(lock); 353 224 assert(node->_links.ts != 0); … … 361 232 prev->_links.next = node; 362 233 tail->_links.prev = node; 363 #ifndef NO_STATS364 if(enable_stats) {365 s.diff++;366 s.push++;367 }368 #endif369 234 if(before._links.ts == 0l) { 235 nonEmpty += 1; 370 236 before._links.ts = node->_links.ts; 371 assert(node->_links.prev == this->head());372 return true;373 }374 return false;375 } 376 377 inline std::pair<node_t *, bool> pop() {237 } 238 #ifndef NO_STATS 239 if(enable_stats) s.diff++; 240 #endif 241 } 242 243 inline node_t * pop(std::atomic_int & nonEmpty) { 378 244 assert(lock); 379 245 node_t * head = this->head(); … … 382 248 node_t * node = head->_links.next; 383 249 node_t * next = node->_links.next; 384 if(node == tail) return {nullptr, false};250 if(node == tail) return nullptr; 385 251 386 252 head->_links.next = next; 387 253 next->_links.prev = head; 388 254 389 #ifndef NO_STATS390 if(enable_stats) {391 s.diff--;392 s.pop ++;393 }394 #endif395 255 if(next == tail) { 396 256 before._links.ts = 0l; 397 return {node, true};257 nonEmpty -= 1; 398 258 } 399 259 else { … … 401 261 before._links.ts = next->_links.ts; 402 262 assert(before._links.ts != 0); 403 return {node, false}; 404 } 263 } 264 #ifndef NO_STATS 265 if(enable_stats) s.diff--; 266 #endif 267 return node; 405 268 } 406 269 … … 414 277 415 278 static __attribute__((aligned(128))) thread_local struct TLS { 416 Random rng = { int(rdtscl()) }; 417 pick_stat pick; 418 empty_stat empty; 279 Random rng = { int(rdtscl()) }; 280 pick_stat pick; 419 281 } tls; 420 282 421 public:422 std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists423 std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty424 283 private: 284 std::atomic_int numNonEmpty; // number of non-empty lists 425 285 __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists; 426 286 const unsigned numLists; … … 428 288 public: 429 289 static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t); 430 431 #ifndef NO_STATS 432 static void stats_print(std::ostream & os) { 433 auto it = head; 434 while(it) { 435 it->stats_print_local(os); 436 it = it->next; 437 } 438 } 439 440 static void stats_tls_tally() { 441 global_stats.pick.push.attempt += tls.pick.push.attempt; 442 global_stats.pick.push.success += tls.pick.push.success; 443 global_stats.pick.pop .attempt += tls.pick.pop.attempt; 444 global_stats.pick.pop .success += tls.pick.pop.success; 445 global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt; 446 447 global_stats.qstat.push.value += tls.empty.push.value; 448 global_stats.qstat.push.count += tls.empty.push.count; 449 global_stats.qstat.pop .value += tls.empty.pop .value; 450 global_stats.qstat.pop .count += tls.empty.pop .count; 451 } 452 453 private: 454 static struct GlobalStats { 455 struct { 456 struct { 457 std::atomic_size_t attempt = { 0 }; 458 std::atomic_size_t success = { 0 }; 459 } push; 460 struct { 461 std::atomic_size_t attempt = { 0 }; 462 std::atomic_size_t success = { 0 }; 463 std::atomic_size_t mask_attempt = { 0 }; 464 } pop; 465 } pick; 466 struct { 467 struct { 468 std::atomic_size_t value = { 0 }; 469 std::atomic_size_t count = { 0 }; 470 } push; 471 struct { 472 std::atomic_size_t value = { 0 }; 473 std::atomic_size_t count = { 0 }; 474 } pop; 475 } qstat; 476 } global_stats; 477 478 // Link list of all lists for stats 479 __attribute__((aligned(64))) relaxed_list<node_t> * next = nullptr; 480 481 static relaxed_list<node_t> * head; 482 483 void stats_print_local(std::ostream & os ) { 484 std::cout << "----- Relaxed List Stats -----" << std::endl; 485 { 486 ssize_t diff = 0; 487 size_t num = 0; 488 ssize_t max = 0; 489 490 for(size_t i = 0; i < numLists; i++) { 491 const auto & list = lists[i]; 492 diff+= list.s.diff; 493 num ++; 494 max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff; 495 os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n"; 496 } 497 498 os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl; 499 } 500 501 const auto & global = global_stats; 502 503 double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt); 504 double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt); 505 double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt); 506 507 os << "Push Pick % : " << push_sur << "(" << global.pick.push.success << " / " << global.pick.push.attempt << ")\n"; 508 os << "Pop Pick % : " << pop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .attempt << ")\n"; 509 os << "TryPop Pick % : " << mpop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .mask_attempt << ")\n"; 510 511 double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count; 512 double avgQ_pop = double(global.qstat.pop .value) / global.qstat.pop .count; 513 double avgQ = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count); 514 os << "Push Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n"; 515 os << "Pop Avg Qs : " << avgQ_pop << " (" << global.qstat.pop .count << "ops)\n"; 516 os << "Global Avg Qs : " << avgQ << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n"; 517 } 518 #endif 519 }; 290 }; -
doc/theses/thierry_delisle_PhD/code/utils.hpp
r3b56166 rd231700 10 10 #include <unistd.h> 11 11 #include <sys/sysinfo.h> 12 13 #include <x86intrin.h>14 12 15 13 // Barrier from … … 58 56 } 59 57 60 static inlinevoid affinity(int tid) {58 void affinity(int tid) { 61 59 static int cpus = get_nprocs(); 62 60 … … 72 70 73 71 static const constexpr std::size_t cache_line_size = 64; 74 static inlinevoid check_cache_line_size() {72 void check_cache_line_size() { 75 73 std::cout << "Checking cache line size" << std::endl; 76 74 const std::string cache_file = "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size"; … … 105 103 return std::chrono::duration_cast<std::chrono::duration<T, Ratio>>(std::chrono::duration<T>(seconds)).count(); 106 104 } 107 108 static inline unsigned rand_bit(unsigned rnum, size_t mask) {109 unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0;110 #if !defined(__BMI2__)111 uint64_t v = mask; // Input value to find position with rank r.112 unsigned int r = bit + 1;// Input: bit's desired rank [1-64].113 unsigned int s; // Output: Resulting position of bit with rank r [1-64]114 uint64_t a, b, c, d; // Intermediate temporaries for bit count.115 unsigned int t; // Bit count temporary.116 117 // Do a normal parallel bit count for a 64-bit integer,118 // but store all intermediate steps.119 a = v - ((v >> 1) & ~0UL/3);120 b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);121 c = (b + (b >> 4)) & ~0UL/0x11;122 d = (c + (c >> 8)) & ~0UL/0x101;123 124 125 t = (d >> 32) + (d >> 48);126 // Now do branchless select!127 s = 64;128 s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));129 t = (d >> (s - 16)) & 0xff;130 s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));131 t = (c >> (s - 8)) & 0xf;132 s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));133 t = (b >> (s - 4)) & 0x7;134 s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));135 t = (a >> (s - 2)) & 0x3;136 s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));137 t = (v >> (s - 1)) & 0x1;138 s -= ((t - r) & 256) >> 8;139 return s - 1;140 #else141 uint64_t picked = _pdep_u64(1ul << bit, mask);142 return picked ? __builtin_ctzl(picked) : 0;143 #endif144 }
Note:
See TracChangeset
for help on using the changeset viewer.