Changeset 22f94a4 for doc/theses/thierry_delisle_PhD/code/relaxed_list.cpp
- Timestamp:
- Aug 11, 2020, 4:40:15 PM (5 years ago)
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children:
- 0d070ca
- Parents:
- 07d867b (diff), 129674b (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)links above to see all the changes relative to each parent. - File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
doc/theses/thierry_delisle_PhD/code/relaxed_list.cpp
r07d867b r22f94a4 1 #include "relaxed_list.hpp" 1 #if !defined(LIST_VARIANT_HPP) 2 #define LIST_VARIANT_HPP "relaxed_list.hpp" 3 #endif 4 5 #include LIST_VARIANT_HPP 6 #if !defined(LIST_VARIANT) 7 #error not variant selected 8 #endif 2 9 3 10 #include <array> … … 35 42 36 43 template<> 37 thread_local relaxed_list<Node>::TLS relaxed_list<Node>::tls = {};44 thread_local LIST_VARIANT<Node>::TLS LIST_VARIANT<Node>::tls = {}; 38 45 39 46 template<> 40 relaxed_list<Node> * relaxed_list<Node>::head = nullptr;47 std::atomic_uint32_t LIST_VARIANT<Node>::ticket = { 0 }; 41 48 42 49 #ifndef NO_STATS 43 50 template<> 44 relaxed_list<Node>::GlobalStats relaxed_list<Node>::global_stats = {};51 LIST_VARIANT<Node>::GlobalStats LIST_VARIANT<Node>::global_stats = {}; 45 52 #endif 46 53 … … 57 64 size_t valmax = 0; 58 65 size_t valmin = 100000000ul; 66 struct { 67 size_t val = 0; 68 size_t cnt = 0; 69 } comp; 70 struct { 71 size_t val = 0; 72 size_t cnt = 0; 73 } subm; 59 74 }; 60 75 … … 67 82 std::atomic_size_t valmax = { 0 }; 68 83 std::atomic_size_t valmin = { 100000000ul }; 84 struct { 85 std::atomic_size_t val = { 0 }; 86 std::atomic_size_t cnt = { 0 }; 87 } comp; 88 struct { 89 std::atomic_size_t val = { 0 }; 90 std::atomic_size_t cnt = { 0 }; 91 } subm; 69 92 }; 70 93 … … 96 119 global.crc_out += local.crc_out; 97 120 121 global.comp.val += local.comp.val; 122 global.comp.cnt += local.comp.cnt; 123 global.subm.val += local.subm.val; 124 global.subm.cnt += local.subm.cnt; 125 98 126 atomic_max(global.valmax, local.valmax); 99 127 atomic_min(global.valmin, local.valmin); 100 128 101 relaxed_list<Node>::stats_tls_tally();129 LIST_VARIANT<Node>::stats_tls_tally(); 102 130 } 103 131 … … 106 134 auto before = Clock::now(); 107 135 barrier.wait(0); 136 bool is_tty = isatty(STDOUT_FILENO); 108 137 109 138 while(true) { … … 115 144 break; 116 145 } 117 std::cout << "\r" << std::setprecision(4) << durr.count(); 118 std::cout.flush(); 146 if(is_tty) { 147 std::cout << "\r" << std::setprecision(4) << durr.count(); 148 std::cout.flush(); 149 } 119 150 } 120 151 … … 159 190 auto dur_nano = duration_cast<std::nano>(1.0); 160 191 192 if(global.valmax != 0) { 193 std::cout << "Max runs : " << global.valmax << "\n"; 194 std::cout << "Min runs : " << global.valmin << "\n"; 195 } 196 if(global.comp.cnt != 0) { 197 std::cout << "Submit count : " << global.subm.cnt << "\n"; 198 std::cout << "Submit average: " << ((double(global.subm.val)) / global.subm.cnt) << "\n"; 199 std::cout << "Complete count: " << global.comp.cnt << "\n"; 200 std::cout << "Complete avg : " << ((double(global.comp.val)) / global.comp.cnt) << "\n"; 201 } 161 202 std::cout << "Duration : " << duration << "s\n"; 162 203 std::cout << "ns/Op : " << ( dur_nano / ops_thread )<< "\n"; … … 164 205 std::cout << "Ops/sec : " << ops_sec << "\n"; 165 206 std::cout << "Total ops : " << ops << "(" << global.in << "i, " << global.out << "o, " << global.empty << "e)\n"; 166 if(global.valmax != 0) {167 std::cout << "Max runs : " << global.valmax << "\n";168 std::cout << "Min runs : " << global.valmin << "\n";169 }170 207 #ifndef NO_STATS 171 relaxed_list<Node>::stats_print(std::cout);208 LIST_VARIANT<Node>::stats_print(std::cout); 172 209 #endif 173 210 } … … 186 223 unsigned nslots, 187 224 local_stat_t & local, 188 relaxed_list<Node> & list225 LIST_VARIANT<Node> & list 189 226 ) { 190 227 while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) { … … 224 261 std::cout << "Initializing "; 225 262 size_t npushed = 0; 226 relaxed_list<Node> list = { nthread *nqueues };263 LIST_VARIANT<Node> list = { nthread, nqueues }; 227 264 { 228 265 Node** all_nodes[nthread]; … … 310 347 unsigned nnodes, 311 348 local_stat_t & local, 312 relaxed_list<Node> & list349 LIST_VARIANT<Node> & list 313 350 ) { 314 351 Node * nodes[nnodes]; … … 354 391 std::cout << "Initializing "; 355 392 // List being tested 356 relaxed_list<Node> list = { nthread *nqueues };393 LIST_VARIANT<Node> list = { nthread, nqueues }; 357 394 { 358 395 enable_stats = true; … … 395 432 396 433 enable_stats = false; 434 } 435 436 print_stats(duration, nthread, global); 437 } 438 439 // ================================================================================================ 440 struct __attribute__((aligned(64))) Slot { 441 Node * volatile node; 442 }; 443 444 __attribute__((noinline)) void runProducer_body( 445 std::atomic<bool>& done, 446 Random & rand, 447 Slot * slots, 448 int nslots, 449 local_stat_t & local, 450 LIST_VARIANT<Node> & list 451 ) { 452 while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) { 453 454 Node * node = list.pop(); 455 if(!node) { 456 local.empty ++; 457 continue; 458 } 459 460 local.crc_out += node->value; 461 local.out++; 462 463 if(node->id == 0) { 464 unsigned cnt = 0; 465 for(int i = 0; i < nslots; i++) { 466 Node * found = __atomic_exchange_n( &slots[i].node, nullptr, __ATOMIC_SEQ_CST ); 467 if( found ) { 468 local.crc_in += found->value; 469 local.in++; 470 cnt++; 471 list.push( found ); 472 } 473 } 474 475 local.crc_in += node->value; 476 local.in++; 477 list.push( node ); 478 479 local.comp.cnt++; 480 local.comp.val += cnt; 481 } 482 else { 483 unsigned len = 0; 484 while(true) { 485 auto off = rand.next(); 486 for(int i = 0; i < nslots; i++) { 487 Node * expected = nullptr; 488 int idx = (i + off) % nslots; 489 Slot & slot = slots[ idx ]; 490 if( 491 slot.node == nullptr && 492 __atomic_compare_exchange_n( &slot.node, &expected, node, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) 493 ) { 494 local.subm.cnt++; 495 local.subm.val += len; 496 goto LOOP; 497 } 498 assert( expected != node ); 499 len++; 500 } 501 } 502 } 503 504 LOOP:; 505 } 506 } 507 508 void runProducer(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes) { 509 std::cout << "Producer Benchmark" << std::endl; 510 511 // Barrier for synchronization 512 barrier_t barrier(nthread + 1); 513 514 // Data to check everything is OK 515 global_stat_t global; 516 517 // Flag to signal termination 518 std::atomic_bool done = { false }; 519 520 std::cout << "Initializing "; 521 522 int nslots = nnodes * 4; 523 Slot * slots = new Slot[nslots]; 524 std::cout << nnodes << " nodes (" << nslots << " slots)" << std::endl; 525 526 // List being tested 527 LIST_VARIANT<Node> list = { nthread, nqueues }; 528 { 529 Random rand(rdtscl()); 530 for(unsigned i = 0; i < nnodes; i++) { 531 Node * node = new Node(rand.next() % 100); 532 node->id = i; 533 global.crc_in += node->value; 534 list.push(node); 535 } 536 537 for(int i = 0; i < nslots; i++) { 538 slots[i].node = nullptr; 539 } 540 } 541 542 { 543 enable_stats = true; 544 545 std::thread * threads[nthread]; 546 unsigned i = 1; 547 for(auto & t : threads) { 548 t = new std::thread([&done, &list, &barrier, &global, slots, nslots](unsigned tid) { 549 Random rand(tid + rdtscl()); 550 551 local_stat_t local; 552 barrier.wait(tid); 553 554 // EXPERIMENT START 555 556 runProducer_body(done, rand, slots, nslots, local, list); 557 558 // EXPERIMENT END 559 560 barrier.wait(tid); 561 562 tally_stats(global, local); 563 }, i++); 564 } 565 566 waitfor(duration, barrier, done); 567 568 for(auto t : threads) { 569 t->join(); 570 delete t; 571 } 572 573 enable_stats = false; 574 } 575 576 { 577 while(Node * node = list.pop()) { 578 global.crc_out += node->value; 579 delete node; 580 } 581 582 for(int i = 0; i < nslots; i++) { 583 delete slots[i].node; 584 } 585 586 delete [] slots; 397 587 } 398 588 … … 410 600 unsigned nnodes, 411 601 local_stat_t & local, 412 relaxed_list<Node> & list602 LIST_VARIANT<Node> & list 413 603 ) { 414 604 Node * nodes[nnodes]; … … 470 660 471 661 // List being tested 472 relaxed_list<Node> list = { nthread *nqueues };662 LIST_VARIANT<Node> list = { nthread, nqueues }; 473 663 { 474 664 enable_stats = true; … … 521 711 print_stats(duration, nthread, global); 522 712 523 save_fairness(data_out.get(), 100, nthread, width, length, output);713 // save_fairness(data_out.get(), 100, nthread, width, length, output); 524 714 } 525 715 … … 547 737 Churn, 548 738 PingPong, 739 Producer, 549 740 Fairness, 550 741 NONE … … 577 768 case PingPong: 578 769 nnodes = 1; 579 nslots = 1;580 770 switch(argc - optind) { 581 771 case 0: break; … … 591 781 break; 592 782 default: 593 std::cerr << "'PingPong' benchmark doesn't accept more than 2 extra arguments" << std::endl; 783 std::cerr << "'PingPong' benchmark doesn't accept more than 1 extra arguments" << std::endl; 784 goto usage; 785 } 786 break; 787 case Producer: 788 nnodes = 32; 789 switch(argc - optind) { 790 case 0: break; 791 case 1: 792 try { 793 arg = optarg = argv[optind]; 794 nnodes = stoul(optarg, &len); 795 if(len != arg.size()) { throw std::invalid_argument(""); } 796 } catch(std::invalid_argument &) { 797 std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl; 798 goto usage; 799 } 800 break; 801 default: 802 std::cerr << "'Producer' benchmark doesn't accept more than 1 extra arguments" << std::endl; 594 803 goto usage; 595 804 } … … 662 871 break; 663 872 } 873 if(iequals(arg, "producer")) { 874 benchmark = Producer; 875 break; 876 } 664 877 if(iequals(arg, "fairness")) { 665 878 benchmark = Fairness; … … 702 915 std::cerr << "Usage: " << argv[0] << ": [options] -b churn [NNODES] [NSLOTS = NNODES]" << std::endl; 703 916 std::cerr << " or: " << argv[0] << ": [options] -b pingpong [NNODES]" << std::endl; 917 std::cerr << " or: " << argv[0] << ": [options] -b producer [NNODES]" << std::endl; 704 918 std::cerr << std::endl; 705 919 std::cerr << " -d, --duration=DURATION Duration of the experiment, in seconds" << std::endl; … … 714 928 715 929 std::cout << "Running " << nthreads << " threads (" << (nthreads * nqueues) << " queues) for " << duration << " seconds" << std::endl; 930 std::cout << "Relaxed list variant: " << LIST_VARIANT<Node>::name() << std::endl; 716 931 switch(benchmark) { 717 932 case Churn: … … 720 935 case PingPong: 721 936 runPingPong(nthreads, nqueues, duration, nnodes); 937 break; 938 case Producer: 939 runProducer(nthreads, nqueues, duration, nnodes); 722 940 break; 723 941 case Fairness: … … 801 1019 } 802 1020 803 void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) {804 std::ofstream os(output);805 os << "<html>\n";806 os << "<head>\n";807 os << "<style>\n";808 os << "</style>\n";809 os << "</head>\n";810 os << "<body>\n";811 os << "<table style=\"width=100%\">\n";812 813 size_t idx = 0;814 for(size_t r = 0ul; r < rows; r++) {815 os << "<tr>\n";816 for(size_t c = 0ul; c < columns; c++) {817 os << "<td class=\"custom custom" << data[idx] << "\"></td>\n";818 idx++;819 }820 os << "</tr>\n";821 }822 823 os << "</table>\n";824 os << "</body>\n";825 os << "</html>\n";826 os << std::endl;827 }828 829 #include <png.h>830 #include <setjmp.h>1021 // void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) { 1022 // std::ofstream os(output); 1023 // os << "<html>\n"; 1024 // os << "<head>\n"; 1025 // os << "<style>\n"; 1026 // os << "</style>\n"; 1027 // os << "</head>\n"; 1028 // os << "<body>\n"; 1029 // os << "<table style=\"width=100%\">\n"; 1030 1031 // size_t idx = 0; 1032 // for(size_t r = 0ul; r < rows; r++) { 1033 // os << "<tr>\n"; 1034 // for(size_t c = 0ul; c < columns; c++) { 1035 // os << "<td class=\"custom custom" << data[idx] << "\"></td>\n"; 1036 // idx++; 1037 // } 1038 // os << "</tr>\n"; 1039 // } 1040 1041 // os << "</table>\n"; 1042 // os << "</body>\n"; 1043 // os << "</html>\n"; 1044 // os << std::endl; 1045 // } 1046 1047 // #include <png.h> 1048 // #include <setjmp.h> 831 1049 832 1050 /*
Note:
See TracChangeset
for help on using the changeset viewer.