Changes in / [ee06db5c:97392b69]
Files: 6 added, 18 edited
Legend: lines showing both an old and a new line number are unmodified; a line with only a new number was added; a line with only an old number was removed.
doc/theses/thierry_delisle_PhD/code/relaxed_list.cpp
ree06db5c r97392b69
57 57  size_t valmax = 0;
58 58  size_t valmin = 100000000ul;
59  struct {
60  size_t val = 0;
61  size_t cnt = 0;
62  } comp;
63  struct {
64  size_t val = 0;
65  size_t cnt = 0;
66  } subm;
59 67  };
… …
67 75  std::atomic_size_t valmax = { 0 };
68 76  std::atomic_size_t valmin = { 100000000ul };
77  struct {
78  std::atomic_size_t val = { 0 };
79  std::atomic_size_t cnt = { 0 };
80  } comp;
81  struct {
82  std::atomic_size_t val = { 0 };
83  std::atomic_size_t cnt = { 0 };
84  } subm;
69 85  };
… …
96 112  global.crc_out += local.crc_out;
97 113
114  global.comp.val += local.comp.val;
115  global.comp.cnt += local.comp.cnt;
116  global.subm.val += local.subm.val;
117  global.subm.cnt += local.subm.cnt;
118
98 119  atomic_max(global.valmax, local.valmax);
99 120  atomic_min(global.valmin, local.valmin);
… …
106 127  auto before = Clock::now();
107 128  barrier.wait(0);
129  bool is_tty = isatty(STDOUT_FILENO);
108 130
109 131  while(true) {
… …
115 137  break;
116 138  }
117  std::cout << "\r" << std::setprecision(4) << durr.count();
118  std::cout.flush();
139  if(is_tty) {
140  std::cout << "\r" << std::setprecision(4) << durr.count();
141  std::cout.flush();
142  }
119 143  }
120 144
… …
159 183  auto dur_nano = duration_cast<std::nano>(1.0);
160 184
185  if(global.valmax != 0) {
186  std::cout << "Max runs : " << global.valmax << "\n";
187  std::cout << "Min runs : " << global.valmin << "\n";
188  }
189  if(global.comp.cnt != 0) {
190  std::cout << "Submit count : " << global.subm.cnt << "\n";
191  std::cout << "Submit average: " << ((double(global.subm.val)) / global.subm.cnt) << "\n";
192  std::cout << "Complete count: " << global.comp.cnt << "\n";
193  std::cout << "Complete avg : " << ((double(global.comp.val)) / global.comp.cnt) << "\n";
194  }
161 195  std::cout << "Duration : " << duration << "s\n";
162 196  std::cout << "ns/Op : " << ( dur_nano / ops_thread )<< "\n";
… …
164 198  std::cout << "Ops/sec : " << ops_sec << "\n";
165 199  std::cout << "Total ops : " << ops << "(" << global.in << "i, " << global.out << "o, " << global.empty << "e)\n";
166  if(global.valmax != 0) {
167  std::cout << "Max runs : " << global.valmax << "\n";
168  std::cout << "Min runs : " << global.valmin << "\n";
169  }
170 200  #ifndef NO_STATS
171 201  relaxed_list<Node>::stats_print(std::cout);
… …
395 425
396 426  enable_stats = false;
427  }
428
429  print_stats(duration, nthread, global);
430  }
431
432  // ================================================================================================
433  struct __attribute__((aligned(64))) Slot {
434  Node * volatile node;
435  };
436
437  __attribute__((noinline)) void runProducer_body(
438  std::atomic<bool>& done,
439  Random & rand,
440  Slot * slots,
441  int nslots,
442  local_stat_t & local,
443  relaxed_list<Node> & list
444  ) {
445  while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) {
446
447  Node * node = list.pop();
448  if(!node) {
449  local.empty ++;
450  continue;
451  }
452
453  local.crc_out += node->value;
454  local.out++;
455
456  if(node->id == 0) {
457  unsigned cnt = 0;
458  for(int i = 0; i < nslots; i++) {
459  Node * found = __atomic_exchange_n( &slots[i].node, nullptr, __ATOMIC_SEQ_CST );
460  if( found ) {
461  local.crc_in += found->value;
462  local.in++;
463  cnt++;
464  list.push( found );
465  }
466  }
467
468  local.crc_in += node->value;
469  local.in++;
470  list.push( node );
471
472  local.comp.cnt++;
473  local.comp.val += cnt;
474  }
475  else {
476  unsigned len = 0;
477  while(true) {
478  auto off = rand.next();
479  for(int i = 0; i < nslots; i++) {
480  Node * expected = nullptr;
481  int idx = (i + off) % nslots;
482  Slot & slot = slots[ idx ];
483  if(
484  slot.node == nullptr &&
485  __atomic_compare_exchange_n( &slot.node, &expected, node, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST )
486  ) {
487  local.subm.cnt++;
488  local.subm.val += len;
489  goto LOOP;
490  }
491  assert( expected != node );
492  len++;
493  }
494  }
495  }
496
497  LOOP:;
498  }
499  }
500
501  void runProducer(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes) {
502  std::cout << "Producer Benchmark" << std::endl;
503
504  // Barrier for synchronization
505  barrier_t barrier(nthread + 1);
506
507  // Data to check everything is OK
508  global_stat_t global;
509
510  // Flag to signal termination
511  std::atomic_bool done = { false };
512
513  std::cout << "Initializing ";
514
515  int nslots = nnodes * 4;
516  Slot * slots = new Slot[nslots];
517  std::cout << nnodes << " nodes (" << nslots << " slots)" << std::endl;
518
519  // List being tested
520  relaxed_list<Node> list = { nthread * nqueues };
521  {
522  Random rand(rdtscl());
523  for(unsigned i = 0; i < nnodes; i++) {
524  Node * node = new Node(rand.next() % 100);
525  node->id = i;
526  global.crc_in += node->value;
527  list.push(node);
528  }
529
530  for(int i = 0; i < nslots; i++) {
531  slots[i].node = nullptr;
532  }
533  }
534
535  {
536  enable_stats = true;
537
538  std::thread * threads[nthread];
539  unsigned i = 1;
540  for(auto & t : threads) {
541  t = new std::thread([&done, &list, &barrier, &global, slots, nslots](unsigned tid) {
542  Random rand(tid + rdtscl());
543
544  local_stat_t local;
545  barrier.wait(tid);
546
547  // EXPERIMENT START
548
549  runProducer_body(done, rand, slots, nslots, local, list);
550
551  // EXPERIMENT END
552
553  barrier.wait(tid);
554
555  tally_stats(global, local);
556  }, i++);
557  }
558
559  waitfor(duration, barrier, done);
560
561  for(auto t : threads) {
562  t->join();
563  delete t;
564  }
565
566  enable_stats = false;
567  }
568
569  {
570  while(Node * node = list.pop()) {
571  global.crc_out += node->value;
572  delete node;
573  }
574
575  for(int i = 0; i < nslots; i++) {
576  delete slots[i].node;
577  }
578
579  delete [] slots;
397 580  }
398 581
… …
521 704  print_stats(duration, nthread, global);
522 705
523  save_fairness(data_out.get(), 100, nthread, width, length, output);
706  // save_fairness(data_out.get(), 100, nthread, width, length, output);
524 707  }
525 708
… …
547 730  Churn,
548 731  PingPong,
732  Producer,
549 733  Fairness,
550 734  NONE
… …
577 761  case PingPong:
578 762  nnodes = 1;
579  nslots = 1;
580 763  switch(argc - optind) {
581 764  case 0: break;
… …
591 774  break;
592 775  default:
593  std::cerr << "'PingPong' benchmark doesn't accept more than 2 extra arguments" << std::endl;
776  std::cerr << "'PingPong' benchmark doesn't accept more than 1 extra arguments" << std::endl;
777  goto usage;
778  }
779  break;
780  case Producer:
781  nnodes = 32;
782  switch(argc - optind) {
783  case 0: break;
784  case 1:
785  try {
786  arg = optarg = argv[optind];
787  nnodes = stoul(optarg, &len);
788  if(len != arg.size()) { throw std::invalid_argument(""); }
789  } catch(std::invalid_argument &) {
790  std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl;
791  goto usage;
792  }
793  break;
794  default:
795  std::cerr << "'Producer' benchmark doesn't accept more than 1 extra arguments" << std::endl;
594 796  goto usage;
595 797  }
… …
662 864  break;
663 865  }
866  if(iequals(arg, "producer")) {
867  benchmark = Producer;
868  break;
869  }
664 870  if(iequals(arg, "fairness")) {
665 871  benchmark = Fairness;
… …
702 908  std::cerr << "Usage: " << argv[0] << ": [options] -b churn [NNODES] [NSLOTS = NNODES]" << std::endl;
703 909  std::cerr << " or: " << argv[0] << ": [options] -b pingpong [NNODES]" << std::endl;
910  std::cerr << " or: " << argv[0] << ": [options] -b producer [NNODES]" << std::endl;
704 911  std::cerr << std::endl;
705 912  std::cerr << " -d, --duration=DURATION Duration of the experiment, in seconds" << std::endl;
… …
714 921
715 922  std::cout << "Running " << nthreads << " threads (" << (nthreads * nqueues) << " queues) for " << duration << " seconds" << std::endl;
923  std::cout << "Relaxed list variant: " << relaxed_list<Node>::name() << std::endl;
716 924  switch(benchmark) {
717 925  case Churn:
… …
720 928  case PingPong:
721 929  runPingPong(nthreads, nqueues, duration, nnodes);
930  break;
931  case Producer:
932  runProducer(nthreads, nqueues, duration, nnodes);
722 933  break;
723 934  case Fairness:
… …
801 1012  }
802 1013
803  void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) {
804  std::ofstream os(output);
805  os << "<html>\n";
806  os << "<head>\n";
807  os << "<style>\n";
808  os << "</style>\n";
809  os << "</head>\n";
810  os << "<body>\n";
811  os << "<table style=\"width=100%\">\n";
812
813  size_t idx = 0;
814  for(size_t r = 0ul; r < rows; r++) {
815  os << "<tr>\n";
816  for(size_t c = 0ul; c < columns; c++) {
817  os << "<td class=\"custom custom" << data[idx] << "\"></td>\n";
818  idx++;
819  }
820  os << "</tr>\n";
821  }
822
823  os << "</table>\n";
824  os << "</body>\n";
825  os << "</html>\n";
826  os << std::endl;
827  }
828
829  #include <png.h>
830  #include <setjmp.h>
1014  // void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) {
1015  // std::ofstream os(output);
1016  // os << "<html>\n";
1017  // os << "<head>\n";
1018  // os << "<style>\n";
1019  // os << "</style>\n";
1020  // os << "</head>\n";
1021  // os << "<body>\n";
1022  // os << "<table style=\"width=100%\">\n";
1023
1024  // size_t idx = 0;
1025  // for(size_t r = 0ul; r < rows; r++) {
1026  // os << "<tr>\n";
1027  // for(size_t c = 0ul; c < columns; c++) {
1028  // os << "<td class=\"custom custom" << data[idx] << "\"></td>\n";
1029  // idx++;
1030  // }
1031  // os << "</tr>\n";
1032  // }
1033
1034  // os << "</table>\n";
1035  // os << "</body>\n";
1036  // os << "</html>\n";
1037  // os << std::endl;
1038  // }
1039
1040  // #include <png.h>
1041  // #include <setjmp.h>
831 1042
832 1043  /*
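For context on the Producer benchmark added above: nodes circulate through an array of single-node mailboxes. A regular thread CASes the node it popped into an empty slot, while the thread holding node 0 periodically sweeps every slot with an atomic exchange and pushes whatever it finds back onto the list. A minimal C++ sketch of that handoff pattern in isolation (Slot, NSLOTS, and the helper names here are illustrative stand-ins, not the benchmark's actual declarations):

    #include <atomic>

    struct Node { int value; int id; };

    // One single-node mailbox, padded to a cache line like the benchmark's Slot.
    struct alignas(64) Slot {
        std::atomic<Node *> node{nullptr};
    };

    constexpr int NSLOTS = 8;
    Slot slots[NSLOTS];

    // Submitter: publish a node into any currently-empty slot, spinning until
    // one frees up. Returns how many occupied slots were skipped on the way
    // (the benchmark records this as subm.val, its "submit length").
    unsigned submit(Node * n, unsigned start) {
        unsigned skipped = 0;
        for (;;) {
            for (int i = 0; i < NSLOTS; i++) {
                Slot & s = slots[(start + i) % NSLOTS];
                Node * expected = nullptr;
                if (s.node.compare_exchange_strong(expected, n)) return skipped;
                skipped++;
            }
        }
    }

    // Sweeper: drain every slot, taking sole ownership of each published node.
    template<typename F>
    unsigned sweep(F && consume) {
        unsigned count = 0;
        for (int i = 0; i < NSLOTS; i++) {
            if (Node * n = slots[i].node.exchange(nullptr)) { consume(n); count++; }
        }
        return count;
    }

The exchange on the sweep side is what keeps the accounting sound: between a successful CAS in and the exchange out, each node has exactly one owner, so the crc_in/crc_out checksums must balance at the end of the run.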
doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp
ree06db5c r97392b69
1 1  #pragma once
2
3  #define MACRO_XSTR(s) MACRO_STR(s)
4  #define MACRO_STR(s) #s
5
6  #define VANILLA 0
7  #define SNZI 1
8  #define BITMASK 2
9  #define DISCOVER 3
10  #define SNZM 4
11
12  #ifndef VARIANT
13  #define VARIANT VANILLA
14  #endif
2 15
3 16  #ifndef NO_STATS
… …
5 18  #endif
6 19
20  #include <cmath>
7 21  #include <memory>
8 22  #include <mutex>
… …
11 25  #include "assert.hpp"
12 26  #include "utils.hpp"
27  #include "snzi.hpp"
28  #include "snzm.hpp"
13 29
14 30  using namespace std;
15
16  struct spinlock_t {
17  std::atomic_bool ll = { false };
18
19  inline void lock() {
20  while( __builtin_expect(ll.exchange(true),false) ) {
21  while(ll.load(std::memory_order_relaxed))
22  asm volatile("pause");
23  }
24  }
25
26  inline bool try_lock() {
27  return false == ll.exchange(true);
28  }
29
30  inline void unlock() {
31  ll.store(false, std::memory_order_release);
32  }
33
34  inline explicit operator bool() {
35  return ll.load(std::memory_order_relaxed);
36  }
37  };
38
39  static inline bool bts(std::atomic_size_t & target, size_t bit ) {
40  //*
41  int result = 0;
42  asm volatile(
43  "LOCK btsq %[bit], %[target]\n\t"
44  :"=@ccc" (result)
45  : [target] "m" (target), [bit] "r" (bit)
46  );
47  return result != 0;
48  /*/
49  size_t mask = 1ul << bit;
50  size_t ret = target.fetch_or(mask, std::memory_order_relaxed);
51  return (ret & mask) != 0;
52  //*/
53  }
54
55  static inline bool btr(std::atomic_size_t & target, size_t bit ) {
56  //*
57  int result = 0;
58  asm volatile(
59  "LOCK btrq %[bit], %[target]\n\t"
60  :"=@ccc" (result)
61  : [target] "m" (target), [bit] "r" (bit)
62  );
63  return result != 0;
64  /*/
65  size_t mask = 1ul << bit;
66  size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);
67  return (ret & mask) != 0;
68  //*/
69  }
70 31
71 32  extern bool enable_stats;
… …
80 41  size_t success = 0;
81 42  size_t mask_attempt = 0;
43  size_t mask_reset = 0;
82 44  } pop;
83 45  };
… …
106 68
107 69  public:
70  static const char * name() {
71  const char * names[] = {
72  "VANILLA",
73  "SNZI",
74  "BITMASK",
75  "SNZI + DISCOVERED MASK",
76  "SNZM + MASK"
77  };
78  return names[VARIANT];
79  }
80
108 81  relaxed_list(unsigned numLists)
109 82  : lists(new intrusive_queue_t[numLists])
110 83  , numLists(numLists)
84  #if VARIANT == SNZI
85  , snzi( std::log2( numLists / 8 ), 2 )
86  #elif VARIANT == SNZM || VARIANT == DISCOVER
87  , snzm( numLists )
88  #endif
111 89  {
112 90  assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
… …
139 117  if( !lists[i].lock.try_lock() ) continue;
140 118
141  __attribute__((unused)) int num = numNonEmpty;
119  #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
120  __attribute__((unused)) int num = numNonEmpty;
121  #endif
142 122
143 123  // Actually push it
144 124  if(lists[i].push(node)) {
145  numNonEmpty++;
146  size_t qword = i >> 6ull;
147  size_t bit = i & 63ull;
148  assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
149  __attribute__((unused)) bool ret = bts(list_mask[qword], bit);
150  assert(!ret);
151  assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
152  }
153  assert(numNonEmpty <= (int)numLists);
125  #if VARIANT == DISCOVER
126  size_t qword = i >> 6ull;
127  size_t bit = i & 63ull;
128  assert(qword == 0);
129  bts(tls.mask, bit);
130  snzm.arrive(i);
131  #elif VARIANT == SNZI
132  snzi.arrive(i);
133  #elif VARIANT == SNZM
134  snzm.arrive(i);
135  #elif VARIANT == BITMASK
136  numNonEmpty++;
137  size_t qword = i >> 6ull;
138  size_t bit = i & 63ull;
139  assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
140  __attribute__((unused)) bool ret = bts(list_mask[qword], bit);
141  assert(!ret);
142  assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
143  #else
144  numNonEmpty++;
145  #endif
146  }
147  #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
148  assert(numNonEmpty <= (int)numLists);
149  #endif
154 150
155 151  // Unlock and return
… …
158 154  #ifndef NO_STATS
159 155  tls.pick.push.success++;
160  tls.empty.push.value += num;
161  tls.empty.push.count += 1;
156  #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
157  tls.empty.push.value += num;
158  tls.empty.push.count += 1;
159  #endif
162 160  #endif
163 161  return;
… …
166 164
167 165  __attribute__((noinline, hot)) node_t * pop() {
168  #if !defined(NO_BITMASK)
169  // for(int r = 0; r < 10 && numNonEmpty != 0; r++) {
170  // // Pick two lists at random
171  // unsigned i = tls.rng.next() % numLists;
172  // unsigned j = tls.rng.next() % numLists;
173
174  // if(auto node = try_pop(i, j)) return node;
175  // }
166  #if VARIANT == DISCOVER
167  assert(numLists <= 64);
168  while(snzm.query()) {
169  tls.pick.pop.mask_attempt++;
170  unsigned i, j;
171  {
172  // Pick first list totally randomly
173  i = tls.rng.next() % numLists;
174
175  // Pick the other according to the bitmask
176  unsigned r = tls.rng.next();
177
178  size_t mask = tls.mask.load(std::memory_order_relaxed);
179  if(mask == 0) {
180  tls.pick.pop.mask_reset++;
181  mask = (1U << numLists) - 1;
182  tls.mask.store(mask, std::memory_order_relaxed);
183  }
184
185  unsigned b = rand_bit(r, mask);
186
187  assertf(b < 64, "%zu %u", mask, b);
188
189  j = b;
190
191  assert(j < numLists);
192  }
193
194  if(auto node = try_pop(i, j)) return node;
195  }
196  #elif VARIANT == SNZI
197  while(snzi.query()) {
198  // Pick two lists at random
199  int i = tls.rng.next() % numLists;
200  int j = tls.rng.next() % numLists;
201
202  if(auto node = try_pop(i, j)) return node;
203  }
204  #elif VARIANT == SNZM
205  //*
206  while(snzm.query()) {
207  tls.pick.pop.mask_attempt++;
208  unsigned i, j;
209  {
210  // Pick two random number
211  unsigned ri = tls.rng.next();
212  unsigned rj = tls.rng.next();
213
214  // Pick two nodes from it
215  unsigned wdxi = ri & snzm.mask;
216  unsigned wdxj = rj & snzm.mask;
217
218  // Get the masks from the nodes
219  size_t maski = snzm.masks(wdxi);
220  size_t maskj = snzm.masks(wdxj);
221
222  if(maski == 0 && maskj == 0) continue;
223
224  #if defined(__BMI2__)
225  uint64_t idxsi = _pext_u64(snzm.indexes, maski);
226  uint64_t idxsj = _pext_u64(snzm.indexes, maskj);
227
228  auto pi = __builtin_popcountll(maski);
229  auto pj = __builtin_popcountll(maskj);
230
231  ri = pi ? ri & ((pi >> 3) - 1) : 0;
232  rj = pj ? rj & ((pj >> 3) - 1) : 0;
233
234  unsigned bi = (idxsi >> (ri << 3)) & 0xff;
235  unsigned bj = (idxsj >> (rj << 3)) & 0xff;
236  #else
237  unsigned bi = rand_bit(ri >> snzm.depth, maski);
238  unsigned bj = rand_bit(rj >> snzm.depth, maskj);
239  #endif
240
241  i = (bi << snzm.depth) | wdxi;
242  j = (bj << snzm.depth) | wdxj;
243
244  /* paranoid */ assertf(i < numLists, "%u %u", bj, wdxi);
245  /* paranoid */ assertf(j < numLists, "%u %u", bj, wdxj);
246  }
247
248  if(auto node = try_pop(i, j)) return node;
249  }
250  /*/
251  while(snzm.query()) {
252  // Pick two lists at random
253  int i = tls.rng.next() % numLists;
254  int j = tls.rng.next() % numLists;
255
256  if(auto node = try_pop(i, j)) return node;
257  }
258  //*/
259  #elif VARIANT == BITMASK
176 260  int nnempty;
177 261  while(0 != (nnempty = numNonEmpty)) {
178 262  tls.pick.pop.mask_attempt++;
179 263  unsigned i, j;
180  // if( numLists < 4 || (numLists / nnempty) < 4 ) {
181  // // Pick two lists at random
182  // i = tls.rng.next() % numLists;
183  // j = tls.rng.next() % numLists;
184  // } else
185 264  {
186  #ifndef NO_STATS
187  // tls.pick.push.mask_attempt++;
188  #endif
189
190 265  // Pick two lists at random
191 266  unsigned num = ((numLists - 1) >> 6) + 1;
… …
236 311  #endif
237 312
313  #if VARIANT == DISCOVER
314  if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
315  if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
316  #endif
317
238 318  // Pick the bet list
239 319  int w = i;
… …
249 329  if( !list.lock.try_lock() ) return nullptr;
250 330
251  __attribute__((unused)) int num = numNonEmpty;
331  #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
332  __attribute__((unused)) int num = numNonEmpty;
333  #endif
252 334
253 335  // If list is empty, unlock and retry
… …
264 346
265 347  if(emptied) {
266  numNonEmpty--;
267  size_t qword = w >> 6ull;
268  size_t bit = w & 63ull;
269  assert((list_mask[qword] & (1ul << bit)) != 0);
270  __attribute__((unused)) bool ret = btr(list_mask[qword], bit);
271  assert(ret);
272  assert((list_mask[qword] & (1ul << bit)) == 0);
348  #if VARIANT == DISCOVER
349  size_t qword = w >> 6ull;
350  size_t bit = w & 63ull;
351  assert(qword == 0);
352  __attribute__((unused)) bool ret = btr(tls.mask, bit);
353  snzm.depart(w);
354  #elif VARIANT == SNZI
355  snzi.depart(w);
356  #elif VARIANT == SNZM
357  snzm.depart(w);
358  #elif VARIANT == BITMASK
359  numNonEmpty--;
360  size_t qword = w >> 6ull;
361  size_t bit = w & 63ull;
362  assert((list_mask[qword] & (1ul << bit)) != 0);
363  __attribute__((unused)) bool ret = btr(list_mask[qword], bit);
364  assert(ret);
365  assert((list_mask[qword] & (1ul << bit)) == 0);
366  #else
367  numNonEmpty--;
368  #endif
273 369  }
274 370
275 371  // Unlock and return
276 372  list.lock.unlock();
277  assert(numNonEmpty >= 0);
373  #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
374  assert(numNonEmpty >= 0);
375  #endif
278 376  #ifndef NO_STATS
279 377  tls.pick.pop.success++;
280  tls.empty.pop.value += num;
281  tls.empty.pop.count += 1;
378  #if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
379  tls.empty.pop.value += num;
380  tls.empty.pop.count += 1;
381  #endif
282 382  #endif
283 383  return node;
… …
296 396  size_t push = 0;
297 397  size_t pop = 0;
298  // size_t value = 0;
299  // size_t count = 0;
300 398  };
301 399
… …
417 515  pick_stat pick;
418 516  empty_stat empty;
517  __attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
419 518  } tls;
420 519
421  public:
422  std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
423  std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
424 520  private:
425 521  __attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
426 522  const unsigned numLists;
523  private:
524  #if VARIANT == SNZI
525  snzi_t snzi;
526  #elif VARIANT == SNZM || VARIANT == DISCOVER
527  snzm_t snzm;
528  #else
529  std::atomic_int numNonEmpty = { 0 }; // number of non-empty lists
530  #endif
531  #if VARIANT == BITMASK
532  std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
533  #endif
427 534
428 535  public:
… …
444 551  global_stats.pick.pop .success += tls.pick.pop.success;
445 552  global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
553  global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
446 554
447 555  global_stats.qstat.push.value += tls.empty.push.value;
… …
462 570  std::atomic_size_t success = { 0 };
463 571  std::atomic_size_t mask_attempt = { 0 };
572  std::atomic_size_t mask_reset = { 0 };
464 573  } pop;
465 574  } pick;
… …
483 592  void stats_print_local(std::ostream & os ) {
484 593  std::cout << "----- Relaxed List Stats -----" << std::endl;
485  {
486  ssize_t diff = 0;
487  size_t num = 0;
488  ssize_t max = 0;
489
490  for(size_t i = 0; i < numLists; i++) {
491  const auto & list = lists[i];
492  diff+= list.s.diff;
493  num ++;
494  max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff;
495  os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n";
496  }
497
498  os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl;
499  }
594  // {
595  // ssize_t diff = 0;
596  // size_t num = 0;
597  // ssize_t max = 0;
598
599  // for(size_t i = 0; i < numLists; i++) {
600  // const auto & list = lists[i];
601  // diff+= list.s.diff;
602  // num ++;
603  // max = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff;
604  // os << "Local Q ops : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n";
605  // }
606
607  // os << "Difference : " << ssize_t(double(diff) / num ) << " avg\t" << max << "max" << std::endl;
608  // }
500 609
501 610  const auto & global = global_stats;
… …
504 613  double pop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
505 614  double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
506
507  os << "Push Pick % : " << push_sur << "(" << global.pick.push.success << " / " << global.pick.push.attempt << ")\n";
508  os << "Pop Pick % : " << pop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .attempt << ")\n";
509  os << "TryPop Pick % : " << mpop_sur << "(" << global.pick.pop .success << " / " << global.pick.pop .mask_attempt << ")\n";
615  double rpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_reset);
616
617  double push_len = double(global.pick.push.attempt ) / global.pick.push.success;
618  double pop_len = double(global.pick.pop .attempt ) / global.pick.pop .success;
619  double mpop_len = double(global.pick.pop .mask_attempt) / global.pick.pop .success;
620  double rpop_len = double(global.pick.pop .mask_reset ) / global.pick.pop .success;
621
622  os << "Push Pick : " << push_sur << " %, len " << push_len << " (" << global.pick.push.attempt << " / " << global.pick.push.success << ")\n";
623  os << "Pop Pick : " << pop_sur << " %, len " << pop_len << " (" << global.pick.pop .attempt << " / " << global.pick.pop .success << ")\n";
624  os << "TryPop Pick : " << mpop_sur << " %, len " << mpop_len << " (" << global.pick.pop .mask_attempt << " / " << global.pick.pop .success << ")\n";
625  os << "Pop M Reset : " << rpop_sur << " %, len " << rpop_len << " (" << global.pick.pop .mask_reset << " / " << global.pick.pop .success << ")\n";
510 626
511 627  double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
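Despite the five VARIANTs, the pop() path above always follows the same discipline: test a global emptiness hint (numNonEmpty, snzi.query(), or snzm.query()), pick two sublists, and hand them to try_pop, which locks only the preferred one and retries on any failure. A condensed C++ sketch of that "pick two, take the better one" core (Sublist and the timestamp tie-break are illustrative assumptions; the real code picks the "bet list" inside try_pop with its own heuristic):

    #include <atomic>
    #include <mutex>
    #include <random>
    #include <vector>

    struct Sublist {
        std::mutex lock;
        std::vector<int> items;                 // stand-in for the intrusive queue
        std::atomic<unsigned long long> ts{0};  // age of the head element
    };

    bool try_pop_two(std::vector<Sublist> & lists, std::mt19937 & rng, int & out) {
        std::uniform_int_distribution<size_t> pick(0, lists.size() - 1);
        size_t i = pick(rng), j = pick(rng);
        // Prefer the sublist whose head has waited longer (smaller timestamp).
        size_t w = lists[j].ts.load(std::memory_order_relaxed) <
                   lists[i].ts.load(std::memory_order_relaxed) ? j : i;
        std::unique_lock<std::mutex> guard(lists[w].lock, std::try_to_lock);
        if (!guard || lists[w].items.empty()) return false;  // caller loops and repicks
        out = lists[w].items.back();
        lists[w].items.pop_back();
        return true;
    }

What distinguishes the variants is only how the two random picks are biased toward non-empty sublists: BITMASK consults list_mask, DISCOVER a per-thread mask that is lazily corrected, and SNZI/SNZM a scalable non-zero indicator.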
doc/theses/thierry_delisle_PhD/code/utils.hpp
ree06db5c r97392b69
106 106  }
107 107
108  static inline unsigned rand_bit(unsigned rnum, size_t mask) __attribute__((artificial));
108 109  static inline unsigned rand_bit(unsigned rnum, size_t mask) {
109 110  unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0;
… …
143 144  #endif
144 145  }
146
147  struct spinlock_t {
148  std::atomic_bool ll = { false };
149
150  inline void lock() {
151  while( __builtin_expect(ll.exchange(true),false) ) {
152  while(ll.load(std::memory_order_relaxed))
153  asm volatile("pause");
154  }
155  }
156
157  inline bool try_lock() {
158  return false == ll.exchange(true);
159  }
160
161  inline void unlock() {
162  ll.store(false, std::memory_order_release);
163  }
164
165  inline explicit operator bool() {
166  return ll.load(std::memory_order_relaxed);
167  }
168  };
169
170  static inline bool bts(std::atomic_size_t & target, size_t bit ) {
171  //*
172  int result = 0;
173  asm volatile(
174  "LOCK btsq %[bit], %[target]\n\t"
175  :"=@ccc" (result)
176  : [target] "m" (target), [bit] "r" (bit)
177  );
178  return result != 0;
179  /*/
180  size_t mask = 1ul << bit;
181  size_t ret = target.fetch_or(mask, std::memory_order_relaxed);
182  return (ret & mask) != 0;
183  //*/
184  }
185
186  static inline bool btr(std::atomic_size_t & target, size_t bit ) {
187  //*
188  int result = 0;
189  asm volatile(
190  "LOCK btrq %[bit], %[target]\n\t"
191  :"=@ccc" (result)
192  : [target] "m" (target), [bit] "r" (bit)
193  );
194  return result != 0;
195  /*/
196  size_t mask = 1ul << bit;
197  size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);
198  return (ret & mask) != 0;
199  //*/
200  }
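The bts/btr helpers moved here are the occupancy primitives for the bitmask variants: lock bts/btr return the previous bit directly in the carry flag, where the portable alternative must fetch_or/fetch_and the whole word. The commented-out branch above is exactly that portable form; spelled out as standalone C++ (a sketch of the equivalence plus a typical use, not new functionality):

    #include <atomic>
    #include <cstddef>

    // Same contract as the asm versions: atomically set/clear bit `bit`
    // and report whether it was previously set.
    static inline bool bts_portable(std::atomic<size_t> & target, size_t bit) {
        size_t mask = 1ul << bit;
        return (target.fetch_or(mask, std::memory_order_relaxed) & mask) != 0;
    }
    static inline bool btr_portable(std::atomic<size_t> & target, size_t bit) {
        size_t mask = 1ul << bit;
        return (target.fetch_and(~mask, std::memory_order_relaxed) & mask) != 0;
    }

    // Typical use in the list code: mark sublist i non-empty on push,
    // clear its bit when a pop empties it.
    std::atomic<size_t> occupancy{0};
    void on_push(unsigned i)    { bts_portable(occupancy, i); }
    void on_emptied(unsigned i) { btr_portable(occupancy, i); }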
libcfa/src/Makefile.am
ree06db5c r97392b69
50 50  thread_headers_nosrc = concurrency/invoke.h
51 51  thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
52  thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
52  thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
53 53  else
54 54  headers =
libcfa/src/Makefile.in
ree06db5c r97392b69
166 166  concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \
167 167  concurrency/invoke.c concurrency/io.cfa \
168  concurrency/preemption.cfa concurrency/coroutine.cfa \
169  concurrency/thread.cfa concurrency/kernel.cfa \
170  concurrency/monitor.cfa concurrency/mutex.cfa
168  concurrency/preemption.cfa concurrency/ready_queue.cfa \
169  concurrency/coroutine.cfa concurrency/thread.cfa \
170  concurrency/kernel.cfa concurrency/monitor.cfa \
171  concurrency/mutex.cfa
171 172  @BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \
172 173  @BUILDLIB_TRUE@ concurrency/thread.lo concurrency/kernel.lo \
… …
176 177  @BUILDLIB_TRUE@ concurrency/alarm.lo concurrency/invoke.lo \
177 178  @BUILDLIB_TRUE@ concurrency/io.lo concurrency/preemption.lo \
178  @BUILDLIB_TRUE@ $(am__objects_3)
179  @BUILDLIB_TRUE@ concurrency/ready_queue.lo $(am__objects_3)
179 180  am_libcfathread_la_OBJECTS = $(am__objects_4)
180 181  libcfathread_la_OBJECTS = $(am_libcfathread_la_OBJECTS)
… …
482 483  @BUILDLIB_FALSE@thread_headers =
483 484  @BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
484  @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
485  @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
485 486
486 487  #----------------------------------------------------------------------------------------------------------------
… …
620 621  concurrency/$(DEPDIR)/$(am__dirstamp)
621 622  concurrency/preemption.lo: concurrency/$(am__dirstamp) \
623  concurrency/$(DEPDIR)/$(am__dirstamp)
624  concurrency/ready_queue.lo: concurrency/$(am__dirstamp) \
622 625  concurrency/$(DEPDIR)/$(am__dirstamp)
623 626  concurrency/coroutine.lo: concurrency/$(am__dirstamp) \
libcfa/src/bits/debug.hfa
ree06db5c r97392b69
52 52  || defined(__CFA_DEBUG_PRINT_IO__) || defined(__CFA_DEBUG_PRINT_IO_CORE__) \
53 53  || defined(__CFA_DEBUG_PRINT_MONITOR__) || defined(__CFA_DEBUG_PRINT_PREEMPTION__) \
54  || defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__)
54  || defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__) \
55  || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
55 56  #include <stdio.h>
56 57  #include <unistd.h>
libcfa/src/bits/defs.hfa
ree06db5c r97392b69
54 54  return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
55 55  }
56
57  // #define __CFA_NO_BIT_TEST_AND_SET__
58
59  #if defined( __i386 )
60  static inline bool __atomic_bts(volatile unsigned long int * target, unsigned long int bit ) {
61  #if defined(__CFA_NO_BIT_TEST_AND_SET__)
62  unsigned long int mask = 1ul << bit;
63  unsigned long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
64  return (ret & mask) != 0;
65  #else
66  int result = 0;
67  asm volatile(
68  "LOCK btsl %[bit], %[target]\n\t"
69  : "=@ccc" (result)
70  : [target] "m" (*target), [bit] "r" (bit)
71  );
72  return result != 0;
73  #endif
74  }
75
76  static inline bool __atomic_btr(volatile unsigned long int * target, unsigned long int bit ) {
77  #if defined(__CFA_NO_BIT_TEST_AND_SET__)
78  unsigned long int mask = 1ul << bit;
79  unsigned long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
80  return (ret & mask) != 0;
81  #else
82  int result = 0;
83  asm volatile(
84  "LOCK btrl %[bit], %[target]\n\t"
85  :"=@ccc" (result)
86  : [target] "m" (*target), [bit] "r" (bit)
87  );
88  return result != 0;
89  #endif
90  }
91  #elif defined( __x86_64 )
92  static inline bool __atomic_bts(volatile unsigned long long int * target, unsigned long long int bit ) {
93  #if defined(__CFA_NO_BIT_TEST_AND_SET__)
94  unsigned long long int mask = 1ul << bit;
95  unsigned long long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
96  return (ret & mask) != 0;
97  #else
98  int result = 0;
99  asm volatile(
100  "LOCK btsq %[bit], %[target]\n\t"
101  : "=@ccc" (result)
102  : [target] "m" (*target), [bit] "r" (bit)
103  );
104  return result != 0;
105  #endif
106  }
107
108  static inline bool __atomic_btr(volatile unsigned long long int * target, unsigned long long int bit ) {
109  #if defined(__CFA_NO_BIT_TEST_AND_SET__)
110  unsigned long long int mask = 1ul << bit;
111  unsigned long long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
112  return (ret & mask) != 0;
113  #else
114  int result = 0;
115  asm volatile(
116  "LOCK btrq %[bit], %[target]\n\t"
117  :"=@ccc" (result)
118  : [target] "m" (*target), [bit] "r" (bit)
119  );
120  return result != 0;
121  #endif
122  }
123  #elif defined( __ARM_ARCH )
124  #error __atomic_bts and __atomic_btr not implemented for arm
125  #else
126  #error uknown hardware architecture
127  #endif
libcfa/src/concurrency/invoke.h
ree06db5c r97392b69
161 161  };
162 162
163  // Link lists fields
164  // instrusive link field for threads
165  struct __thread_desc_link {
166  struct $thread * next;
167  struct $thread * prev;
168  volatile unsigned long long ts;
169  };
170
163 171  struct $thread {
164 172  // Core threading fields
… …
192 200  // Link lists fields
193 201  // instrusive link field for threads
194  struct $thread * next;
202  struct __thread_desc_link link;
195 203
196 204  struct {
… …
218 226  #ifdef __cforall
219 227  extern "Cforall" {
228
220 229  static inline $thread *& get_next( $thread & this ) __attribute__((const)) {
221  return this.next;
230  return this.link.next;
222 231  }
223 232
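The new __thread_desc_link turns the single next pointer into a doubly-linked, timestamped link, which is what the sentinel-based __intrusive_lane_t in kernel.hfa (further down) threads through. A self-contained C++ sketch of that intrusive, sentinel-bounded FIFO (names and layout here are illustrative, not the runtime's exact code):

    struct Thread;
    struct Link { Thread * next = nullptr; Thread * prev = nullptr; unsigned long long ts = 0; };
    struct Thread { Link link; };

    struct Lane {
        Thread before, after;   // sentinels; only their link fields are used
        Lane() {
            before.link.next = &after;
            after.link.prev = &before;
        }
        bool push(Thread * t) {  // returns true if the lane was previously empty
            Thread * last = after.link.prev;
            t->link.next = &after;  t->link.prev = last;
            last->link.next = t;    after.link.prev = t;
            return last == &before;
        }
        Thread * pop() {         // returns nullptr if the lane is empty
            Thread * first = before.link.next;
            if (first == &after) return nullptr;
            before.link.next = first->link.next;
            first->link.next->link.prev = &before;
            first->link.next = first->link.prev = nullptr;
            return first;
        }
    };

Sentinels remove every head/tail special case, so push and pop are branch-poor; the ts field is there so a popper can compare lane ages without dequeuing.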
libcfa/src/concurrency/io.cfa
ree06db5c r97392b69
392 392  // This is the tricky case
393 393  // The thread was preempted and now it is on the ready queue
394  /* paranoid */ verify( thrd.next == 1p ); // The thread should be the last on the list
394 395  /* paranoid */ verify( thrd.next != 0p ); // The thread should be the last on the list
395 396  /* paranoid */ verify( this.ready_queue.head == &thrd ); // The thread should be the only thing on the list
396 397
libcfa/src/concurrency/kernel.cfa
ree06db5c r97392b69
120 120  static void __run_thread(processor * this, $thread * dst);
121 121  static $thread * __halt(processor * this);
122  static bool __wake_one(cluster * cltr, bool was_empty);
122  static bool __wake_one(cluster * cltr);
123 123  static bool __wake_proc(processor *);
124 124
… …
197 197  self_mon.recursion = 1;
198 198  self_mon_p = &self_mon;
199  next = 0p;
199  link.next = 0p;
200  link.prev = 0p;
200 201
201 202  node.next = 0p;
… …
223 224  this.name = name;
224 225  this.cltr = &cltr;
226  id = -1u;
225 227  terminated{ 0 };
226 228  destroyer = 0p;
… …
260 262  this.preemption_rate = preemption_rate;
261 263  ready_queue{};
262  ready_queue_lock{};
264  ready_lock{};
263 265
264 266  #if !defined(__CFA_NO_STATISTICS__)
… …
295 297  __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
296 298
299  // register the processor unless it's the main thread which is handled in the boot sequence
300  if(this != mainProcessor) {
301  this->id = doregister2(this->cltr, this);
302  ready_queue_grow( this->cltr );
303  }
304
297 305  doregister(this->cltr, this);
298 306
… …
318 326  /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
319 327  /* paranoid */ verifyf( readyThread->state == Ready || readyThread->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", readyThread->state, readyThread->preempted);
320  /* paranoid */ verifyf( readyThread->next == 0p, "Expected null got %p", readyThread->next );
328  /* paranoid */ verifyf( readyThread->link.next == 0p, "Expected null got %p", readyThread->link.next );
321 329
322 330  // We found a thread run it
… …
334 342  V( this->terminated );
335 343
344  // unregister the processor unless it's the main thread which is handled in the boot sequence
345  if(this != mainProcessor) {
346  ready_queue_shrink( this->cltr );
347  unregister2(this->cltr, this);
348  }
349  else {
350  // HACK : the coroutine context switch expects this_thread to be set
351  // and it make sense for it to be set in all other cases except here
352  // fake it
353  kernelTLS.this_thread = mainThread;
354  }
355
336 356  __cfadbg_print_safe(runtime_core, "Kernel : core %p terminated\n", this);
337 357
338  // HACK : the coroutine context switch expects this_thread to be set
339  // and it make sense for it to be set in all other cases except here
340  // fake it
341  if( this == mainProcessor ) kernelTLS.this_thread = mainThread;
358  stats_tls_tally(this->cltr);
342 359  }
343 360
… …
591 608  // Scheduler routines
592 609  // KERNEL ONLY
593  void __schedule_thread( $thread * thrd ) with( *thrd->curr_cluster ) {
610  void __schedule_thread( $thread * thrd ) {
611  /* paranoid */ verify( thrd );
612  /* paranoid */ verify( thrd->state != Halted );
594 613  /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
595 614  /* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
596  /* paranoid */ if( thrd->state == Blocked || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
597  "Error inactive thread marked as preempted, state %d, preemption %d\n", thrd->state, thrd->preempted );
598  /* paranoid */ if( thrd->preempted != __NO_PREEMPTION ) assertf(thrd->state == Active || thrd->state == Rerun,
599  "Error preempted thread marked as not currently running, state %d, preemption %d\n", thrd->state, thrd->preempted );
615  /* paranoid */ if( thrd->state == Blocked || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
616  "Error inactive thread marked as preempted, state %d, preemption %d\n", thrd->state, thrd->preempted );
617  /* paranoid */ if( thrd->preempted != __NO_PREEMPTION ) assertf(thrd->state == Active || thrd->state == Rerun,
618  "Error preempted thread marked as not currently running, state %d, preemption %d\n", thrd->state, thrd->preempted );
600 619  /* paranoid */ #endif
601  /* paranoid */ verifyf( thrd->next == 0p, "Expected null got %p", thrd->next );
620  /* paranoid */ verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
602 621
603 622  if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
604 623
605  lock ( ready_queue_lock __cfaabi_dbg_ctx2 );
606  bool was_empty = !(ready_queue != 0);
607  append( ready_queue, thrd );
608  unlock( ready_queue_lock );
609
610  __wake_one(thrd->curr_cluster, was_empty);
624  ready_schedule_lock(thrd->curr_cluster, kernelTLS.this_processor);
625  push( thrd->curr_cluster, thrd );
626
627  __wake_one(thrd->curr_cluster);
628  ready_schedule_unlock(thrd->curr_cluster, kernelTLS.this_processor);
611 629
612 630  /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
… …
617 635  /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
618 636
619  lock( ready_queue_lock __cfaabi_dbg_ctx2);
620  $thread * head = pop_head( ready_queue);
621  unlock( ready_queue_lock);
637  ready_schedule_lock(this, kernelTLS.this_processor);
638  $thread * head = pop( this );
639  ready_schedule_unlock(this, kernelTLS.this_processor);
622 640
623 641  /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
… …
704 722  // If that is the case, abandon the preemption.
705 723  bool preempted = false;
706  if(thrd->next == 0p) {
724  if(thrd->link.next == 0p) {
707 725  preempted = true;
708 726  thrd->preempted = reason;
… …
764 782  pending_preemption = false;
765 783  kernel_thread = pthread_self();
784  id = -1u;
766 785
767 786  runner{ &this };
… …
773 792  mainProcessor = (processor *)&storage_mainProcessor;
774 793  (*mainProcessor){};
794
795  mainProcessor->id = doregister2(mainCluster, mainProcessor);
775 796
776 797  //initialize the global state variables
… …
827 848  kernel_stop_preemption();
828 849
850  unregister2(mainCluster, mainProcessor);
851
829 852  // Destroy the main processor and its context in reverse order of construction
830 853  // These were manually constructed so we need manually destroy them
831 854  void ^?{}(processor & this) with( this ){
832 855  /* paranoid */ verify( this.do_terminate == true );
856  __cfaabi_dbg_print_safe("Kernel : destroyed main processor context %p\n", &runner);
833 857  }
834 858
835 859
836 860  // Final step, destroy the main thread since it is no longer needed
862
838 863  // Since we provided a stack to this taxk it will not destroy anything
839 864  /* paranoid */ verify(mainThread->self_cor.stack.storage == (__stack_t*)(((uintptr_t)&storage_mainThreadCtx)| 0x1));
… …
888 913
889 914  // Wake a thread from the front if there are any
890  static bool __wake_one(cluster * this, __attribute__((unused)) bool force) {
891  // if we don't want to force check if we know it's false
892  // if( !this->idles.head && !force ) return false;
893
915  static bool __wake_one(cluster * this) {
894 916  // First, lock the cluster idle
895 917  lock( this->idle_lock __cfaabi_dbg_ctx2 );
libcfa/src/concurrency/kernel.hfa
ree06db5c r97392b69
60 60  // Cluster from which to get threads
61 61  struct cluster * cltr;
62  unsigned int id;
62 63
63 64  // Name of the processor
… …
92 93
93 94  // Link lists fields
94  struct __dbg_node_proc {
95  struct processor * next;
96  struct processor * prev;
95  struct __dbg_node_cltr {
96  processor * next;
97  processor * prev;
97 98  } node;
98 99
… …
121 122  #define CFA_CLUSTER_IO_BUFFLEN_OFFSET 16
122 123
124
125  //-----------------------------------------------------------------------------
126  // Cluster Tools
127
128  // Cells use by the reader writer lock
129  // while not generic it only relies on a opaque pointer
130  struct __processor_id;
131
132  // Reader-Writer lock protecting the ready-queue
133  // while this lock is mostly generic some aspects
134  // have been hard-coded to for the ready-queue for
135  // simplicity and performance
136  struct __clusterRWLock_t {
137  // total cachelines allocated
138  unsigned int max;
139
140  // cachelines currently in use
141  volatile unsigned int alloc;
142
143  // cachelines ready to itereate over
144  // (!= to alloc when thread is in second half of doregister)
145  volatile unsigned int ready;
146
147  // writer lock
148  volatile bool lock;
149
150  // data pointer
151  __processor_id * data;
152  };
153
154  void ?{}(__clusterRWLock_t & this);
155  void ^?{}(__clusterRWLock_t & this);
156
157  // Intrusives lanes which are used by the relaxed ready queue
158  struct __attribute__((aligned(128))) __intrusive_lane_t {
159  // spin lock protecting the queue
160  volatile bool lock;
161
162  // anchor for the head and the tail of the queue
163  struct __sentinel_t {
164  // Link lists fields
165  // instrusive link field for threads
166  // must be exactly as in $thread
167  __thread_desc_link link;
168  } before, after;
169
170  #if defined(__CFA_WITH_VERIFY__)
171  // id of last processor to acquire the lock
172  // needed only to check for mutual exclusion violations
173  unsigned int last_id;
174
175  // number of items on this list
176  // needed only to check for deadlocks
177  unsigned int count;
178  #endif
179
180  // Optional statistic counters
181  #if !defined(__CFA_NO_SCHED_STATS__)
182  struct __attribute__((aligned(64))) {
183  // difference between number of push and pops
184  ssize_t diff;
185
186  // total number of pushes and pops
187  size_t push;
188  size_t pop ;
189  } stat;
190  #endif
191  };
192
193  void ?{}(__intrusive_lane_t & this);
194  void ^?{}(__intrusive_lane_t & this);
195
196  typedef unsigned long long __cfa_readyQ_mask_t;
197
198  // enum {
199  // __cfa_ready_queue_mask_size = (64 - sizeof(size_t)) / sizeof(size_t),
200  // __cfa_max_ready_queues = __cfa_ready_queue_mask_size * 8 * sizeof(size_t)
201  // };
202
203  #define __cfa_lane_mask_size ((64 - sizeof(size_t)) / sizeof(__cfa_readyQ_mask_t))
204  #define __cfa_max_lanes (__cfa_lane_mask_size * 8 * sizeof(__cfa_readyQ_mask_t))
205
206  //TODO adjust cache size to ARCHITECTURE
207  // Structure holding the relaxed ready queue
208  struct __attribute__((aligned(128))) __ready_queue_t {
209  // Data tracking how many/which lanes are used
210  // Aligned to 128 for cache locality
211  struct {
212  // number of non-empty lanes
213  volatile size_t count;
214
215  // bit mask, set bits indentify which lanes are non-empty
216  volatile __cfa_readyQ_mask_t mask[ __cfa_lane_mask_size ];
217  } used;
218
219  // Data tracking the actual lanes
220  // On a seperate cacheline from the used struct since
221  // used can change on each push/pop but this data
222  // only changes on shrink/grow
223  struct __attribute__((aligned(64))) {
224  // Arary of lanes
225  __intrusive_lane_t * volatile data;
226
227  // Number of lanes (empty or not)
228  volatile size_t count;
229  } lanes;
230
231  // Statistics
232  #if !defined(__CFA_NO_STATISTICS__)
233  __attribute__((aligned(64))) struct {
234  struct {
235  // Push statistic
236  struct {
237  // number of attemps at pushing something
238  volatile size_t attempt;
239
240  // number of successes at pushing
241  volatile size_t success;
242  } push;
243
244  // Pop statistic
245  struct {
246  // number of reads of the mask
247  // picking an empty __cfa_readyQ_mask_t counts here
248  // but not as an attempt
249  volatile size_t maskrds;
250
251  // number of attemps at poping something
252  volatile size_t attempt;
253
254  // number of successes at poping
255  volatile size_t success;
256  } pop;
257  } pick;
258
259  // stats on the "used" struct of the queue
260  // tracks average number of queues that are not empty
261  // when pushing / poping
262  struct {
263  volatile size_t value;
264  volatile size_t count;
265  } used;
266  } global_stats;
267
268  #endif
269  };
270
271  void ?{}(__ready_queue_t & this);
272  void ^?{}(__ready_queue_t & this);
273
123 274  //-----------------------------------------------------------------------------
124 275  // Cluster
125 276  struct cluster {
126 277  // Ready queue locks
127  __spinlock_t ready_queue_lock;
278  __clusterRWLock_t ready_lock;
128 279
129 280  // Ready queue for threads
130  __queue_t($thread) ready_queue;
281  __ready_queue_t ready_queue;
131 282
132 283  // Name of the cluster
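The used.mask above gives poppers a bitmap of non-empty lanes, and utils.hpp's rand_bit (whose body is elided in this changeset) converts a random number plus that mask into a concrete lane index. One illustrative way to finish that selection, consistent with the visible first line of rand_bit (the loop below is an assumption, not necessarily the thesis's implementation):

    #include <cstdint>

    // Return a uniformly random set bit of `mask` (0 if mask is empty).
    static inline unsigned rand_bit(unsigned rnum, uint64_t mask) {
        if (mask == 0) return 0;
        unsigned nth = rnum % __builtin_popcountll(mask);  // which set bit to take
        for (;; nth--) {
            unsigned bit = __builtin_ctzll(mask);          // lowest remaining set bit
            if (nth == 0) return bit;
            mask &= mask - 1;                              // clear it and continue
        }
    }

Picking a random set bit, rather than scanning from bit 0, matters for fairness: a linear scan would systematically favour low-numbered lanes.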
libcfa/src/concurrency/kernel_private.hfa
ree06db5c r97392b69
84 84  //-----------------------------------------------------------------------------
85 85  // Utils
86  #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
86  #define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
87 87
88 88  static inline uint32_t __tls_rand() {
… …
103 103  void unregister( struct cluster * cltr, struct processor * proc );
104 104
105  //=======================================================================
106  // Cluster lock API
107  //=======================================================================
108  struct __attribute__((aligned(64))) __processor_id {
109  processor * volatile handle;
110  volatile bool lock;
111  };
112
113  // Lock-Free registering/unregistering of threads
114  // Register a processor to a given cluster and get its unique id in return
115  unsigned doregister2( struct cluster * cltr, struct processor * proc );
116
117  // Unregister a processor from a given cluster using its id, getting back the original pointer
118  void unregister2( struct cluster * cltr, struct processor * proc );
119
120  //=======================================================================
121  // Reader-writer lock implementation
122  // Concurrent with doregister/unregister,
123  // i.e., threads can be added at any point during or between the entry/exit
124
125  //-----------------------------------------------------------------------
126  // simple spinlock underlying the RWLock
127  // Blocking acquire
128  static inline void __atomic_acquire(volatile bool * ll) {
129  while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
130  while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
131  asm volatile("pause");
132  }
133  /* paranoid */ verify(*ll);
134  }
135
136  // Non-Blocking acquire
137  static inline bool __atomic_try_acquire(volatile bool * ll) {
138  return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
139  }
140
141  // Release
142  static inline void __atomic_unlock(volatile bool * ll) {
143  /* paranoid */ verify(*ll);
144  __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
145  }
146
147  //-----------------------------------------------------------------------
148  // Reader side : acquire when using the ready queue to schedule but not
149  // creating/destroying queues
150  static inline void ready_schedule_lock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
151  unsigned iproc = proc->id;
152  /*paranoid*/ verify(data[iproc].handle == proc);
153  /*paranoid*/ verify(iproc < ready);
154
155  // Step 1 : make sure no writer are in the middle of the critical section
156  while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
157  asm volatile("pause");
158
159  // Fence needed because we don't want to start trying to acquire the lock
160  // before we read a false.
161  // Not needed on x86
162  // std::atomic_thread_fence(std::memory_order_seq_cst);
163
164  // Step 2 : acquire our local lock
165  __atomic_acquire( &data[iproc].lock );
166  /*paranoid*/ verify(data[iproc].lock);
167  }
168
169  static inline void ready_schedule_unlock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
170  unsigned iproc = proc->id;
171  /*paranoid*/ verify(data[iproc].handle == proc);
172  /*paranoid*/ verify(iproc < ready);
173  /*paranoid*/ verify(data[iproc].lock);
174  __atomic_unlock(&data[iproc].lock);
175  }
176
177  //-----------------------------------------------------------------------
178  // Writer side : acquire when changing the ready queue, e.g. adding more
179  // queues or removing them.
180  uint_fast32_t ready_mutate_lock( struct cluster & cltr );
181
182  void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t /* value returned by lock */ );
183
184  //=======================================================================
185  // Ready-Queue API
186  //-----------------------------------------------------------------------
187  // push thread onto a ready queue for a cluster
188  // returns true if the list was previously empty, false otherwise
189  __attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
190
191  //-----------------------------------------------------------------------
192  // pop thread from the ready queue of a cluster
193  // returns 0p if empty
194  __attribute__((hot)) struct $thread * pop(struct cluster * cltr);
195
196  //-----------------------------------------------------------------------
197  // Increase the width of the ready queue (number of lanes) by 4
198  void ready_queue_grow (struct cluster * cltr);
199
200  //-----------------------------------------------------------------------
201  // Decrease the width of the ready queue (number of lanes) by 4
202  void ready_queue_shrink(struct cluster * cltr);
203
204  //-----------------------------------------------------------------------
205  // Statics call at the end of each thread to register statistics
206  #if !defined(__CFA_NO_STATISTICS__)
207  void stats_tls_tally(struct cluster * cltr);
208  #else
209  static inline void stats_tls_tally(struct cluster * cltr) {}
210  #endif
211
105 212  // Local Variables: //
106 213  // mode: c //
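The reader side above is the heart of the new scheduler lock: each processor owns one cache-line-sized __processor_id, readers (every push/pop of the ready queue) touch only their own flag after checking no writer is active, and the writer side (ready_mutate_lock, implemented in the new ready_queue.cfa, which this changeset adds but the diff does not show) presumably takes the global flag and then drains every per-processor flag. A compact C++ rendering of that protocol (a sketch under those assumptions, omitting the registration/growth machinery):

    #include <atomic>
    #include <vector>

    struct alignas(64) ReaderSlot { std::atomic<bool> lock{false}; };

    struct DistRWLock {
        std::atomic<bool> writer{false};
        std::vector<ReaderSlot> slots;
        explicit DistRWLock(unsigned nprocs) : slots(nprocs) {}

        void read_lock(unsigned id) {
            // Step 1: don't enter while a writer is in the critical section.
            while (writer.load(std::memory_order_relaxed)) { /* spin */ }
            // Step 2: take our own slot; a racing writer simply waits on it.
            while (slots[id].lock.exchange(true, std::memory_order_acquire)) { /* spin */ }
        }
        void read_unlock(unsigned id) {
            slots[id].lock.store(false, std::memory_order_release);
        }

        void write_lock() {
            while (writer.exchange(true, std::memory_order_acquire)) { /* one writer at a time */ }
            for (auto & s : slots)   // drain readers one slot at a time
                while (s.lock.exchange(true, std::memory_order_acquire)) { /* spin */ }
        }
        void write_unlock() {
            for (auto & s : slots) s.lock.store(false, std::memory_order_release);
            writer.store(false, std::memory_order_release);
        }
    };

The payoff is that the common path (scheduling) costs one uncontended exchange on a processor-private cache line, while the rare path (growing or shrinking the ready queue) pays O(processors).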
libcfa/src/concurrency/monitor.cfa
ree06db5c r97392b69
114 114
115 115  // Some one else has the monitor, wait in line for it
116  /* paranoid */ verify( thrd->next == 0p );
116  /* paranoid */ verify( thrd->link.next == 0p );
117 117  append( this->entry_queue, thrd );
118  /* paranoid */ verify( thrd->next == 1p );
118  /* paranoid */ verify( thrd->link.next == 1p );
119 119
120 120  unlock( this->lock );
… …
199 199
200 200  // Some one else has the monitor, wait in line for it
201  /* paranoid */ verify( thrd->next == 0p );
201  /* paranoid */ verify( thrd->link.next == 0p );
202 202  append( this->entry_queue, thrd );
203  /* paranoid */ verify( thrd->next == 1p );
203  /* paranoid */ verify( thrd->link.next == 1p );
204 204  unlock( this->lock );
205 205
… …
761 761  $thread * new_owner = pop_head( this->entry_queue );
762 762  /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
763  /* paranoid */ verify( !new_owner || new_owner->next == 0p );
763  /* paranoid */ verify( !new_owner || new_owner->link.next == 0p );
764 764  __set_owner( this, new_owner );
765 765
… …
883 883  }
884 884
885  __cfaabi_dbg_print_safe( "Kernel : Runing %i (%p)\n", ready2run, ready2run ? node->waiting_thread : 0p );
885  __cfaabi_dbg_print_safe( "Kernel : Runing %i (%p)\n", ready2run, ready2run ? (thread*)node->waiting_thread : (thread*)0p );
886 886  return ready2run ? node->waiting_thread : 0p;
887 887  }
… …
907 907  // For each thread in the entry-queue
908 908  for( $thread ** thrd_it = &entry_queue.head;
909  *thrd_it != 1p;
910  thrd_it = &(*thrd_it)->next
909  (*thrd_it) != 1p;
910  thrd_it = &(*thrd_it)->link.next
911 911  ) {
912 912  // For each acceptable check if it matches
libcfa/src/concurrency/preemption.cfa
ree06db5c r97392b69
121 121  // If there are still alarms pending, reset the timer
122 122  if( & (*alarms)`first ) {
123  __cfaabi_dbg_print_buffer_decl(" KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
123  __cfadbg_print_buffer_decl(preemption, " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
124 124  Duration delta = (*alarms)`first.alarm - currtime;
125 125  Duration capped = max(delta, 50`us);
libcfa/src/concurrency/thread.cfa
ree06db5c r97392b69
35 35  self_mon_p = &self_mon;
36 36  curr_cluster = &cl;
37  next = 0p;
37  link.next = 0p;
38  link.prev = 0p;
38 39
39 40  node.next = 0p;
libcfa/src/stdhdr/assert.h
ree06db5c r97392b69
33 33  #define verify(x) assert(x)
34 34  #define verifyf(x, ...) assertf(x, __VA_ARGS__)
35  #define verifyfail(...)
35 36  #define __CFA_WITH_VERIFY__
36 37  #else
37 38  #define verify(x)
38 39  #define verifyf(x, ...)
40  #define verifyfail(...)
39 41  #endif
tests/concurrent/examples/datingService.cfa
ree06db5c r97392b69
35 35  signal_block( Boys[ccode] ); // restart boy to set phone number
36 36  } // if
37  // sout | "Girl:" | PhoneNo | "is dating Boy at" | BoyPhoneNo | "with ccode" | ccode;
37  // sout | "Girl:" | PhoneNo | "is dating Boy at" | BoyPhoneNo | "with ccode" | ccode;
38 38  return BoyPhoneNo;
39 39  } // DatingService girl
… …
47 47  signal_block( Girls[ccode] ); // restart girl to set phone number
48 48  } // if
49  // sout | " Boy:" | PhoneNo | "is dating Girl" | GirlPhoneNo | "with ccode" | ccode;
49  // sout | " Boy:" | PhoneNo | "is dating Girl" | GirlPhoneNo | "with ccode" | ccode;
50 50  return GirlPhoneNo;
51 51  } // DatingService boy
tests/concurrent/waitfor/when.cfa
ree06db5c r97392b69
57 57
58 58  void arbiter( global_t & mutex this ) {
59  // There is a race at start where callers can get in before the arbiter.
60  // It doesn't really matter here so just restart the loop correctly and move on
61  this.last_call = 6;
62
59 63  for( int i = 0; i < N; i++ ) {
60 64  when( this.last_call == 6 ) waitfor( call1 : this ) { if( this.last_call != 1) { serr | "Expected last_call to be 1 got" | this.last_call; } }