Changes in [6a8208cb:59f3f61]

Files: 40 edited

Legend:
  (unmarked) Unmodified
  + Added
  - Removed
benchmark/benchcltr.hfa
r6a8208cb r59f3f61
  for() {
      sleep(100`ms);
-     end = getTimeNsec();
+     end = timeHiRes();
      Duration delta = end - start;
      /*if(is_tty)*/ {
…
  }
  #else
- uint64_t getTimeNsec() {
+ uint64_t timeHiRes() {
      timespec curr;
      clock_gettime( CLOCK_REALTIME, &curr );
…
  for(;;) {
      usleep(100000);
-     end = getTimeNsec();
+     end = timeHiRes();
      uint64_t delta = end - start;
      /*if(is_tty)*/ {
benchmark/io/http/protocol.cfa
r6a8208cb r59f3f61

  char buff[100];
- Time now = getTimeNsec();
+ Time now = timeHiRes();
  strftime( buff, 100, "%a, %d %b %Y %H:%M:%S %Z", now );
  sout | "Updated date to '" | buff | "'";
benchmark/io/readv-posix.c
r6a8208cb r59f3f61
  printf("Starting\n");
  bool is_tty = isatty(STDOUT_FILENO);
- start = getTimeNsec();
+ start = timeHiRes();
  run = true;

…

  run = false;
- end = getTimeNsec();
+ end = timeHiRes();
  printf("\nDone\n");
benchmark/io/readv.cfa
r6a8208cb r59f3f61
  printf("Starting\n");
  bool is_tty = isatty(STDOUT_FILENO);
- start = getTimeNsec();
+ start = timeHiRes();
  run = true;

…

  run = false;
- end = getTimeNsec();
+ end = timeHiRes();
  printf("\nDone\n");
  }
benchmark/readyQ/cycle.cc
r6a8208cb r59f3f61

  bool is_tty = isatty(STDOUT_FILENO);
- start = getTimeNsec();
+ start = timeHiRes();

  for(int i = 0; i < nthreads; i++) {
…

  stop = true;
- end = getTimeNsec();
+ end = timeHiRes();
  printf("\nDone\n");
benchmark/readyQ/cycle.cfa
r6a8208cb r59f3f61

  bool is_tty = isatty(STDOUT_FILENO);
- start = getTimeNsec();
+ start = timeHiRes();

  for(i; nthreads) {
…

  stop = true;
- end = getTimeNsec();
+ end = timeHiRes();
  printf("\nDone\n");
benchmark/readyQ/cycle.cpp
r6a8208cb r59f3f61

  bool is_tty = isatty(STDOUT_FILENO);
- start = getTimeNsec();
+ start = timeHiRes();

  for(int i = 0; i < nthreads; i++) {
…

  stop = true;
- end = getTimeNsec();
+ end = timeHiRes();
  printf("\nDone\n");
benchmark/readyQ/locality.cc
r6a8208cb r59f3f61

  bool is_tty = isatty(STDOUT_FILENO);
- start = getTimeNsec();
+ start = timeHiRes();

  for(size_t i = 0; i < nthreads; i++) {
…

  stop = true;
- end = getTimeNsec();
+ end = timeHiRes();
  printf("\nDone\n");
benchmark/readyQ/locality.cfa
r6a8208cb r59f3f61

  bool is_tty = isatty(STDOUT_FILENO);
- start = getTimeNsec();
+ start = timeHiRes();

  for(i; nthreads) {
…

  stop = true;
- end = getTimeNsec();
+ end = timeHiRes();
  printf("\nDone\n");
benchmark/readyQ/locality.cpp
r6a8208cb r59f3f61

  bool is_tty = isatty(STDOUT_FILENO);
- start = getTimeNsec();
+ start = timeHiRes();

  for(size_t i = 0; i < nthreads; i++) {
…

  stop = true;
- end = getTimeNsec();
+ end = timeHiRes();
  printf("\nDone\n");
benchmark/readyQ/rq_bench.hfa
r6a8208cb r59f3f61
  for() {
      sleep(100`ms);
-     Time end = getTimeNsec();
+     Time end = timeHiRes();
      Duration delta = end - start;
      if(is_tty) {
benchmark/readyQ/rq_bench.hpp
r6a8208cb r59f3f61
  }

- uint64_t getTimeNsec() {
+ uint64_t timeHiRes() {
      timespec curr;
      clock_gettime( CLOCK_REALTIME, &curr );
…
  for(;;) {
      Sleeper::usleep(100000);
-     uint64_t end = getTimeNsec();
+     uint64_t end = timeHiRes();
      uint64_t delta = end - start;
      if(is_tty) {
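The renamed helper is a thin wrapper around clock_gettime(CLOCK_REALTIME) that returns nanoseconds. A minimal C++ sketch of the pattern the benchmarks use to measure elapsed wall-clock time; the helper body matches the hunk above, while the main() harness is illustrative only:

    #include <cstdint>
    #include <cstdio>
    #include <ctime>

    // Wall-clock time in nanoseconds, same shape as timeHiRes() in rq_bench.hpp.
    static uint64_t timeHiRes() {
        timespec curr;
        clock_gettime( CLOCK_REALTIME, &curr );
        return (uint64_t)curr.tv_sec * 1000000000ull + curr.tv_nsec;
    }

    int main() {
        uint64_t start = timeHiRes();
        // ... benchmark body ...
        uint64_t delta = timeHiRes() - start;              // elapsed nanoseconds
        printf("elapsed: %.3f s\n", delta / 1000000000.0);
    }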
benchmark/readyQ/yield.cfa
r6a8208cb r59f3f61

  bool is_tty = isatty(STDOUT_FILENO);
- start = getTimeNsec();
+ start = timeHiRes();
  run = true;

…

  run = false;
- end = getTimeNsec();
+ end = timeHiRes();
  printf("\nDone\n");
  }
doc/theses/thierry_delisle_PhD/code/readyQ_proto/links.hpp
r6a8208cb r59f3f61
  }

- long long ts() const {
+ unsigned long long ts() const {
      return before._links.ts;
  }
doc/theses/thierry_delisle_PhD/code/readyQ_proto/links2.hpp
r6a8208cb r59f3f61
  template<typename node_t>
  class mpsc_queue : private mcs_queue<node_t> {
-     node_t * volatile head;
+     node_t * volatile _head;
  public:
-     mpsc_queue(): mcs_queue<node_t>(), head(nullptr) {}
+     mpsc_queue(): mcs_queue<node_t>(), _head(nullptr) {}

      inline bool empty() const { return mcs_queue<node_t>::empty(); }
+
+     node_t * head() const { return _head; }

      // Added a new element to the queue
…
      inline node_t * push(node_t * elem) {
          node_t * prev = mcs_queue<node_t>::push(elem);
-         if (!prev) head = elem;
+         if (!prev) _head = elem;
          return prev;
      }
…
      // NOT Multi-Thread Safe
      inline node_t * pop(node_t *& next) {
-         node_t * elem = head;
+         node_t * elem = _head;
          // If head is empty just return
          if (!elem) return nullptr;
…
          // If there is already someone in the list, then it's easy
          if (elem->_links.next) {
-             head = next = elem->_links.next;
+             _head = next = elem->_links.next;
              // force memory sync
              __atomic_thread_fence(__ATOMIC_SEQ_CST);
…
          // at the CAS in advance and therefore can write to head
          // after that point, it could overwrite the write in push
-         head = nullptr;
+         _head = nullptr;
          next = mcs_queue<node_t>::advance(elem);

…
          // it is the only way we can guarantee we are not overwriting
          // a write made in push
-         if (next) head = next;
+         if (next) _head = next;
      }

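The rename of the data member to _head makes room for a new head() accessor that peeks at the front node without popping it. The work-stealing prototype later in this changeset uses it to read the front element's timestamp; a rough C++ sketch of that pattern, with the node layout abbreviated and front_timestamp() being an illustrative name:

    // Sketch only: intrusive node with the links/timestamp fields the prototype expects.
    struct node_t {
        struct {
            node_t * next = nullptr;
            unsigned long long ts = 0;
        } _links;
    };

    // Timestamp of the oldest queued element, or 0 if the queue looks empty,
    // mirroring the ts() helper added to localQ_t in work_stealing.hpp.
    template<typename queue_t>
    unsigned long long front_timestamp(const queue_t & queue) {
        node_t * h = queue.head();       // new accessor: peek, do not pop
        return h ? h->_links.ts : 0ull;
    }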
doc/theses/thierry_delisle_PhD/code/readyQ_proto/utils.hpp
r6a8208cb r59f3f61
  #include <sys/sysinfo.h>

- #include <x86intrin.h>
+ // #include <x86intrin.h>

  // class Random {
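With <x86intrin.h> commented out here and in work_stealing.hpp, the prototypes still time-stamp nodes with rdtscl() in the push path, so that helper has to come from elsewhere. A hedged sketch of what such a time-stamp-counter reader can look like on x86-64 using inline assembly; the actual definition used by the prototype may differ:

    #include <cstdint>

    // Read the x86-64 time-stamp counter without pulling in <x86intrin.h>.
    static inline uint64_t rdtscl() {
        uint32_t lo, hi;
        __asm__ __volatile__( "rdtsc" : "=a"(lo), "=d"(hi) );
        return ((uint64_t)hi << 32) | lo;
    }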
doc/theses/thierry_delisle_PhD/code/readyQ_proto/work_stealing.hpp
r6a8208cb r59f3f61 15 15 #include "snzi.hpp" 16 16 17 #include <x86intrin.h>17 // #include <x86intrin.h> 18 18 19 19 using namespace std; … … 28 28 template<typename node_t> 29 29 struct __attribute__((aligned(128))) localQ_t { 30 mpsc_queue<node_t> queue = {}; 31 spinlock_t lock = {}; 32 bool needs_help = true; 30 #ifdef NO_MPSC 31 intrusive_queue_t<node_t> list; 32 33 inline auto ts() { return list.ts(); } 34 inline auto lock() { return list.lock.lock(); } 35 inline auto try_lock() { return list.lock.try_lock(); } 36 inline auto unlock() { return list.lock.unlock(); } 37 38 inline auto push( node_t * node ) { return list.push( node ); } 39 inline auto pop() { return list.pop(); } 40 #else 41 mpsc_queue<node_t> queue = {}; 42 spinlock_t _lock = {}; 43 44 inline auto ts() { auto h = queue.head(); return h ? h->_links.ts : 0ull; } 45 inline auto lock() { return _lock.lock(); } 46 inline auto try_lock() { return _lock.try_lock(); } 47 inline auto unlock() { return _lock.unlock(); } 48 49 inline auto push( node_t * node ) { return queue.push( node ); } 50 inline auto pop() { return queue.pop(); } 51 #endif 52 53 33 54 }; 34 55 … … 44 65 work_stealing(unsigned _numThreads, unsigned) 45 66 : numThreads(_numThreads * nqueues) 46 , lists(new intrusive_queue_t<node_t>[numThreads]) 67 , lists(new localQ_t<node_t>[numThreads]) 68 // , lists(new intrusive_queue_t<node_t>[numThreads]) 47 69 , times(new timestamp_t[numThreads]) 48 70 // , snzi( std::log2( numThreads / 2 ), 2 ) … … 58 80 59 81 __attribute__((noinline, hot)) void push(node_t * node) { 60 //node->_links.ts = rdtscl();61 node->_links.ts = 1;82 node->_links.ts = rdtscl(); 83 // node->_links.ts = 1; 62 84 63 85 auto & list = *({ 64 86 unsigned i; 65 do { 87 #ifdef NO_MPSC 88 do { 89 #endif 66 90 tls.stats.push.attempt++; 67 91 // unsigned r = tls.rng1.next(); … … 72 96 i = tls.my_queue + (r % nqueues); 73 97 } 74 } while(!lists[i].lock.try_lock()); 98 #ifdef NO_MPSC 99 } while(!lists[i].try_lock()); 100 #endif 75 101 &lists[i]; 76 102 }); 77 103 78 104 list.push( node ); 79 list.lock.unlock(); 105 #ifdef NO_MPSC 106 list.unlock(); 107 #endif 80 108 // tls.rng2.set_raw_state( tls.rng1.get_raw_state()); 81 109 // count++; … … 84 112 85 113 __attribute__((noinline, hot)) node_t * pop() { 86 if( tls.myfriend == outside ) {87 auto r = tls.rng1.next();88 tls.myfriend = r % numThreads;89 times[tls.myfriend].val = 0;90 }91 else if(times[tls.myfriend].val == 0) {92 node_t * n = try_pop(tls.myfriend, tls.stats.pop.help);93 tls.stats.help++;94 tls.myfriend = outside;95 if(n) return n;96 }97 98 114 if(tls.my_queue != outside) { 115 // if( tls.myfriend == outside ) { 116 // auto r = tls.rng1.next(); 117 // tls.myfriend = r % numThreads; 118 // // assert(lists[(tls.it % nqueues) + tls.my_queue].ts() >= lists[((tls.it + 1) % nqueues) + tls.my_queue].ts()); 119 // tls.mytime = std::min(lists[(tls.it % nqueues) + tls.my_queue].ts(), lists[((tls.it + 1) % nqueues) + tls.my_queue].ts()); 120 // // times[tls.myfriend].val = 0; 121 // // lists[tls.myfriend].val = 0; 122 // } 123 // // else if(times[tls.myfriend].val == 0) { 124 // // else if(lists[tls.myfriend].val == 0) { 125 // else if(times[tls.myfriend].val < tls.mytime) { 126 // // else if(times[tls.myfriend].val < lists[(tls.it % nqueues) + tls.my_queue].ts()) { 127 // node_t * n = try_pop(tls.myfriend, tls.stats.pop.help); 128 // tls.stats.help++; 129 // tls.myfriend = outside; 130 // if(n) return n; 131 // } 132 // if( tls.myfriend == outside ) { 133 // auto r = tls.rng1.next(); 134 // tls.myfriend = r % 
numThreads; 135 // tls.mytime = lists[((tls.it + 1) % nqueues) + tls.my_queue].ts(); 136 // } 137 // else { 138 // if(times[tls.myfriend].val + 1000 < tls.mytime) { 139 // node_t * n = try_pop(tls.myfriend, tls.stats.pop.help); 140 // tls.stats.help++; 141 // if(n) return n; 142 // } 143 // tls.myfriend = outside; 144 // } 145 99 146 node_t * n = local(); 100 147 if(n) return n; … … 112 159 private: 113 160 inline node_t * local() { 114 // unsigned i = (tls.rng2.prev() % 4) + tls.my_queue;115 161 unsigned i = (--tls.it % nqueues) + tls.my_queue; 162 node_t * n = try_pop(i, tls.stats.pop.local); 163 if(n) return n; 164 i = (--tls.it % nqueues) + tls.my_queue; 116 165 return try_pop(i, tls.stats.pop.local); 117 166 } … … 153 202 154 203 // If we can't get the lock, move on 155 if( !list.lock.try_lock() ) { stat.elock++; return nullptr; } 156 204 if( !list.try_lock() ) { stat.elock++; return nullptr; } 157 205 158 206 // If list is empty, unlock and retry 159 207 if( list.ts() == 0 ) { 160 list. lock.unlock();208 list.unlock(); 161 209 stat.eempty++; 162 210 return nullptr; … … 164 212 165 213 auto node = list.pop(); 166 list. lock.unlock();214 list.unlock(); 167 215 stat.success++; 168 times[i].val = 1; //node.first->_links.ts; 169 // count--; 170 // _mm_stream_si64((long long int*)×[i].val, node.first->_links.ts); 171 return node.first; 216 #ifdef NO_MPSC 217 // times[i].val = 1; 218 times[i].val = node.first->_links.ts; 219 // lists[i].val = node.first->_links.ts; 220 return node.first; 221 #else 222 times[i].val = node->_links.ts; 223 return node; 224 #endif 172 225 } 173 226 … … 191 244 unsigned my_queue = calc_preferred(); 192 245 unsigned myfriend = outside; 246 unsigned long long int mytime = 0; 193 247 #if defined(READ) 194 248 unsigned it = 0; … … 211 265 private: 212 266 const unsigned numThreads; 213 std::unique_ptr<intrusive_queue_t<node_t> []> lists; 267 std::unique_ptr<localQ_t<node_t> []> lists; 268 // std::unique_ptr<intrusive_queue_t<node_t> []> lists; 214 269 std::unique_ptr<timestamp_t []> times; 215 270 __attribute__((aligned(128))) std::atomic_size_t count; -
example/io/batch-readv.c
r6a8208cb r59f3f61
  }

- uint64_t getTimeNsec() {
+ uint64_t timeHiRes() {
      timespec curr;
      clock_gettime( CLOCK_REALTIME, &curr );
…

  printf("Running for %f second, reading %d bytes in batches of %d\n", duration, buflen, batch);
- uint64_t start = getTimeNsec();
- uint64_t end = getTimeNsec();
- uint64_t prev = getTimeNsec();
+ uint64_t start = timeHiRes();
+ uint64_t end = timeHiRes();
+ uint64_t prev = timeHiRes();
  for(;;) {
      submit_and_drain(&iov, batch);
-     end = getTimeNsec();
+     end = timeHiRes();
      uint64_t delta = end - start;
      if( to_fseconds(end - prev) > 0.1 ) {
libcfa/src/bits/weakso_locks.cfa
r6a8208cb r59f3f61
  void unlock( blocking_lock & ) {}
  void on_notify( blocking_lock &, struct $thread * ) {}
- size_t on_wait( blocking_lock & ) { }
+ size_t on_wait( blocking_lock & ) { return 0; }
  void on_wakeup( blocking_lock &, size_t ) {}
  size_t wait_count( blocking_lock & ) { return 0; }
libcfa/src/clock.hfa
r6a8208cb r59f3f61
  // Created On       : Thu Apr 12 14:36:06 2018
  // Last Modified By : Peter A. Buhr
- // Last Modified On : Mon Jan  6 12:49:58 2020
- // Update Count     : 9
+ // Last Modified On : Sun Apr 18 08:12:16 2021
+ // Update Count     : 28
  //

…
  //######################### Clock #########################

- struct Clock { // private
-     Duration offset; // for virtual clock: contains offset from real-time
+ struct Clock { // virtual clock
+     // private
+     Duration offset; // offset from computer real-time
  };

  static inline {
-     void resetClock( Clock & clk, Duration adj ) with( clk ) {
+     void reset( Clock & clk, Duration adj ) with( clk ) { // change offset
          offset = adj + __timezone`s; // timezone (global) is (UTC - local time) in seconds
-     } // resetClock
+     } // reset

-     void ?{}( Clock & clk, Duration adj ) { resetClock( clk, adj ); }
+     void ?{}( Clock & clk ) { reset( clk, (Duration){ 0 } ); } // create no offset
+     void ?{}( Clock & clk, Duration adj ) { reset( clk, adj ); } // create with offset

-     Duration getResNsec() {
+     // System-wide clock that measures real, i.e., wall-clock time. This clock is affected by discontinuous jumps in
+     // the system time. For example, manual changes of the clock, and incremental adjustments performed by adjtime(3)
+     // and NTP (daylight saving, Fall back).
+     Duration resolutionHi() { // clock resolution in nanoseconds (fine)
          struct timespec res;
          clock_getres( CLOCK_REALTIME, &res );
          return ((int64_t)res.tv_sec * TIMEGRAN + res.tv_nsec)`ns;
-     } // getRes
+     } // resolutionHi

-     Duration getRes() {
+     Duration resolution() { // clock resolution without nanoseconds (coarse)
          struct timespec res;
          clock_getres( CLOCK_REALTIME_COARSE, &res );
          return ((int64_t)res.tv_sec * TIMEGRAN + res.tv_nsec)`ns;
-     } // getRes
+     } // resolution

-     Time getTimeNsec() { // with nanoseconds
+     Time timeHiRes() { // real time with nanoseconds
          timespec curr;
          clock_gettime( CLOCK_REALTIME, &curr );
          return (Time){ curr };
-     } // getTimeNsec
+     } // timeHiRes

-     Time getTime() { // without nanoseconds
+     Time time() { // real time without nanoseconds
          timespec curr;
          clock_gettime( CLOCK_REALTIME_COARSE, &curr );
          curr.tv_nsec = 0;
          return (Time){ curr };
-     } // getTime
+     } // time

-     Time getTime( Clock & clk ) with( clk ) {
-         return getTime() + offset;
-     } // getTime
+     Time time( Clock & clk ) with( clk ) { // real time for given clock
+         return time() + offset;
+     } // time

      Time ?()( Clock & clk ) with( clk ) { // alternative syntax
-         return getTime() + offset;
-     } // getTime
+         return time() + offset;
+     } // ?()

-     timeval getTime( Clock & clk ) {
+     timeval time( Clock & clk ) { // convert to C time format
          return (timeval){ clk() };
-     } // getTime
+     } // time

-     tm getTime( Clock & clk ) with( clk ) {
+     tm time( Clock & clk ) with( clk ) {
          tm ret;
-         localtime_r( getTime( clk ).tv_sec, &ret );
+         localtime_r( time( clk ).tv_sec, &ret );
          return ret;
-     } // getTime
+     } // time

-     Time getCPUTime() {
+     // CFA processor CPU-time watch that ticks when the processor (kernel thread) is running. This watch is affected by
+     // discontinuous jumps when the OS is not running the kernel thread. A duration is returned because the value is
+     // relative and cannot be converted to real-time (wall-clock) time.
+     Duration processor() { // non-monotonic duration of kernel thread
          timespec ts;
          clock_gettime( CLOCK_THREAD_CPUTIME_ID, &ts );
-         return (Time){ ts };
-     } // getCPUTime
+         return (Duration){ ts };
+     } // processor
+
+     // Program CPU-time watch measures CPU time consumed by all processors (kernel threads) in the UNIX process. This
+     // watch is affected by discontinuous jumps when the OS is not running the kernel threads. A duration is returned
+     // because the value is relative and cannot be converted to real-time (wall-clock) time.
+     Duration program() { // non-monotonic duration of program CPU
+         timespec ts;
+         clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts );
+         return (Duration){ ts };
+     } // program
+
+     // Monotonic duration from machine boot and including system suspension. This watch is unaffected by discontinuous
+     // jumps resulting from manual changes of the clock, and incremental adjustments performed by adjtime(3) and NTP
+     // (Fall back). A duration is returned because the value is relative and cannot be converted to real-time
+     // (wall-clock) time.
+     Duration boot() { // monotonic duration since computer boot
+         timespec ts;
+         clock_gettime( CLOCK_BOOTTIME, &ts );
+         return (Duration){ ts };
+     } // boot
  } // distribution
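Each of the renamed CFA routines wraps a distinct POSIX clock. A plain C++ sketch of the underlying calls, for reference; the CFA wrappers above return Time/Duration values rather than raw timespecs, and CLOCK_BOOTTIME is Linux-specific:

    #include <ctime>

    // POSIX clocks behind the renamed routines in clock.hfa.
    void sample_all_clocks( timespec out[5] ) {
        clock_gettime( CLOCK_REALTIME,           &out[0] ); // timeHiRes(): wall clock, nanosecond precision
        clock_gettime( CLOCK_REALTIME_COARSE,    &out[1] ); // time(): wall clock, coarse
        clock_gettime( CLOCK_THREAD_CPUTIME_ID,  &out[2] ); // processor(): CPU time of this kernel thread
        clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &out[3] ); // program(): CPU time of the whole process
        clock_gettime( CLOCK_BOOTTIME,           &out[4] ); // boot(): monotonic since boot, includes suspend
    }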
libcfa/src/concurrency/invoke.h
r6a8208cb r59f3f61
  struct $thread * prev;
  volatile unsigned long long ts;
- int preferred;
  };
libcfa/src/concurrency/io/call.cfa.in
r6a8208cb r59f3f61 201 201 202 202 sqe->opcode = IORING_OP_{op}; 203 sqe->user_data = ( __u64)(uintptr_t)&future;203 sqe->user_data = (uintptr_t)&future; 204 204 sqe->flags = sflags; 205 205 sqe->ioprio = 0; … … 215 215 asm volatile("": : :"memory"); 216 216 217 verify( sqe->user_data == ( __u64)(uintptr_t)&future );217 verify( sqe->user_data == (uintptr_t)&future ); 218 218 cfa_io_submit( ctx, &idx, 1, 0 != (submit_flags & CFA_IO_LAZY) ); 219 219 #endif … … 238 238 'fd' : 'fd', 239 239 'off' : 'offset', 240 'addr': '( __u64)iov',240 'addr': '(uintptr_t)iov', 241 241 'len' : 'iovcnt', 242 242 }, define = 'CFA_HAVE_PREADV2'), … … 245 245 'fd' : 'fd', 246 246 'off' : 'offset', 247 'addr': '( __u64)iov',247 'addr': '(uintptr_t)iov', 248 248 'len' : 'iovcnt' 249 249 }, define = 'CFA_HAVE_PWRITEV2'), … … 257 257 'addr': 'fd', 258 258 'len': 'op', 259 'off': '( __u64)event'259 'off': '(uintptr_t)event' 260 260 }), 261 261 # CFA_HAVE_IORING_OP_SYNC_FILE_RANGE … … 269 269 Call('SENDMSG', 'ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags)', { 270 270 'fd': 'sockfd', 271 'addr': '( __u64)(struct msghdr *)msg',271 'addr': '(uintptr_t)(struct msghdr *)msg', 272 272 'len': '1', 273 273 'msg_flags': 'flags' … … 276 276 Call('RECVMSG', 'ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags)', { 277 277 'fd': 'sockfd', 278 'addr': '( __u64)(struct msghdr *)msg',278 'addr': '(uintptr_t)(struct msghdr *)msg', 279 279 'len': '1', 280 280 'msg_flags': 'flags' … … 283 283 Call('SEND', 'ssize_t send(int sockfd, const void *buf, size_t len, int flags)', { 284 284 'fd': 'sockfd', 285 'addr': '( __u64)buf',285 'addr': '(uintptr_t)buf', 286 286 'len': 'len', 287 287 'msg_flags': 'flags' … … 290 290 Call('RECV', 'ssize_t recv(int sockfd, void *buf, size_t len, int flags)', { 291 291 'fd': 'sockfd', 292 'addr': '( __u64)buf',292 'addr': '(uintptr_t)buf', 293 293 'len': 'len', 294 294 'msg_flags': 'flags' … … 297 297 Call('ACCEPT', 'int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags)', { 298 298 'fd': 'sockfd', 299 'addr': '( __u64)addr',300 'addr2': '( __u64)addrlen',299 'addr': '(uintptr_t)addr', 300 'addr2': '(uintptr_t)addrlen', 301 301 'accept_flags': 'flags' 302 302 }), … … 304 304 Call('CONNECT', 'int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen)', { 305 305 'fd': 'sockfd', 306 'addr': '( __u64)addr',306 'addr': '(uintptr_t)addr', 307 307 'off': 'addrlen' 308 308 }), … … 310 310 Call('FALLOCATE', 'int fallocate(int fd, int mode, off_t offset, off_t len)', { 311 311 'fd': 'fd', 312 'addr': '( __u64)len',312 'addr': '(uintptr_t)len', 313 313 'len': 'mode', 314 314 'off': 'offset' … … 323 323 # CFA_HAVE_IORING_OP_MADVISE 324 324 Call('MADVISE', 'int madvise(void *addr, size_t length, int advice)', { 325 'addr': '( __u64)addr',325 'addr': '(uintptr_t)addr', 326 326 'len': 'length', 327 327 'fadvise_advice': 'advice' … … 330 330 Call('OPENAT', 'int openat(int dirfd, const char *pathname, int flags, mode_t mode)', { 331 331 'fd': 'dirfd', 332 'addr': '( __u64)pathname',332 'addr': '(uintptr_t)pathname', 333 333 'len': 'mode', 334 334 'open_flags': 'flags;' … … 339 339 'addr': 'pathname', 340 340 'len': 'sizeof(*how)', 341 'off': '( __u64)how',341 'off': '(uintptr_t)how', 342 342 }, define = 'CFA_HAVE_OPENAT2'), 343 343 # CFA_HAVE_IORING_OP_CLOSE … … 348 348 Call('STATX', 'int statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf)', { 349 349 'fd': 'dirfd', 350 'off': '( __u64)statxbuf',350 'off': 
'(uintptr_t)statxbuf', 351 351 'addr': 'pathname', 352 352 'len': 'mask', … … 356 356 Call('READ', 'ssize_t read(int fd, void * buf, size_t count)', { 357 357 'fd': 'fd', 358 'addr': '( __u64)buf',358 'addr': '(uintptr_t)buf', 359 359 'len': 'count' 360 360 }), … … 362 362 Call('WRITE', 'ssize_t write(int fd, void * buf, size_t count)', { 363 363 'fd': 'fd', 364 'addr': '( __u64)buf',364 'addr': '(uintptr_t)buf', 365 365 'len': 'count' 366 366 }), -
libcfa/src/concurrency/kernel.cfa
r6a8208cb r59f3f61 113 113 static void __wake_one(cluster * cltr); 114 114 115 static void push (__cluster_idles& idles, processor & proc);116 static void remove(__cluster_idles& idles, processor & proc);117 static [unsigned idle, unsigned total, * processor] query ( & __cluster_idlesidles );115 static void mark_idle (__cluster_proc_list & idles, processor & proc); 116 static void mark_awake(__cluster_proc_list & idles, processor & proc); 117 static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list idles ); 118 118 119 119 extern void __cfa_io_start( processor * ); … … 189 189 190 190 // Push self to idle stack 191 push(this->cltr->idles, * this);191 mark_idle(this->cltr->procs, * this); 192 192 193 193 // Confirm the ready-queue is empty … … 195 195 if( readyThread ) { 196 196 // A thread was found, cancel the halt 197 remove(this->cltr->idles, * this);197 mark_awake(this->cltr->procs, * this); 198 198 199 199 #if !defined(__CFA_NO_STATISTICS__) … … 225 225 226 226 // We were woken up, remove self from idle 227 remove(this->cltr->idles, * this);227 mark_awake(this->cltr->procs, * this); 228 228 229 229 // DON'T just proceed, start looking again … … 474 474 475 475 ready_schedule_lock(); 476 $thread * thrd = pop ( this );476 $thread * thrd = pop_fast( this ); 477 477 ready_schedule_unlock(); 478 478 … … 617 617 unsigned idle; 618 618 unsigned total; 619 [idle, total, p] = query (this->idles);619 [idle, total, p] = query_idles(this->procs); 620 620 621 621 // If no one is sleeping, we are done … … 654 654 } 655 655 656 static void push (__cluster_idles& this, processor & proc) {656 static void mark_idle(__cluster_proc_list & this, processor & proc) { 657 657 /* paranoid */ verify( ! __preemption_enabled() ); 658 658 lock( this ); 659 659 this.idle++; 660 660 /* paranoid */ verify( this.idle <= this.total ); 661 662 insert_first(this. list, proc);661 remove(proc); 662 insert_first(this.idles, proc); 663 663 unlock( this ); 664 664 /* paranoid */ verify( ! __preemption_enabled() ); 665 665 } 666 666 667 static void remove(__cluster_idles& this, processor & proc) {667 static void mark_awake(__cluster_proc_list & this, processor & proc) { 668 668 /* paranoid */ verify( ! __preemption_enabled() ); 669 669 lock( this ); 670 670 this.idle--; 671 671 /* paranoid */ verify( this.idle >= 0 ); 672 673 672 remove(proc); 673 insert_last(this.actives, proc); 674 674 unlock( this ); 675 675 /* paranoid */ verify( ! __preemption_enabled() ); 676 676 } 677 677 678 static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) { 678 static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list this ) { 679 /* paranoid */ verify( ! __preemption_enabled() ); 680 /* paranoid */ verify( ready_schedule_islocked() ); 681 679 682 for() { 680 683 uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST); … … 682 685 unsigned idle = this.idle; 683 686 unsigned total = this.total; 684 processor * proc = &this. list`first;687 processor * proc = &this.idles`first; 685 688 // Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it 686 689 asm volatile("": : :"memory"); … … 688 691 return [idle, total, proc]; 689 692 } 693 694 /* paranoid */ verify( ready_schedule_islocked() ); 695 /* paranoid */ verify( ! __preemption_enabled() ); 690 696 } 691 697 -
libcfa/src/concurrency/kernel.hfa
r6a8208cb r59f3f61 69 69 struct cluster * cltr; 70 70 71 // Id within the cluster 72 unsigned cltr_id; 71 // Ready Queue state per processor 72 struct { 73 unsigned short its; 74 unsigned short itr; 75 unsigned id; 76 unsigned target; 77 unsigned long long int cutoff; 78 } rdq; 73 79 74 80 // Set to true to notify the processor should terminate … … 140 146 // Cluster Tools 141 147 142 // Intrusives lanes which are used by the re laxed ready queue148 // Intrusives lanes which are used by the ready queue 143 149 struct __attribute__((aligned(128))) __intrusive_lane_t; 144 150 void ?{}(__intrusive_lane_t & this); 145 151 void ^?{}(__intrusive_lane_t & this); 146 152 147 // Counter used for wether or not the lanes are all empty 148 struct __attribute__((aligned(128))) __snzi_node_t; 149 struct __snzi_t { 150 unsigned mask; 151 int root; 152 __snzi_node_t * nodes; 153 }; 154 155 void ?{}( __snzi_t & this, unsigned depth ); 156 void ^?{}( __snzi_t & this ); 153 // Aligned timestamps which are used by the relaxed ready queue 154 struct __attribute__((aligned(128))) __timestamp_t; 155 void ?{}(__timestamp_t & this); 156 void ^?{}(__timestamp_t & this); 157 157 158 158 //TODO adjust cache size to ARCHITECTURE 159 159 // Structure holding the relaxed ready queue 160 160 struct __ready_queue_t { 161 // Data tracking how many/which lanes are used162 // Aligned to 128 for cache locality163 __snzi_t snzi;164 165 161 // Data tracking the actual lanes 166 162 // On a seperate cacheline from the used struct since … … 171 167 __intrusive_lane_t * volatile data; 172 168 169 // Array of times 170 __timestamp_t * volatile tscs; 171 173 172 // Number of lanes (empty or not) 174 173 volatile size_t count; … … 180 179 181 180 // Idle Sleep 182 struct __cluster_ idles{181 struct __cluster_proc_list { 183 182 // Spin lock protecting the queue 184 183 volatile uint64_t lock; … … 191 190 192 191 // List of idle processors 193 dlist(processor, processor) list; 192 dlist(processor, processor) idles; 193 194 // List of active processors 195 dlist(processor, processor) actives; 194 196 }; 195 197 … … 207 209 208 210 // List of idle processors 209 __cluster_ idles idles;211 __cluster_proc_list procs; 210 212 211 213 // List of threads -
libcfa/src/concurrency/kernel/startup.cfa
r6a8208cb r59f3f61 469 469 this.name = name; 470 470 this.cltr = &_cltr; 471 this.rdq.its = 0; 472 this.rdq.itr = 0; 473 this.rdq.id = -1u; 474 this.rdq.target = -1u; 475 this.rdq.cutoff = -1ull; 471 476 do_terminate = false; 472 477 preemption_alarm = 0p; … … 489 494 #endif 490 495 491 lock( this.cltr->idles ); 492 int target = this.cltr->idles.total += 1u; 493 unlock( this.cltr->idles ); 494 495 id = doregister((__processor_id_t*)&this); 496 496 // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue 497 uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this); 498 this.cltr->procs.total += 1u; 499 insert_last(this.cltr->procs.actives, this); 500 501 // Adjust the ready queue size 502 ready_queue_grow( cltr ); 503 504 // Unlock the RWlock 505 ready_mutate_unlock( last_size ); 506 507 __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this); 508 } 509 510 // Not a ctor, it just preps the destruction but should not destroy members 511 static void deinit(processor & this) { 497 512 // Lock the RWlock so no-one pushes/pops while we are changing the queue 498 513 uint_fast32_t last_size = ready_mutate_lock(); 514 this.cltr->procs.total -= 1u; 515 remove(this); 499 516 500 517 // Adjust the ready queue size 501 this.cltr_id = ready_queue_grow( cltr, target ); 502 503 // Unlock the RWlock 504 ready_mutate_unlock( last_size ); 505 506 __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this); 507 } 508 509 // Not a ctor, it just preps the destruction but should not destroy members 510 static void deinit(processor & this) { 511 lock( this.cltr->idles ); 512 int target = this.cltr->idles.total -= 1u; 513 unlock( this.cltr->idles ); 514 515 // Lock the RWlock so no-one pushes/pops while we are changing the queue 516 uint_fast32_t last_size = ready_mutate_lock(); 517 518 // Adjust the ready queue size 519 ready_queue_shrink( this.cltr, target ); 520 521 // Unlock the RWlock 522 ready_mutate_unlock( last_size ); 523 524 // Finally we don't need the read_lock any more 525 unregister((__processor_id_t*)&this); 518 ready_queue_shrink( this.cltr ); 519 520 // Unlock the RWlock and unregister: we don't need the read_lock any more 521 ready_mutate_unregister((__processor_id_t*)&this, last_size ); 526 522 527 523 close(this.idle); … … 566 562 //----------------------------------------------------------------------------- 567 563 // Cluster 568 static void ?{}(__cluster_ idles& this) {564 static void ?{}(__cluster_proc_list & this) { 569 565 this.lock = 0; 570 566 this.idle = 0; 571 567 this.total = 0; 572 (this.list){};573 568 } 574 569 … … 596 591 597 592 // Adjust the ready queue size 598 ready_queue_grow( &this , 0);593 ready_queue_grow( &this ); 599 594 600 595 // Unlock the RWlock … … 611 606 612 607 // Adjust the ready queue size 613 ready_queue_shrink( &this , 0);608 ready_queue_shrink( &this ); 614 609 615 610 // Unlock the RWlock -
libcfa/src/concurrency/kernel_private.hfa
r6a8208cb r59f3f61 83 83 // Cluster lock API 84 84 //======================================================================= 85 // Cells use by the reader writer lock86 // while not generic it only relies on a opaque pointer87 struct __attribute__((aligned(128))) __scheduler_lock_id_t {88 // Spin lock used as the underlying lock89 volatile bool lock;90 91 // Handle pointing to the proc owning this cell92 // Used for allocating cells and debugging93 __processor_id_t * volatile handle;94 95 #ifdef __CFA_WITH_VERIFY__96 // Debug, check if this is owned for reading97 bool owned;98 #endif99 };100 101 static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));102 103 85 // Lock-Free registering/unregistering of threads 104 86 // Register a processor to a given cluster and get its unique id in return 105 unsigned doregister( struct __processor_id_t * proc);87 void register_proc_id( struct __processor_id_t * ); 106 88 107 89 // Unregister a processor from a given cluster using its id, getting back the original pointer 108 void unregister( struct __processor_id_t * proc ); 109 110 //----------------------------------------------------------------------- 111 // Cluster idle lock/unlock 112 static inline void lock(__cluster_idles & this) { 113 for() { 114 uint64_t l = this.lock; 115 if( 116 (0 == (l % 2)) 117 && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) 118 ) return; 119 Pause(); 120 } 121 } 122 123 static inline void unlock(__cluster_idles & this) { 124 /* paranoid */ verify( 1 == (this.lock % 2) ); 125 __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST ); 126 } 90 void unregister_proc_id( struct __processor_id_t * proc ); 127 91 128 92 //======================================================================= … … 152 116 __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE); 153 117 } 118 119 // Cells use by the reader writer lock 120 // while not generic it only relies on a opaque pointer 121 struct __attribute__((aligned(128))) __scheduler_lock_id_t { 122 // Spin lock used as the underlying lock 123 volatile bool lock; 124 125 // Handle pointing to the proc owning this cell 126 // Used for allocating cells and debugging 127 __processor_id_t * volatile handle; 128 129 #ifdef __CFA_WITH_VERIFY__ 130 // Debug, check if this is owned for reading 131 bool owned; 132 #endif 133 }; 134 135 static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t)); 154 136 155 137 //----------------------------------------------------------------------- … … 247 229 void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ ); 248 230 231 //----------------------------------------------------------------------- 232 // Lock-Free registering/unregistering of threads 233 // Register a processor to a given cluster and get its unique id in return 234 // For convenience, also acquires the lock 235 static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) { 236 register_proc_id( proc ); 237 return ready_mutate_lock(); 238 } 239 240 // Unregister a processor from a given cluster using its id, getting back the original pointer 241 // assumes the lock is acquired 242 static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) { 243 ready_mutate_unlock( last_s ); 244 unregister_proc_id( proc ); 245 } 246 247 //----------------------------------------------------------------------- 248 // Cluster idle lock/unlock 249 static inline void 
lock(__cluster_proc_list & this) { 250 /* paranoid */ verify( ! __preemption_enabled() ); 251 252 // Start by locking the global RWlock so that we know no-one is 253 // adding/removing processors while we mess with the idle lock 254 ready_schedule_lock(); 255 256 // Simple counting lock, acquired, acquired by incrementing the counter 257 // to an odd number 258 for() { 259 uint64_t l = this.lock; 260 if( 261 (0 == (l % 2)) 262 && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) 263 ) return; 264 Pause(); 265 } 266 267 /* paranoid */ verify( ! __preemption_enabled() ); 268 } 269 270 static inline void unlock(__cluster_proc_list & this) { 271 /* paranoid */ verify( ! __preemption_enabled() ); 272 273 /* paranoid */ verify( 1 == (this.lock % 2) ); 274 // Simple couting lock, release by incrementing to an even number 275 __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST ); 276 277 // Release the global lock, which we acquired when locking 278 ready_schedule_unlock(); 279 280 /* paranoid */ verify( ! __preemption_enabled() ); 281 } 282 249 283 //======================================================================= 250 284 // Ready-Queue API 251 285 //----------------------------------------------------------------------- 252 // pop thread from the ready queue of a cluster253 // returns 0p if empty254 __attribute__((hot)) bool query(struct cluster * cltr);255 256 //-----------------------------------------------------------------------257 286 // push thread onto a ready queue for a cluster 258 287 // returns true if the list was previously empty, false otherwise 259 __attribute__((hot)) boolpush(struct cluster * cltr, struct $thread * thrd);288 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd); 260 289 261 290 //----------------------------------------------------------------------- … … 263 292 // returns 0p if empty 264 293 // May return 0p spuriously 265 __attribute__((hot)) struct $thread * pop (struct cluster * cltr);294 __attribute__((hot)) struct $thread * pop_fast(struct cluster * cltr); 266 295 267 296 //----------------------------------------------------------------------- … … 272 301 273 302 //----------------------------------------------------------------------- 274 // remove thread from the ready queue of a cluster275 // returns bool if it wasn't found276 bool remove_head(struct cluster * cltr, struct $thread * thrd);277 278 //-----------------------------------------------------------------------279 303 // Increase the width of the ready queue (number of lanes) by 4 280 unsigned ready_queue_grow (struct cluster * cltr, int target);304 void ready_queue_grow (struct cluster * cltr); 281 305 282 306 //----------------------------------------------------------------------- 283 307 // Decrease the width of the ready queue (number of lanes) by 4 284 void ready_queue_shrink(struct cluster * cltr , int target);308 void ready_queue_shrink(struct cluster * cltr); 285 309 286 310 -
libcfa/src/concurrency/preemption.cfa
r6a8208cb r59f3f61
  static void * alarm_loop( __attribute__((unused)) void * args ) {
      __processor_id_t id;
-     id.id = doregister(&id);
+     register_proc_id(&id);
      __cfaabi_tls.this_proc_id = &id;

…
  EXIT:
      __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
-     unregister(&id);
+     register_proc_id(&id);

      return 0p;
libcfa/src/concurrency/ready_queue.cfa
r6a8208cb r59f3f61 17 17 // #define __CFA_DEBUG_PRINT_READY_QUEUE__ 18 18 19 // #define USE_SNZI20 19 // #define USE_MPSC 20 21 #define USE_RELAXED_FIFO 22 // #define USE_WORK_STEALING 21 23 22 24 #include "bits/defs.hfa" … … 29 31 #include <unistd.h> 30 32 31 #include "snzi.hfa"32 33 #include "ready_subqueue.hfa" 33 34 34 35 static const size_t cache_line_size = 64; 36 37 #if !defined(__CFA_NO_STATISTICS__) 38 #define __STATS(...) __VA_ARGS__ 39 #else 40 #define __STATS(...) 41 #endif 35 42 36 43 // No overriden function, no environment variable, no define … … 40 47 #endif 41 48 42 #define BIAS 4 49 #if defined(USE_RELAXED_FIFO) 50 #define BIAS 4 51 #define READYQ_SHARD_FACTOR 4 52 #define SEQUENTIAL_SHARD 1 53 #elif defined(USE_WORK_STEALING) 54 #define READYQ_SHARD_FACTOR 2 55 #define SEQUENTIAL_SHARD 2 56 #else 57 #error no scheduling strategy selected 58 #endif 59 60 static inline struct $thread * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats)); 61 static inline struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats)); 62 static inline struct $thread * search(struct cluster * cltr); 63 static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred); 64 43 65 44 66 // returns the maximum number of processors the RWLock support … … 94 116 //======================================================================= 95 117 // Lock-Free registering/unregistering of threads 96 unsigned doregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {118 void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) { 97 119 __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc); 98 120 … … 108 130 /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size)); 109 131 /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0); 110 returni;132 proc->id = i; 111 133 } 112 134 } … … 135 157 /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size)); 136 158 /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0); 137 returnn;138 } 139 140 void unregister ( struct __processor_id_t * proc ) with(*__scheduler_lock) {159 proc->id = n; 160 } 161 162 void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) { 141 163 unsigned id = proc->id; 142 164 /*paranoid*/ verify(id < ready); … … 193 215 194 216 //======================================================================= 195 // Cforall Re qdy Queue used for scheduling217 // Cforall Ready Queue used for scheduling 196 218 //======================================================================= 197 219 void ?{}(__ready_queue_t & this) with (this) { 198 220 lanes.data = 0p; 221 lanes.tscs = 0p; 199 222 lanes.count = 0; 200 223 } 201 224 202 225 void ^?{}(__ready_queue_t & this) with (this) { 203 verify( 1 == lanes.count ); 204 #ifdef USE_SNZI 205 verify( !query( snzi ) ); 206 #endif 226 verify( SEQUENTIAL_SHARD == lanes.count ); 207 227 free(lanes.data); 228 free(lanes.tscs); 208 229 } 209 230 210 231 //----------------------------------------------------------------------- 211 __attribute__((hot)) bool query(struct cluster * cltr) { 212 #ifdef USE_SNZI 213 return query(cltr->ready_queue.snzi); 214 #endif 215 return true; 216 } 217 218 static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred) { 219 unsigned i; 220 bool local; 221 #if defined(BIAS) 232 #if defined(USE_RELAXED_FIFO) 233 
//----------------------------------------------------------------------- 234 // get index from random number with or without bias towards queues 235 static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred) { 236 unsigned i; 237 bool local; 222 238 unsigned rlow = r % BIAS; 223 239 unsigned rhigh = r / BIAS; … … 225 241 // (BIAS - 1) out of BIAS chances 226 242 // Use perferred queues 227 i = preferred + (rhigh % 4);243 i = preferred + (rhigh % READYQ_SHARD_FACTOR); 228 244 local = true; 229 245 } … … 234 250 local = false; 235 251 } 236 #else 237 i = r; 238 local = false; 239 #endif 240 return [i, local]; 241 } 242 243 //----------------------------------------------------------------------- 244 __attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) { 245 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr); 246 247 const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr); 248 249 // write timestamp 250 thrd->link.ts = rdtscl(); 251 252 bool first = false; 253 __attribute__((unused)) bool local; 254 __attribute__((unused)) int preferred; 255 #if defined(BIAS) 256 preferred = 257 //* 258 external ? -1 : kernelTLS().this_processor->cltr_id; 259 /*/ 260 thrd->link.preferred * 4; 261 //*/ 262 #endif 263 264 // Try to pick a lane and lock it 265 unsigned i; 266 do { 267 // Pick the index of a lane 268 // unsigned r = __tls_rand(); 269 unsigned r = __tls_rand_fwd(); 270 [i, local] = idx_from_r(r, preferred); 271 272 i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 273 252 return [i, local]; 253 } 254 255 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) { 256 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr); 257 258 const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr); 259 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count ); 260 261 // write timestamp 262 thrd->link.ts = rdtscl(); 263 264 bool local; 265 int preferred = external ? 
-1 : kernelTLS().this_processor->rdq.id; 266 267 // Try to pick a lane and lock it 268 unsigned i; 269 do { 270 // Pick the index of a lane 271 unsigned r = __tls_rand_fwd(); 272 [i, local] = idx_from_r(r, preferred); 273 274 i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 275 276 #if !defined(__CFA_NO_STATISTICS__) 277 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.attempt, 1, __ATOMIC_RELAXED); 278 else if(local) __tls_stats()->ready.push.local.attempt++; 279 else __tls_stats()->ready.push.share.attempt++; 280 #endif 281 282 #if defined(USE_MPSC) 283 // mpsc always succeeds 284 } while( false ); 285 #else 286 // If we can't lock it retry 287 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 288 #endif 289 290 // Actually push it 291 push(lanes.data[i], thrd); 292 293 #if !defined(USE_MPSC) 294 // Unlock and return 295 __atomic_unlock( &lanes.data[i].lock ); 296 #endif 297 298 // Mark the current index in the tls rng instance as having an item 299 __tls_rand_advance_bck(); 300 301 __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first); 302 303 // Update statistics 274 304 #if !defined(__CFA_NO_STATISTICS__) 275 if(external) { 276 if(local) __atomic_fetch_add(&cltr->stats->ready.pick.ext.local, 1, __ATOMIC_RELAXED); 277 __atomic_fetch_add(&cltr->stats->ready.pick.ext.attempt, 1, __ATOMIC_RELAXED); 305 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED); 306 else if(local) __tls_stats()->ready.push.local.success++; 307 else __tls_stats()->ready.push.share.success++; 308 #endif 309 } 310 311 // Pop from the ready queue from a given cluster 312 __attribute__((hot)) $thread * pop_fast(struct cluster * cltr) with (cltr->ready_queue) { 313 /* paranoid */ verify( lanes.count > 0 ); 314 /* paranoid */ verify( kernelTLS().this_processor ); 315 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count ); 316 317 unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 318 int preferred = kernelTLS().this_processor->rdq.id; 319 320 321 // As long as the list is not empty, try finding a lane that isn't empty and pop from it 322 for(25) { 323 // Pick two lists at random 324 unsigned ri = __tls_rand_bck(); 325 unsigned rj = __tls_rand_bck(); 326 327 unsigned i, j; 328 __attribute__((unused)) bool locali, localj; 329 [i, locali] = idx_from_r(ri, preferred); 330 [j, localj] = idx_from_r(rj, preferred); 331 332 i %= count; 333 j %= count; 334 335 // try popping from the 2 picked lists 336 struct $thread * thrd = try_pop(cltr, i, j __STATS(, *(locali || localj ? 
&__tls_stats()->ready.pop.local : &__tls_stats()->ready.pop.help))); 337 if(thrd) { 338 return thrd; 339 } 340 } 341 342 // All lanes where empty return 0p 343 return 0p; 344 } 345 346 __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) { 347 return search(cltr); 348 } 349 #endif 350 #if defined(USE_WORK_STEALING) 351 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) { 352 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr); 353 354 const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr); 355 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count ); 356 357 // write timestamp 358 thrd->link.ts = rdtscl(); 359 360 // Try to pick a lane and lock it 361 unsigned i; 362 do { 363 #if !defined(__CFA_NO_STATISTICS__) 364 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.attempt, 1, __ATOMIC_RELAXED); 365 else __tls_stats()->ready.push.local.attempt++; 366 #endif 367 368 if(unlikely(external)) { 369 i = __tls_rand() % lanes.count; 278 370 } 279 371 else { 280 if(local) __tls_stats()->ready.pick.push.local++; 281 __tls_stats()->ready.pick.push.attempt++; 372 processor * proc = kernelTLS().this_processor; 373 unsigned r = proc->rdq.its++; 374 i = proc->rdq.id + (r % READYQ_SHARD_FACTOR); 282 375 } 376 377 378 #if defined(USE_MPSC) 379 // mpsc always succeeds 380 } while( false ); 381 #else 382 // If we can't lock it retry 383 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 283 384 #endif 284 385 285 #if defined(USE_MPSC) 286 // mpsc always succeeds 287 } while( false ); 288 #else 289 // If we can't lock it retry 290 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 386 // Actually push it 387 push(lanes.data[i], thrd); 388 389 #if !defined(USE_MPSC) 390 // Unlock and return 391 __atomic_unlock( &lanes.data[i].lock ); 392 #endif 393 394 #if !defined(__CFA_NO_STATISTICS__) 395 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED); 396 else __tls_stats()->ready.push.local.success++; 397 #endif 398 399 __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first); 400 } 401 402 // Pop from the ready queue from a given cluster 403 __attribute__((hot)) $thread * pop_fast(struct cluster * cltr) with (cltr->ready_queue) { 404 /* paranoid */ verify( lanes.count > 0 ); 405 /* paranoid */ verify( kernelTLS().this_processor ); 406 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count ); 407 408 processor * proc = kernelTLS().this_processor; 409 410 if(proc->rdq.target == -1u) { 411 proc->rdq.target = __tls_rand() % lanes.count; 412 unsigned it1 = proc->rdq.itr; 413 unsigned it2 = proc->rdq.itr + 1; 414 unsigned idx1 = proc->rdq.id + (it1 % READYQ_SHARD_FACTOR); 415 unsigned idx2 = proc->rdq.id + (it1 % READYQ_SHARD_FACTOR); 416 unsigned long long tsc1 = ts(lanes.data[idx1]); 417 unsigned long long tsc2 = ts(lanes.data[idx2]); 418 proc->rdq.cutoff = min(tsc1, tsc2); 419 } 420 else if(lanes.tscs[proc->rdq.target].tv < proc->rdq.cutoff) { 421 $thread * t = try_pop(cltr, proc->rdq.target __STATS(, __tls_stats()->ready.pop.help)); 422 proc->rdq.target = -1u; 423 if(t) return t; 424 } 425 426 for(READYQ_SHARD_FACTOR) { 427 unsigned i = proc->rdq.id + (--proc->rdq.itr % READYQ_SHARD_FACTOR); 428 if($thread * t = try_pop(cltr, i __STATS(, 
__tls_stats()->ready.pop.local))) return t; 429 } 430 return 0p; 431 } 432 433 __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) { 434 for(25) { 435 unsigned i = __tls_rand() % lanes.count; 436 $thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal)); 437 if(t) return t; 438 } 439 440 return search(cltr); 441 } 442 #endif 443 444 //======================================================================= 445 // Various Ready Queue utilities 446 //======================================================================= 447 // these function work the same or almost the same 448 // whether they are using work-stealing or relaxed fifo scheduling 449 450 //----------------------------------------------------------------------- 451 // try to pop from a lane given by index w 452 static inline struct $thread * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->ready_queue) { 453 __STATS( stats.attempt++; ) 454 455 // Get relevant elements locally 456 __intrusive_lane_t & lane = lanes.data[w]; 457 458 // If list looks empty retry 459 if( is_empty(lane) ) { 460 __STATS( stats.espec++; ) 461 return 0p; 462 } 463 464 // If we can't get the lock retry 465 if( !__atomic_try_acquire(&lane.lock) ) { 466 __STATS( stats.elock++; ) 467 return 0p; 468 } 469 470 // If list is empty, unlock and retry 471 if( is_empty(lane) ) { 472 __atomic_unlock(&lane.lock); 473 __STATS( stats.eempty++; ) 474 return 0p; 475 } 476 477 // Actually pop the list 478 struct $thread * thrd; 479 thrd = pop(lane); 480 481 /* paranoid */ verify(thrd); 482 /* paranoid */ verify(lane.lock); 483 484 // Unlock and return 485 __atomic_unlock(&lane.lock); 486 487 // Update statistics 488 __STATS( stats.success++; ) 489 490 #if defined(USE_WORK_STEALING) 491 lanes.tscs[w].tv = thrd->link.ts; 291 492 #endif 292 493 293 // Actually push it 294 #ifdef USE_SNZI 295 bool lane_first = 296 #endif 297 298 push(lanes.data[i], thrd); 299 300 #ifdef USE_SNZI 301 // If this lane used to be empty we need to do more 302 if(lane_first) { 303 // Check if the entire queue used to be empty 304 first = !query(snzi); 305 306 // Update the snzi 307 arrive( snzi, i ); 308 } 309 #endif 310 311 #if !defined(USE_MPSC) 312 // Unlock and return 313 __atomic_unlock( &lanes.data[i].lock ); 314 #endif 315 316 // Mark the current index in the tls rng instance as having an item 317 __tls_rand_advance_bck(); 318 319 __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first); 320 321 // Update statistics 322 #if !defined(__CFA_NO_STATISTICS__) 323 if(external) { 324 if(local) __atomic_fetch_add(&cltr->stats->ready.pick.ext.lsuccess, 1, __ATOMIC_RELAXED); 325 __atomic_fetch_add(&cltr->stats->ready.pick.ext.success, 1, __ATOMIC_RELAXED); 326 } 327 else { 328 if(local) __tls_stats()->ready.pick.push.lsuccess++; 329 __tls_stats()->ready.pick.push.success++; 330 } 331 #endif 332 333 // return whether or not the list was empty before this push 334 return first; 335 } 336 337 static struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j); 338 static struct $thread * try_pop(struct cluster * cltr, unsigned i); 339 340 // Pop from the ready queue from a given cluster 341 __attribute__((hot)) $thread * pop(struct cluster * cltr) with (cltr->ready_queue) { 342 /* paranoid */ verify( lanes.count > 0 ); 343 unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 344 int 
preferred; 345 #if defined(BIAS) 346 // Don't bother trying locally too much 347 preferred = kernelTLS().this_processor->cltr_id; 348 #endif 349 350 351 // As long as the list is not empty, try finding a lane that isn't empty and pop from it 352 #ifdef USE_SNZI 353 while( query(snzi) ) { 354 #else 355 for(25) { 356 #endif 357 // Pick two lists at random 358 // unsigned ri = __tls_rand(); 359 // unsigned rj = __tls_rand(); 360 unsigned ri = __tls_rand_bck(); 361 unsigned rj = __tls_rand_bck(); 362 363 unsigned i, j; 364 __attribute__((unused)) bool locali, localj; 365 [i, locali] = idx_from_r(ri, preferred); 366 [j, localj] = idx_from_r(rj, preferred); 367 368 #if !defined(__CFA_NO_STATISTICS__) 369 if(locali && localj) { 370 __tls_stats()->ready.pick.pop.local++; 371 } 372 #endif 373 374 i %= count; 375 j %= count; 376 377 // try popping from the 2 picked lists 378 struct $thread * thrd = try_pop(cltr, i, j); 379 if(thrd) { 380 #if defined(BIAS) && !defined(__CFA_NO_STATISTICS__) 381 if( locali || localj ) __tls_stats()->ready.pick.pop.lsuccess++; 382 #endif 383 return thrd; 384 } 385 } 386 387 // All lanes where empty return 0p 388 return 0p; 389 } 390 391 __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) { 494 // return the popped thread 495 return thrd; 496 } 497 498 //----------------------------------------------------------------------- 499 // try to pop from any lanes making sure you don't miss any threads push 500 // before the start of the function 501 static inline struct $thread * search(struct cluster * cltr) with (cltr->ready_queue) { 392 502 /* paranoid */ verify( lanes.count > 0 ); 393 503 unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); … … 395 505 for(i; count) { 396 506 unsigned idx = (offset + i) % count; 397 struct $thread * thrd = try_pop(cltr, idx );507 struct $thread * thrd = try_pop(cltr, idx __STATS(, __tls_stats()->ready.pop.search)); 398 508 if(thrd) { 399 509 return thrd; … … 405 515 } 406 516 407 408 517 //----------------------------------------------------------------------- 409 // Given 2 indexes, pick the list with the oldest push an try to pop from it 410 static inline struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j) with (cltr->ready_queue) { 411 #if !defined(__CFA_NO_STATISTICS__) 412 __tls_stats()->ready.pick.pop.attempt++; 413 #endif 414 415 // Pick the bet list 416 int w = i; 417 if( __builtin_expect(!is_empty(lanes.data[j]), true) ) { 418 w = (ts(lanes.data[i]) < ts(lanes.data[j])) ? 
i : j; 419 } 420 421 return try_pop(cltr, w); 422 } 423 424 static inline struct $thread * try_pop(struct cluster * cltr, unsigned w) with (cltr->ready_queue) { 425 // Get relevant elements locally 426 __intrusive_lane_t & lane = lanes.data[w]; 427 428 // If list looks empty retry 429 if( is_empty(lane) ) return 0p; 430 431 // If we can't get the lock retry 432 if( !__atomic_try_acquire(&lane.lock) ) return 0p; 433 434 435 // If list is empty, unlock and retry 436 if( is_empty(lane) ) { 437 __atomic_unlock(&lane.lock); 438 return 0p; 439 } 440 441 // Actually pop the list 442 struct $thread * thrd; 443 thrd = pop(lane); 444 445 /* paranoid */ verify(thrd); 446 /* paranoid */ verify(lane.lock); 447 448 #ifdef USE_SNZI 449 // If this was the last element in the lane 450 if(emptied) { 451 depart( snzi, w ); 452 } 453 #endif 454 455 // Unlock and return 456 __atomic_unlock(&lane.lock); 457 458 // Update statistics 459 #if !defined(__CFA_NO_STATISTICS__) 460 __tls_stats()->ready.pick.pop.success++; 461 #endif 462 463 // Update the thread bias 464 thrd->link.preferred = w / 4; 465 466 // return the popped thread 467 return thrd; 468 } 469 //----------------------------------------------------------------------- 470 471 bool remove_head(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) { 472 for(i; lanes.count) { 473 __intrusive_lane_t & lane = lanes.data[i]; 474 475 bool removed = false; 476 477 __atomic_acquire(&lane.lock); 478 if(head(lane)->link.next == thrd) { 479 $thread * pthrd; 480 pthrd = pop(lane); 481 482 /* paranoid */ verify( pthrd == thrd ); 483 484 removed = true; 485 #ifdef USE_SNZI 486 if(emptied) { 487 depart( snzi, i ); 488 } 489 #endif 490 } 491 __atomic_unlock(&lane.lock); 492 493 if( removed ) return true; 494 } 495 return false; 496 } 497 498 //----------------------------------------------------------------------- 499 518 // Check that all the intrusive queues in the data structure are still consistent 500 519 static void check( __ready_queue_t & q ) with (q) { 501 520 #if defined(__CFA_WITH_VERIFY__) && !defined(USE_MPSC) … … 522 541 } 523 542 543 //----------------------------------------------------------------------- 544 // Given 2 indexes, pick the list with the oldest push an try to pop from it 545 static inline struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->ready_queue) { 546 // Pick the bet list 547 int w = i; 548 if( __builtin_expect(!is_empty(lanes.data[j]), true) ) { 549 w = (ts(lanes.data[i]) < ts(lanes.data[j])) ? 
i : j; 550 } 551 552 return try_pop(cltr, w __STATS(, stats)); 553 } 554 524 555 // Call this function of the intrusive list was moved using memcpy 525 556 // fixes the list so that the pointers back to anchors aren't left dangling … … 541 572 } 542 573 574 static void assign_list(unsigned & value, dlist(processor, processor) & list, unsigned count) { 575 processor * it = &list`first; 576 for(unsigned i = 0; i < count; i++) { 577 /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count); 578 it->rdq.id = value; 579 it->rdq.target = -1u; 580 value += READYQ_SHARD_FACTOR; 581 it = &(*it)`next; 582 } 583 } 584 585 static void reassign_cltr_id(struct cluster * cltr) { 586 unsigned preferred = 0; 587 assign_list(preferred, cltr->procs.actives, cltr->procs.total - cltr->procs.idle); 588 assign_list(preferred, cltr->procs.idles , cltr->procs.idle ); 589 } 590 591 static void fix_times( struct cluster * cltr ) with( cltr->ready_queue ) { 592 #if defined(USE_WORK_STEALING) 593 lanes.tscs = alloc(lanes.count, lanes.tscs`realloc); 594 for(i; lanes.count) { 595 lanes.tscs[i].tv = ts(lanes.data[i]); 596 } 597 #endif 598 } 599 543 600 // Grow the ready queue 544 unsigned ready_queue_grow(struct cluster * cltr, int target) { 545 unsigned preferred; 601 void ready_queue_grow(struct cluster * cltr) { 546 602 size_t ncount; 603 int target = cltr->procs.total; 547 604 548 605 /* paranoid */ verify( ready_mutate_islocked() ); … … 554 611 // grow the ready queue 555 612 with( cltr->ready_queue ) { 556 #ifdef USE_SNZI557 ^(snzi){};558 #endif559 560 613 // Find new count 561 614 // Make sure we always have atleast 1 list 562 615 if(target >= 2) { 563 ncount = target * 4; 564 preferred = ncount - 4; 616 ncount = target * READYQ_SHARD_FACTOR; 565 617 } else { 566 ncount = 1; 567 preferred = 0; 618 ncount = SEQUENTIAL_SHARD; 568 619 } 569 620 … … 583 634 // Update original 584 635 lanes.count = ncount; 585 586 #ifdef USE_SNZI 587 // Re-create the snzi 588 snzi{ log2( lanes.count / 8 ) }; 589 for( idx; (size_t)lanes.count ) { 590 if( !is_empty(lanes.data[idx]) ) { 591 arrive(snzi, idx); 592 } 593 } 594 #endif 595 } 636 } 637 638 fix_times(cltr); 639 640 reassign_cltr_id(cltr); 596 641 597 642 // Make sure that everything is consistent … … 601 646 602 647 /* paranoid */ verify( ready_mutate_islocked() ); 603 return preferred;604 648 } 605 649 606 650 // Shrink the ready queue 607 void ready_queue_shrink(struct cluster * cltr , int target) {651 void ready_queue_shrink(struct cluster * cltr) { 608 652 /* paranoid */ verify( ready_mutate_islocked() ); 609 653 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n"); … … 612 656 /* paranoid */ check( cltr->ready_queue ); 613 657 658 int target = cltr->procs.total; 659 614 660 with( cltr->ready_queue ) { 615 #ifdef USE_SNZI616 ^(snzi){};617 #endif618 619 661 // Remember old count 620 662 size_t ocount = lanes.count; … … 622 664 // Find new count 623 665 // Make sure we always have atleast 1 list 624 lanes.count = target >= 2 ? target * 4: 1;666 lanes.count = target >= 2 ? 
target * READYQ_SHARD_FACTOR: SEQUENTIAL_SHARD; 625 667 /* paranoid */ verify( ocount >= lanes.count ); 626 /* paranoid */ verify( lanes.count == target * 4|| target < 2 );668 /* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 ); 627 669 628 670 // for printing count the number of displaced threads … … 667 709 fix(lanes.data[idx]); 668 710 } 669 670 #ifdef USE_SNZI 671 // Re-create the snzi 672 snzi{ log2( lanes.count / 8 ) }; 673 for( idx; (size_t)lanes.count ) { 674 if( !is_empty(lanes.data[idx]) ) { 675 arrive(snzi, idx); 676 } 677 } 678 #endif 679 } 711 } 712 713 fix_times(cltr); 714 715 reassign_cltr_id(cltr); 680 716 681 717 // Make sure that everything is consistent -
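The pop path in the ready_queue.cfa hunk above is a "power of two choices" scheme: pick two lanes at random and take from the one whose oldest element was pushed first. Below is a minimal C sketch of just that selection step; the names lane_t, NLANES and pick_lane are illustrative stand-ins, not part of the CFA runtime, and the locking and the actual pop are left out.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define NLANES 8                 /* stands in for lanes.count */

    typedef struct {
        volatile uint64_t ts;        /* timestamp of the oldest element in the lane */
        volatile bool     empty;     /* true when the lane holds no elements */
    } lane_t;

    /* Pick two lanes at random and prefer the non-empty one with the older
     * timestamp, mirroring try_pop(cltr, i, j) in the hunk above. */
    static unsigned pick_lane(const lane_t lanes[NLANES]) {
        unsigned i = (unsigned)rand() % NLANES;
        unsigned j = (unsigned)rand() % NLANES;
        unsigned w = i;
        if (!lanes[j].empty && lanes[j].ts <= lanes[i].ts) w = j;
        return w;
    }

    int main(void) {
        lane_t lanes[NLANES] = { { 5, false }, { 3, false }, { 0, true }, { 7, false },
                                 { 2, false }, { 9, false }, { 0, true }, { 1, false } };
        srand(42);
        printf("picked lane %u\n", pick_lane(lanes));
        return 0;
    }

The grow/shrink code above sizes the array at target * READYQ_SHARD_FACTOR lanes (SEQUENTIAL_SHARD when there are fewer than two processors), and reassign_cltr_id hands each processor an rdq.id stepped by READYQ_SHARD_FACTOR, so every processor owns its own shard of lanes to prefer.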
libcfa/src/concurrency/ready_subqueue.hfa
r6a8208cb r59f3f61 246 246 #endif
247 247 }
248
249 // Aligned timestamps which are used by the relaxed ready queue
250 struct __attribute__((aligned(128))) __timestamp_t {
251 volatile unsigned long long tv;
252 };
253
254 void ?{}(__timestamp_t & this) { this.tv = 0; }
255 void ^?{}(__timestamp_t & this) {}
-
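The new __timestamp_t is padded to 128 bytes so that per-lane timestamps written by different processors never share a cache line. The same idea in standard C11; the array size below is illustrative only.

    #include <stdalign.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        alignas(128) volatile unsigned long long tv;   /* one timestamp per 128-byte slot */
    } timestamp_t;

    static timestamp_t lane_times[8];   /* adjacent entries cannot false-share */

    int main(void) {
        lane_times[0].tv = 1;
        printf("sizeof(timestamp_t) = %zu\n", sizeof(timestamp_t));   /* 128 */
        return 0;
    }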
libcfa/src/concurrency/stats.cfa
r6a8208cb r59f3f61 10 10 #if !defined(__CFA_NO_STATISTICS__) 11 11 void __init_stats( struct __stats_t * stats ) { 12 stats->ready.pick.push.attempt = 0; 13 stats->ready.pick.push.success = 0; 14 stats->ready.pick.push.local = 0; 15 stats->ready.pick.push.lsuccess = 0; 16 stats->ready.pick.ext.attempt = 0; 17 stats->ready.pick.ext.success = 0; 18 stats->ready.pick.ext.local = 0; 19 stats->ready.pick.ext.lsuccess = 0; 20 stats->ready.pick.pop .probe = 0; 21 stats->ready.pick.pop .attempt = 0; 22 stats->ready.pick.pop .success = 0; 23 stats->ready.pick.pop .local = 0; 24 stats->ready.pick.pop .lsuccess = 0; 12 stats->ready.push.local.attempt = 0; 13 stats->ready.push.local.success = 0; 14 stats->ready.push.share.attempt = 0; 15 stats->ready.push.share.success = 0; 16 stats->ready.push.extrn.attempt = 0; 17 stats->ready.push.extrn.success = 0; 18 stats->ready.pop.local .attempt = 0; 19 stats->ready.pop.local .success = 0; 20 stats->ready.pop.local .elock = 0; 21 stats->ready.pop.local .eempty = 0; 22 stats->ready.pop.local .espec = 0; 23 stats->ready.pop.help .attempt = 0; 24 stats->ready.pop.help .success = 0; 25 stats->ready.pop.help .elock = 0; 26 stats->ready.pop.help .eempty = 0; 27 stats->ready.pop.help .espec = 0; 28 stats->ready.pop.steal .attempt = 0; 29 stats->ready.pop.steal .success = 0; 30 stats->ready.pop.steal .elock = 0; 31 stats->ready.pop.steal .eempty = 0; 32 stats->ready.pop.steal .espec = 0; 33 stats->ready.pop.search.attempt = 0; 34 stats->ready.pop.search.success = 0; 35 stats->ready.pop.search.elock = 0; 36 stats->ready.pop.search.eempty = 0; 37 stats->ready.pop.search.espec = 0; 25 38 stats->ready.threads.migration = 0; 26 39 stats->ready.threads.threads = 0; … … 54 67 55 68 void __tally_stats( struct __stats_t * cltr, struct __stats_t * proc ) { 56 __atomic_fetch_add( &cltr->ready.pick.push.attempt , proc->ready.pick.push.attempt , __ATOMIC_SEQ_CST ); proc->ready.pick.push.attempt = 0; 57 __atomic_fetch_add( &cltr->ready.pick.push.success , proc->ready.pick.push.success , __ATOMIC_SEQ_CST ); proc->ready.pick.push.success = 0; 58 __atomic_fetch_add( &cltr->ready.pick.push.local , proc->ready.pick.push.local , __ATOMIC_SEQ_CST ); proc->ready.pick.push.local = 0; 59 __atomic_fetch_add( &cltr->ready.pick.push.lsuccess, proc->ready.pick.push.lsuccess, __ATOMIC_SEQ_CST ); proc->ready.pick.push.lsuccess = 0; 60 __atomic_fetch_add( &cltr->ready.pick.ext.attempt , proc->ready.pick.ext.attempt , __ATOMIC_SEQ_CST ); proc->ready.pick.ext.attempt = 0; 61 __atomic_fetch_add( &cltr->ready.pick.ext.success , proc->ready.pick.ext.success , __ATOMIC_SEQ_CST ); proc->ready.pick.ext.success = 0; 62 __atomic_fetch_add( &cltr->ready.pick.ext.local , proc->ready.pick.ext.local , __ATOMIC_SEQ_CST ); proc->ready.pick.ext.local = 0; 63 __atomic_fetch_add( &cltr->ready.pick.ext.lsuccess , proc->ready.pick.ext.lsuccess , __ATOMIC_SEQ_CST ); proc->ready.pick.ext.lsuccess = 0; 64 __atomic_fetch_add( &cltr->ready.pick.pop .probe , proc->ready.pick.pop .probe , __ATOMIC_SEQ_CST ); proc->ready.pick.pop .probe = 0; 65 __atomic_fetch_add( &cltr->ready.pick.pop .attempt , proc->ready.pick.pop .attempt , __ATOMIC_SEQ_CST ); proc->ready.pick.pop .attempt = 0; 66 __atomic_fetch_add( &cltr->ready.pick.pop .success , proc->ready.pick.pop .success , __ATOMIC_SEQ_CST ); proc->ready.pick.pop .success = 0; 67 __atomic_fetch_add( &cltr->ready.pick.pop .local , proc->ready.pick.pop .local , __ATOMIC_SEQ_CST ); proc->ready.pick.pop .local = 0; 68 __atomic_fetch_add( &cltr->ready.pick.pop .lsuccess, 
proc->ready.pick.pop .lsuccess, __ATOMIC_SEQ_CST ); proc->ready.pick.pop .lsuccess = 0; 69 __atomic_fetch_add( &cltr->ready.push.local.attempt, proc->ready.push.local.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.local.attempt = 0; 70 __atomic_fetch_add( &cltr->ready.push.local.success, proc->ready.push.local.success, __ATOMIC_SEQ_CST ); proc->ready.push.local.success = 0; 71 __atomic_fetch_add( &cltr->ready.push.share.attempt, proc->ready.push.share.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.share.attempt = 0; 72 __atomic_fetch_add( &cltr->ready.push.share.success, proc->ready.push.share.success, __ATOMIC_SEQ_CST ); proc->ready.push.share.success = 0; 73 __atomic_fetch_add( &cltr->ready.push.extrn.attempt, proc->ready.push.extrn.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.extrn.attempt = 0; 74 __atomic_fetch_add( &cltr->ready.push.extrn.success, proc->ready.push.extrn.success, __ATOMIC_SEQ_CST ); proc->ready.push.extrn.success = 0; 75 __atomic_fetch_add( &cltr->ready.pop.local .attempt, proc->ready.pop.local .attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.local .attempt = 0; 76 __atomic_fetch_add( &cltr->ready.pop.local .success, proc->ready.pop.local .success, __ATOMIC_SEQ_CST ); proc->ready.pop.local .success = 0; 77 __atomic_fetch_add( &cltr->ready.pop.local .elock , proc->ready.pop.local .elock , __ATOMIC_SEQ_CST ); proc->ready.pop.local .elock = 0; 78 __atomic_fetch_add( &cltr->ready.pop.local .eempty , proc->ready.pop.local .eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.local .eempty = 0; 79 __atomic_fetch_add( &cltr->ready.pop.local .espec , proc->ready.pop.local .espec , __ATOMIC_SEQ_CST ); proc->ready.pop.local .espec = 0; 80 __atomic_fetch_add( &cltr->ready.pop.help .attempt, proc->ready.pop.help .attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.help .attempt = 0; 81 __atomic_fetch_add( &cltr->ready.pop.help .success, proc->ready.pop.help .success, __ATOMIC_SEQ_CST ); proc->ready.pop.help .success = 0; 82 __atomic_fetch_add( &cltr->ready.pop.help .elock , proc->ready.pop.help .elock , __ATOMIC_SEQ_CST ); proc->ready.pop.help .elock = 0; 83 __atomic_fetch_add( &cltr->ready.pop.help .eempty , proc->ready.pop.help .eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.help .eempty = 0; 84 __atomic_fetch_add( &cltr->ready.pop.help .espec , proc->ready.pop.help .espec , __ATOMIC_SEQ_CST ); proc->ready.pop.help .espec = 0; 85 __atomic_fetch_add( &cltr->ready.pop.steal .attempt, proc->ready.pop.steal .attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.steal .attempt = 0; 86 __atomic_fetch_add( &cltr->ready.pop.steal .success, proc->ready.pop.steal .success, __ATOMIC_SEQ_CST ); proc->ready.pop.steal .success = 0; 87 __atomic_fetch_add( &cltr->ready.pop.steal .elock , proc->ready.pop.steal .elock , __ATOMIC_SEQ_CST ); proc->ready.pop.steal .elock = 0; 88 __atomic_fetch_add( &cltr->ready.pop.steal .eempty , proc->ready.pop.steal .eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.steal .eempty = 0; 89 __atomic_fetch_add( &cltr->ready.pop.steal .espec , proc->ready.pop.steal .espec , __ATOMIC_SEQ_CST ); proc->ready.pop.steal .espec = 0; 90 __atomic_fetch_add( &cltr->ready.pop.search.attempt, proc->ready.pop.search.attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.search.attempt = 0; 91 __atomic_fetch_add( &cltr->ready.pop.search.success, proc->ready.pop.search.success, __ATOMIC_SEQ_CST ); proc->ready.pop.search.success = 0; 92 __atomic_fetch_add( &cltr->ready.pop.search.elock , proc->ready.pop.search.elock , __ATOMIC_SEQ_CST ); proc->ready.pop.search.elock = 0; 93 __atomic_fetch_add( &cltr->ready.pop.search.eempty , 
proc->ready.pop.search.eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.search.eempty = 0; 94 __atomic_fetch_add( &cltr->ready.pop.search.espec , proc->ready.pop.search.espec , __ATOMIC_SEQ_CST ); proc->ready.pop.search.espec = 0; 69 95 __atomic_fetch_add( &cltr->ready.threads.migration , proc->ready.threads.migration , __ATOMIC_SEQ_CST ); proc->ready.threads.migration = 0; 70 96 __atomic_fetch_add( &cltr->ready.threads.threads , proc->ready.threads.threads , __ATOMIC_SEQ_CST ); proc->ready.threads.threads = 0; … … 95 121 96 122 if( flags & CFA_STATS_READY_Q ) { 97 double push_len = ((double)ready.pick.push.attempt) / ready.pick.push.success; 98 double ext_len = ((double)ready.pick.ext .attempt) / ready.pick.ext .success; 99 double pop_len = ((double)ready.pick.pop .attempt) / ready.pick.pop .success; 100 101 double lpush_len = ((double)ready.pick.push.local) / ready.pick.push.lsuccess; 102 double lext_len = ((double)ready.pick.ext .local) / ready.pick.ext .lsuccess; 103 double lpop_len = ((double)ready.pick.pop .local) / ready.pick.pop .lsuccess; 123 double push_len = ((double)ready.push.local.attempt + ready.push.share.attempt + ready.push.extrn.attempt) / (ready.push.local.success + ready.push.share.success + ready.push.extrn.success); 124 double sLcl_len = ready.push.local.success ? ((double)ready.push.local.attempt) / ready.push.local.success : 0; 125 double sOth_len = ready.push.share.success ? ((double)ready.push.share.attempt) / ready.push.share.success : 0; 126 double sExt_len = ready.push.extrn.success ? ((double)ready.push.extrn.attempt) / ready.push.extrn.success : 0; 127 128 double rLcl_len = ready.pop.local .success ? ((double)ready.pop.local .attempt) / ready.pop.local .success : 0; 129 double rHlp_len = ready.pop.help .success ? ((double)ready.pop.help .attempt) / ready.pop.help .success : 0; 130 double rStl_len = ready.pop.steal .success ? ((double)ready.pop.steal .attempt) / ready.pop.steal .success : 0; 131 double rSch_len = ready.pop.search.success ? 
((double)ready.pop.search.attempt) / ready.pop.search.success : 0; 104 132 105 133 __cfaabi_bits_print_safe( STDOUT_FILENO, 106 134 "----- %s \"%s\" (%p) - Ready Q Stats -----\n" 107 "- total threads : %'15" PRIu64 "run, %'15" PRIu64 "schd (%'" PRIu64 "ext, %'" PRIu64 "mig, %'" PRId64 " )\n" 108 "- push avg probe : %'3.2lf, %'3.2lfl (%'15" PRIu64 " attempts, %'15" PRIu64 " locals)\n" 109 "- ext avg probe : %'3.2lf, %'3.2lfl (%'15" PRIu64 " attempts, %'15" PRIu64 " locals)\n" 110 "- pop avg probe : %'3.2lf, %'3.2lfl (%'15" PRIu64 " attempts, %'15" PRIu64 " locals)\n" 111 "- Idle Sleep : %'15" PRIu64 "h, %'15" PRIu64 "c, %'15" PRIu64 "w, %'15" PRIu64 "e\n" 135 "- totals : %'3" PRIu64 " run, %'3" PRIu64 " schd (%'" PRIu64 "ext, %'" PRIu64 "mig, %'" PRId64 " )\n" 136 "- push avg : %'3.2lf (l: %'3.2lf/%'" PRIu64 ", s: %'3.2lf/%'" PRIu64 ", e: %'3.2lf : %'" PRIu64 "e)\n" 137 "- local : %'3.2lf (%'3" PRIu64 " try, %'3" PRIu64 " spc, %'3" PRIu64 " lck, %'3" PRIu64 " ept)\n" 138 "- help : %'3.2lf (%'3" PRIu64 " try, %'3" PRIu64 " spc, %'3" PRIu64 " lck, %'3" PRIu64 " ept)\n" 139 "- steal : %'3.2lf (%'3" PRIu64 " try, %'3" PRIu64 " spc, %'3" PRIu64 " lck, %'3" PRIu64 " ept)\n" 140 "- search : %'3.2lf (%'3" PRIu64 " try, %'3" PRIu64 " spc, %'3" PRIu64 " lck, %'3" PRIu64 " ept)\n" 141 "- Idle Slp : %'3" PRIu64 "h, %'3" PRIu64 "c, %'3" PRIu64 "w, %'3" PRIu64 "e\n" 112 142 "\n" 113 143 , type, name, id 114 , ready.pick.pop.success 115 , ready.pick.push.success + ready.pick.ext.success 116 , ready.pick.ext.success, ready.threads.migration, ready.threads.threads 117 , push_len, lpush_len, ready.pick.push.attempt, ready.pick.push.local 118 , ext_len , lext_len , ready.pick.ext .attempt, ready.pick.ext .local 119 , pop_len , lpop_len , ready.pick.pop .attempt, ready.pick.pop .local 144 , ready.pop.local.success + ready.pop.help.success + ready.pop.steal.success + ready.pop.search.success 145 , ready.push.local.success + ready.push.share.success + ready.push.extrn.success 146 , ready.push.extrn.success, ready.threads.migration, ready.threads.threads 147 , push_len, sLcl_len, ready.push.local.attempt, sOth_len, ready.push.share.attempt, sExt_len, ready.push.extrn.attempt 148 , rLcl_len, ready.pop.local .attempt, ready.pop.local .espec, ready.pop.local .elock, ready.pop.local .eempty 149 , rHlp_len, ready.pop.help .attempt, ready.pop.help .espec, ready.pop.help .elock, ready.pop.help .eempty 150 , rStl_len, ready.pop.steal .attempt, ready.pop.steal .espec, ready.pop.steal .elock, ready.pop.steal .eempty 151 , rSch_len, ready.pop.search.attempt, ready.pop.search.espec, ready.pop.search.elock, ready.pop.search.eempty 120 152 , ready.sleep.halts, ready.sleep.cancels, ready.sleep.wakes, ready.sleep.exits 121 153 ); -
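The tallying pattern used by __tally_stats above, reduced to one counter block in plain C with the same GCC/Clang __atomic builtins the diff uses; readyQ_pop_stats is an illustrative stand-in for one of the local/help/steal/search blocks.

    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        volatile uint64_t attempt, success, elock, eempty, espec;
    } readyQ_pop_stats;

    /* add the per-processor counts into the cluster totals, then reset them so
     * the next flush only reports new events */
    static void tally_pop(readyQ_pop_stats * cltr, readyQ_pop_stats * proc) {
        __atomic_fetch_add(&cltr->attempt, proc->attempt, __ATOMIC_SEQ_CST); proc->attempt = 0;
        __atomic_fetch_add(&cltr->success, proc->success, __ATOMIC_SEQ_CST); proc->success = 0;
        __atomic_fetch_add(&cltr->elock  , proc->elock  , __ATOMIC_SEQ_CST); proc->elock   = 0;
        __atomic_fetch_add(&cltr->eempty , proc->eempty , __ATOMIC_SEQ_CST); proc->eempty  = 0;
        __atomic_fetch_add(&cltr->espec  , proc->espec  , __ATOMIC_SEQ_CST); proc->espec   = 0;
    }

    int main(void) {
        readyQ_pop_stats cluster = {0}, proc = { .attempt = 3, .success = 1, .espec = 2 };
        tally_pop(&cluster, &proc);
        printf("%llu attempts total\n", (unsigned long long)cluster.attempt);   /* 3 */
        return 0;
    }

On the printing side, each average is now guarded with success ? attempt / success : 0, so a processor that never succeeded at a given strategy no longer divides by zero.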
libcfa/src/concurrency/stats.hfa
r6a8208cb r59f3f61 16 16 static inline void __print_stats( struct __stats_t *, int, const char *, const char *, void * ) {} 17 17 #else 18 struct __stats_readyQ_pop_t { 19 // number of attemps at poping something 20 volatile uint64_t attempt; 18 21 19 struct __attribute__((aligned(64))) __stats_readQ_t { 22 // number of successes at poping 23 volatile uint64_t success; 24 25 // number of attempts failed due to the lock being held 26 volatile uint64_t elock; 27 28 // number of attempts failed due to the queue being empty (lock held) 29 volatile uint64_t eempty; 30 31 // number of attempts failed due to the queue looking empty (lock not held) 32 volatile uint64_t espec; 33 }; 34 35 struct __attribute__((aligned(64))) __stats_readyQ_t { 36 // Push statistic 20 37 struct { 21 // Push statistic22 38 struct { 23 // number of attemps at pushing something 39 // number of attemps at pushing something to preferred queues 24 40 volatile uint64_t attempt; 25 41 26 // number of successes at pushing 42 // number of successes at pushing to preferred queues 27 43 volatile uint64_t success; 44 } 45 // Stats for local queue within cluster 46 local, 28 47 29 // number of attemps at pushing something to preferred queues30 volatile uint64_t local;48 // Stats for non-local queues within cluster 49 share, 31 50 32 // number of successes at pushing to preferred queues33 volatile uint64_t lsuccess;34 51 // Stats from outside cluster 52 extrn; 53 } push; 35 54 36 struct { 37 // number of attemps at pushing something 38 volatile uint64_t attempt; 55 // Pop statistic 56 struct { 57 // pop from local queue 58 __stats_readyQ_pop_t local; 39 59 40 // number of successes at pushing41 volatile uint64_t success;60 // pop before looking at local queue 61 __stats_readyQ_pop_t help; 42 62 43 // number of attemps at pushing something to preferred queues44 volatile uint64_t local;63 // pop from some other queue 64 __stats_readyQ_pop_t steal; 45 65 46 // number of successes at pushing to preferred queues47 volatile uint64_t lsuccess;48 } ext;66 // pop when searching queues sequentially 67 __stats_readyQ_pop_t search; 68 } pop; 49 69 50 // Pop statistic51 struct {52 // number of reads of the mask53 // picking an empty __cfa_readyQ_mask_t counts here54 // but not as an attempt55 volatile uint64_t probe;56 57 // number of attemps at poping something58 volatile uint64_t attempt;59 60 // number of successes at poping61 volatile uint64_t success;62 63 // number of attemps at poping something to preferred queues64 volatile uint64_t local;65 66 // number of successes at poping to preferred queues67 volatile uint64_t lsuccess;68 } pop;69 } pick;70 70 struct { 71 71 volatile uint64_t migration; … … 119 119 120 120 struct __attribute__((aligned(128))) __stats_t { 121 __stats_read Q_t ready;121 __stats_readyQ_t ready; 122 122 #if defined(CFA_HAVE_LINUX_IO_URING_H) 123 123 __stats_io_t io; -
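To make the push-origin / pop-strategy split of the regrouped counters easier to see, here is a C rendering of the layout declared above. The field names and alignment values follow the diff; the struct names themselves are illustrative.

    #include <stdint.h>

    struct pop_counters  { volatile uint64_t attempt, success, elock, eempty, espec; };
    struct push_counters { volatile uint64_t attempt, success; };

    struct __attribute__((aligned(64))) readyQ_stats {
        struct { struct push_counters local, share, extrn; } push;        /* where the push came from */
        struct { struct pop_counters  local, help, steal, search; } pop;  /* which pop strategy ran   */
    };

    int main(void) {
        static struct readyQ_stats s;
        s.pop.steal.attempt++;   /* one counter bump per attempted steal, for example */
        return 0;
    }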
libcfa/src/concurrency/thread.cfa
r6a8208cb r59f3f61 39 39 link.next = 0p;
40 40 link.prev = 0p;
41 link.preferred = -1;
42 41 #if defined( __CFA_WITH_VERIFY__ )
43 42 canary = 0x0D15EA5E0D15EA5Ep;
-
libcfa/src/math.hfa
r6a8208cb r59f3f61 5 5 // file "LICENCE" distributed with Cforall.
6 6 //
7 // math --
7 // math.hfa --
8 8 //
9 9 // Author : Peter A. Buhr
10 10 // Created On : Mon Apr 18 23:37:04 2016
11 11 // Last Modified By : Peter A. Buhr
12 // Last Modified On : Mon Apr 12 18:35:39 2021
13 // Update Count : 131
12 // Last Modified On : Thu Apr 15 11:47:56 2021
13 // Update Count : 132
14 14 //
15 15
… …
104 104 int log2( unsigned int n ) { return n == 0 ? -1 : sizeof(n) * __CHAR_BIT__ - 1 - __builtin_clz( n ); }
105 105 long int log2( unsigned long int n ) { return n == 0 ? -1 : sizeof(n) * __CHAR_BIT__ - 1 - __builtin_clzl( n ); }
106 long long int log2( unsigned long long int n ) { return n == 0 ? -1 : sizeof(n) * __CHAR_BIT__ - 1 - __builtin_clzl ( n ); }
106 long long int log2( unsigned long long int n ) { return n == 0 ? -1 : sizeof(n) * __CHAR_BIT__ - 1 - __builtin_clzll( n ); }
107 107 float log2( float x ) { return log2f( x ); }
108 108 // extern "C" { double log2( double ); }
-
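The functional change in this hunk is the last overload: log2 of an unsigned long long previously called __builtin_clzl, which operates on unsigned long, so on targets where long is 32 bits the zero count was taken over the wrong width while the formula still subtracted from 63 (this is why the x86 expect file further down changes from 42 49 55 to 10 17 23 for the same inputs). __builtin_clzll counts over the full 64 bits. A self-contained C version of the corrected overload:

    #include <limits.h>
    #include <stdio.h>

    static long long ilog2_ull(unsigned long long n) {
        return n == 0 ? -1 : (long long)(sizeof(n) * CHAR_BIT - 1 - (unsigned)__builtin_clzll(n));
    }

    int main(void) {
        printf("%lld\n", ilog2_ull(1ull << 42));   /* prints 42, even where long is 32 bits */
        return 0;
    }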
libcfa/src/time.hfa
r6a8208cb r59f3f61 10 10 // Created On : Wed Mar 14 23:18:57 2018
11 11 // Last Modified By : Peter A. Buhr
12 // Last Modified On : Wed Jun 17 16:13:00 2020
13 // Update Count : 663
12 // Last Modified On : Wed Apr 14 09:30:30 2021
13 // Update Count : 664
14 14 //
15 15
… …
29 29 static inline {
30 30 Duration ?=?( Duration & dur, __attribute__((unused)) zero_t ) { return dur{ 0 }; }
31
32 void ?{}( Duration & dur, timeval t ) with( dur ) { tn = (int64_t)t.tv_sec * TIMEGRAN + t.tv_usec * 1000; }
33 Duration ?=?( Duration & dur, timeval t ) with( dur ) {
34 tn = (int64_t)t.tv_sec * TIMEGRAN + t.tv_usec * (TIMEGRAN / 1_000_000LL);
35 return dur;
36 } // ?=?
37
38 void ?{}( Duration & dur, timespec t ) with( dur ) { tn = (int64_t)t.tv_sec * TIMEGRAN + t.tv_nsec; }
39 Duration ?=?( Duration & dur, timespec t ) with( dur ) {
40 tn = (int64_t)t.tv_sec * TIMEGRAN + t.tv_nsec;
41 return dur;
42 } // ?=?
31 43
32 44 Duration +?( Duration rhs ) with( rhs ) { return (Duration)@{ +tn }; }
-
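The new constructors and assignment operators convert the POSIX timeval (microsecond) and timespec (nanosecond) representations into the library's Duration ticks. The same arithmetic as plain C helpers; TIMEGRAN is assumed here to be the nanosecond granularity 1,000,000,000, which is what the tv_usec * 1000 scaling in the constructor implies.

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/time.h>
    #include <time.h>

    #define TIMEGRAN 1000000000LL   /* assumed: Duration ticks are nanoseconds */

    static int64_t dur_from_timeval(struct timeval t) {
        return (int64_t)t.tv_sec * TIMEGRAN + t.tv_usec * (TIMEGRAN / 1000000LL);   /* us -> ns */
    }

    static int64_t dur_from_timespec(struct timespec t) {
        return (int64_t)t.tv_sec * TIMEGRAN + t.tv_nsec;                            /* already ns */
    }

    int main(void) {
        struct timeval tv = { .tv_sec = 1, .tv_usec = 250000 };
        printf("%lld\n", (long long)dur_from_timeval(tv));   /* 1250000000 */
        return 0;
    }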
src/Parser/parser.yy
r6a8208cb r59f3f61 10 10 // Created On : Sat Sep 1 20:22:55 2001 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Apr 1 14:43:24 202113 // Update Count : 49 7812 // Last Modified On : Wed Apr 14 18:13:44 2021 13 // Update Count : 4983 14 14 // 15 15 … … 281 281 %token ATTRIBUTE EXTENSION // GCC 282 282 %token IF ELSE SWITCH CASE DEFAULT DO WHILE FOR BREAK CONTINUE GOTO RETURN 283 %token CHOOSE DISABLE ENABLE FALLTHRU FALLTHROUGH TRY THROW THROWRESUME AT WITH WHEN WAITFOR // CFA 283 %token CHOOSE FALLTHRU FALLTHROUGH WITH WHEN WAITFOR // CFA 284 %token DISABLE ENABLE TRY THROW THROWRESUME AT // CFA 284 285 %token ASM // C99, extension ISO/IEC 9899:1999 Section J.5.10(1) 285 286 %token ALIGNAS ALIGNOF GENERIC STATICASSERT // C11 -
tests/.expect/math.nast.x86.txt
r6a8208cb r59f3f61 19 19 log2:10 17 23
20 20 log2:10 17 23
21 log2: 42 49 55
21 log2:10 17 23
22 22 log2:3. 3. 3.
23 23 log10:2. 2. 2.
-
tests/concurrent/futures/multi.cfa
r6a8208cb r59f3f61 5 5 6 6 thread Server { 7 int cnt, iteration;7 int pending, done, iteration; 8 8 multi_future(int) * request; 9 9 }; 10 10 11 11 void ?{}( Server & this ) { 12 this.cnt = 0; 12 ((thread&)this){"Server Thread"}; 13 this.pending = 0; 14 this.done = 0; 13 15 this.iteration = 0; 14 16 this.request = 0p; … … 16 18 17 19 void ^?{}( Server & mutex this ) { 18 assert(this. cnt== 0);19 20 assert(this.pending == 0); 21 this.request = 0p; 20 22 } 21 23 … … 24 26 } 25 27 26 void process( Server & mutex this ) { 27 fulfil( *this.request, this.iteration ); 28 this.iteration++; 28 void call( Server & mutex this ) { 29 this.pending++; 29 30 } 30 31 31 void call( Server & mutex this ) {32 this. cnt++;32 void finish( Server & mutex this ) { 33 this.done++; 33 34 } 34 35 35 void finish( Server & mutex this ) { }36 37 36 void main( Server & this ) { 37 MAIN_LOOP: 38 38 for() { 39 39 waitfor( ^?{} : this ) { 40 40 break; 41 41 } 42 or when( this.cnt < NFUTURES ) waitfor( call: this ) { 43 if (this.cnt == NFUTURES) { 44 process(this); 42 or waitfor( call: this ) { 43 if (this.pending != NFUTURES) { continue MAIN_LOOP; } 44 45 this.pending = 0; 46 fulfil( *this.request, this.iteration ); 47 this.iteration++; 48 49 for(NFUTURES) { 50 waitfor( finish: this ); 45 51 } 46 } 47 or waitfor( finish: this ) { 48 if (this.cnt == NFUTURES) { 49 reset( *this.request ); 50 this.cnt = 0; 51 } 52 53 reset( *this.request ); 54 this.done = 0; 52 55 } 53 56 } … … 57 60 Server * the_server; 58 61 thread Worker {}; 62 void ?{}(Worker & this) { 63 ((thread&)this){"Worker Thread"}; 64 } 65 59 66 multi_future(int) * shared_future; 60 67 -
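The reworked server above replaces the single cnt counter with separate pending and done counts: it fulfils the shared multi_future only once all NFUTURES clients have called in, then waits for every client to report finish before resetting the future for the next iteration. The counting logic, stripped of CFA monitors and waitfor, as a single-threaded C state machine; fulfil_request and reset_request are hypothetical stand-ins for the multi_future operations in the test.

    #include <stdio.h>

    #define NFUTURES 10

    static int pending = 0, done = 0, iteration = 0;

    static void fulfil_request(int it) { printf("fulfil iteration %d\n", it); }
    static void reset_request(void)    { printf("reset\n"); }

    /* a client called the server */
    static void on_call(void) {
        if (++pending != NFUTURES) return;   /* keep accumulating calls */
        pending = 0;
        fulfil_request(iteration++);         /* wake all NFUTURES waiters at once */
    }

    /* a client finished consuming the fulfilled future */
    static void on_finish(void) {
        if (++done != NFUTURES) return;      /* wait until every waiter is done */
        reset_request();                     /* only now is it safe to reuse the future */
        done = 0;
    }

    int main(void) {
        for (int round = 0; round < 2; round++) {
            for (int i = 0; i < NFUTURES; i++) on_call();
            for (int i = 0; i < NFUTURES; i++) on_finish();
        }
        return 0;
    }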
tests/concurrent/spinaphore.cfa
r6a8208cb r59f3f61 49 49 void main(Unblocker & this) {
50 50 this.sum = 0;
51 unsigned me = (unsigned) &this;
51 unsigned me = (unsigned)(uintptr_t)&this;
52 52 for(num_unblocks) {
53 53 $thread * t = V(sem, false);
54 54 Blocker * b = from_thread(t);
55 55 b->sum += me;
56 this.sum += (unsigned) b;
56 this.sum += (unsigned)(uintptr_t)b;
57 57 unpark(t);
58 58 yield(random(10));
… …
73 73 for(i;num_blockers) {
74 74 for(num_blocks)
75 usum += (unsigned) &blockers[i];
75 usum += (unsigned)(uintptr_t)&blockers[i];
76 76 }
77 77
78 78 for(i;num_unblockers) {
79 79 for(num_unblocks)
80 bsum += (unsigned) &unblockers[i];
80 bsum += (unsigned)(uintptr_t)&unblockers[i];
81 81
82 82
-
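The only change in this test is the cast: converting a 64-bit pointer directly to unsigned is a size-changing conversion that GCC typically diagnoses (-Wpointer-to-int-cast), so the test now goes through uintptr_t to make the truncation explicit. The same idiom in plain C:

    #include <stdint.h>
    #include <stdio.h>

    /* checksum-style use of a pointer value; the truncation to 32 bits is deliberate */
    static unsigned ptr_checksum(const void * p) {
        return (unsigned)(uintptr_t)p;
    }

    int main(void) {
        int x;
        printf("%u\n", ptr_checksum(&x));
        return 0;
    }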
tests/time.cfa
r6a8208cb r59f3f61 10 10 // Created On : Tue Mar 27 17:24:56 2018
11 11 // Last Modified By : Peter A. Buhr
12 // Last Modified On : Thu Jun 18 18:14:49 2020
13 // Update Count : 37
12 // Last Modified On : Fri Apr 16 14:59:53 2021
13 // Update Count : 38
14 14 //
15 15
… …
53 53 // | "Newfoundland" | getTime( Newfoundland )
54 54 // | "local" | getTime()
55 // | "local nsec" | getTimeNsec()
55 // | "local nsec" | timeHiRes()
56 56 // | "PST" | PST(); // getTime short form
57 57 // sout | nl;
-
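Only the comment changes here, tracking the rename of getTimeNsec to timeHiRes. For reference, a plain-C helper with the shape one would expect from such a call is sketched below; it assumes a CLOCK_REALTIME, nanosecond-resolution read and is not the CFA implementation itself.

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static uint64_t time_hires_ns(void) {
        struct timespec ts;
        clock_gettime(CLOCK_REALTIME, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
    }

    int main(void) {
        printf("%llu\n", (unsigned long long)time_hires_ns());
        return 0;
    }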
tools/gdb/utils-gdb.py
r6a8208cb r59f3f61 23 23 gdb.execute('handle SIGUSR1 nostop noprint pass') 24 24 25 CfaTypes = collections.namedtuple('CfaTypes', 'cluster_ptr processor_ptr thread_ptr int_ptr thread_state ')25 CfaTypes = collections.namedtuple('CfaTypes', 'cluster_ptr processor_ptr thread_ptr int_ptr thread_state yield_state') 26 26 27 27 class ThreadInfo: … … 52 52 # GDB types for various structures/types in CFA 53 53 return CfaTypes(cluster_ptr = gdb.lookup_type('struct cluster').pointer(), 54 processor_ptr = gdb.lookup_type('struct processor').pointer(), 55 thread_ptr = gdb.lookup_type('struct $thread').pointer(), 56 int_ptr = gdb.lookup_type('int').pointer(), 57 thread_state = gdb.lookup_type('enum __Coroutine_State')) 54 processor_ptr = gdb.lookup_type('struct processor').pointer(), 55 thread_ptr = gdb.lookup_type('struct $thread').pointer(), 56 int_ptr = gdb.lookup_type('int').pointer(), 57 thread_state = gdb.lookup_type('enum __Coroutine_State'), 58 yield_state = gdb.lookup_type('enum __Preemption_Reason')) 58 59 59 60 def get_addr(addr): … … 371 372 def print_thread(self, thread, tid, marked): 372 373 cfa_t = get_cfa_types() 373 self.print_formatted(marked, tid, thread['self_cor']['name'].string(), str(thread['state'].cast(cfa_t.thread_state)), str(thread)) 374 ys = str(thread['preempted'].cast(cfa_t.yield_state)) 375 if ys == '_X15__NO_PREEMPTIONKM19__Preemption_Reason_1': 376 state = str(thread['state'].cast(cfa_t.thread_state)) 377 elif ys == '_X18__ALARM_PREEMPTIONKM19__Preemption_Reason_1': 378 state = 'preempted' 379 elif ys == '_X19__MANUAL_PREEMPTIONKM19__Preemption_Reason_1': 380 state = 'yield' 381 elif ys == '_X17__POLL_PREEMPTIONKM19__Preemption_Reason_1': 382 state = 'poll' 383 else: 384 print("error: thread {} in undefined preemption state {}".format(thread, ys)) 385 state = 'error' 386 self.print_formatted(marked, tid, thread['self_cor']['name'].string(), state, str(thread)) 374 387 375 388 def print_threads_by_cluster(self, cluster, print_system = False): … … 480 493 context = thread['context'] 481 494 495 496 497 # must be at frame 0 to set pc register 498 gdb.execute('select-frame 0') 499 if gdb.selected_frame().architecture().name() != 'i386:x86-64': 500 print('gdb debugging only supported for i386:x86-64 for now') 501 return 502 503 # gdb seems to handle things much better if we pretend we just entered the context switch 504 # pretend the pc is __cfactx_switch and adjust the sp, base pointer doesn't need to change 482 505 # lookup for sp,fp and uSwitch 483 xsp = context['SP'] + 4 8506 xsp = context['SP'] + 40 # 40 = 5 64bit registers : %r15, %r14, %r13, %r12, %rbx WARNING: x64 specific 484 507 xfp = context['FP'] 485 508 486 509 # convert string so we can strip out the address 487 510 try: 488 xpc = get_addr(gdb.parse_and_eval('__cfactx_switch').address + 28)511 xpc = get_addr(gdb.parse_and_eval('__cfactx_switch').address) 489 512 except: 490 513 print("here") 491 514 return 492 493 # must be at frame 0 to set pc register494 gdb.execute('select-frame 0')495 515 496 516 # push sp, fp, pc into a global stack … … 503 523 504 524 # update registers for new task 505 print('switching to ') 525 # print('switching to {} ({}) : [{}, {}, {}]'.format(thread['self_cor']['name'].string(), str(thread), str(xsp), str(xfp), str(xpc))) 526 print('switching to thread {} ({})'.format(str(thread), thread['self_cor']['name'].string())) 506 527 gdb.execute('set $rsp={}'.format(xsp)) 507 528 gdb.execute('set $rbp={}'.format(xfp)) … … 552 573 553 574 argv = parse(arg) 554 print(argv)555 575 if 
argv[0].isdigit(): 556 576 cname = " ".join(argv[1:]) if len(argv) > 1 else None
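The pushtask change above rebuilds a blocked thread's frame by pretending execution just entered __cfactx_switch: it keeps the saved frame pointer, sets the program counter to __cfactx_switch itself, and advances the stored stack pointer by 40 bytes past the five pushed callee-saved registers to reach the return address. Writing the assumed x86-64 frame out as a C struct makes the 40 visible; the layout is illustrative, only the offset matters.

    #include <stddef.h>
    #include <stdint.h>

    struct cfactx_frame {
        uint64_t r15, r14, r13, r12, rbx;   /* five callee-saved registers pushed by __cfactx_switch */
        uint64_t return_address;            /* where the blocked thread will resume */
    };

    _Static_assert(offsetof(struct cfactx_frame, return_address) == 40,
                   "matches the '+ 40' offset used by the gdb helper");

    int main(void) { return 0; }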