Changeset 8d66610 for libcfa/src/concurrency
- Timestamp: May 21, 2021, 4:48:10 PM (5 years ago)
- Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum, stuck-waitfor-destruct
- Children: f1bce515
- Parents: 5407cdc (diff), 7404cdc (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
- Location: libcfa/src/concurrency
- Files: 19 edited
  - alarm.cfa (modified) (6 diffs)
  - alarm.hfa (modified) (1 diff)
  - clib/cfathread.cfa (modified) (2 diffs)
  - clib/cfathread.h (modified) (1 diff)
  - invoke.h (modified) (3 diffs)
  - io.cfa (modified) (3 diffs)
  - kernel.cfa (modified) (16 diffs)
  - kernel.hfa (modified) (10 diffs)
  - kernel/fwd.hfa (modified) (2 diffs)
  - kernel/startup.cfa (modified) (13 diffs)
  - kernel_private.hfa (modified) (10 diffs)
  - locks.cfa (modified) (5 diffs)
  - locks.hfa (modified) (2 diffs)
  - preemption.cfa (modified) (15 diffs)
  - ready_queue.cfa (modified) (26 diffs)
  - ready_subqueue.hfa (modified) (3 diffs)
  - stats.cfa (modified) (6 diffs)
  - stats.hfa (modified) (3 diffs)
  - thread.cfa (modified) (1 diff)
libcfa/src/concurrency/alarm.cfa
r5407cdc r8d66610 38 38 39 39 void __kernel_set_timer( Duration alarm ) { 40 verifyf(alarm >= 1`us || alarm == 0, "Setting timer to < 1us (%jins)", alarm`ns); 41 setitimer( ITIMER_REAL, &(itimerval){ alarm }, 0p ); 40 alarm = max(alarm, 1`us); 41 itimerval otv @= { 0 }; 42 getitimer( ITIMER_REAL, &otv ); 43 Duration od = { otv.it_value }; 44 if(od == 0 || od > alarm) { 45 setitimer( ITIMER_REAL, &(itimerval){ alarm }, 0p ); 46 } 42 47 } 43 48 … … 46 51 //============================================================================================= 47 52 48 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period) with( this ) { 53 void ?{}( alarm_node_t & this, $thread * thrd, Duration alarm, Duration period) with( this ) { 54 this.initial = alarm; 55 this.period = period; 49 56 this.thrd = thrd; 50 this.alarm = alarm; 51 this.period = period; 57 this.timeval = __kernel_get_time() + alarm; 52 58 set = false; 53 59 type = User; 54 60 } 55 61 56 void ?{}( alarm_node_t & this, processor * proc, Time alarm, Duration period ) with( this ) { 62 void ?{}( alarm_node_t & this, processor * proc, Duration alarm, Duration period ) with( this ) { 63 this.initial = alarm; 64 this.period = period; 57 65 this.proc = proc; 58 this.alarm = alarm; 59 this.period = period; 66 this.timeval = __kernel_get_time() + alarm; 60 67 set = false; 61 68 type = Kernel; 62 69 } 63 void ?{}( alarm_node_t & this, Alarm_Callback callback, Time alarm, Duration period ) with( this ) { 64 this.alarm = alarm; 65 this.period = period; 70 void ?{}( alarm_node_t & this, Alarm_Callback callback, Duration alarm, Duration period ) with( this ) { 66 71 this.callback = callback; 72 this.initial = alarm; 73 this.period = period; 74 this.timeval = __kernel_get_time() + alarm; 67 75 set = false; 68 76 type = Callback; … … 77 85 void insert( alarm_list_t * this, alarm_node_t * n ) { 78 86 alarm_node_t * it = & (*this)`first; 79 while( it && (n-> alarm > it->alarm) ) {87 while( it && (n->timeval > it->timeval) ) { 80 88 it = & (*it)`next; 81 89 } … … 105 113 lock( event_kernel->lock __cfaabi_dbg_ctx2 ); 106 114 { 107 verify( validate( alarms ) ); 108 bool first = ! & alarms`first; 115 /* paranoid */ verify( validate( alarms ) ); 109 116 110 __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu).\n", this, this->alarm.tn ); 117 Time curr = __kernel_get_time(); 118 __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu -> %lu).\n", this, curr.tn, this->timeval.tn ); 111 119 insert( &alarms, this ); 112 if( first ) { 113 __kernel_set_timer( alarms`first.alarm - __kernel_get_time() ); 114 } 120 __kernel_set_timer( this->timeval - curr); 121 this->set = true; 115 122 } 116 123 unlock( event_kernel->lock ); 117 this->set = true;118 124 enable_interrupts(); 119 125 } … … 124 130 { 125 131 verify( validate( event_kernel->alarms ) ); 126 remove( *this ); 132 if (this->set) remove( *this ); 133 this->set = false; 127 134 } 128 135 unlock( event_kernel->lock ); 129 136 enable_interrupts(); 130 this->set = false;131 137 } 132 138 … … 136 142 137 143 void sleep( Duration duration ) { 138 alarm_node_t node = { active_thread(), __kernel_get_time() +duration, 0`s };144 alarm_node_t node = { active_thread(), duration, 0`s }; 139 145 140 146 register_self( &node ); -
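The alarm.cfa hunk above changes __kernel_set_timer so it no longer overwrites a pending ITIMER_REAL unconditionally: the delay is clamped to at least 1us, the currently armed value is read back with getitimer, and the timer is only re-armed when nothing is pending or the new deadline is sooner, so an earlier alarm is never lost. A minimal plain-C sketch of that policy (the helper name arm_timer_if_sooner is hypothetical; getitimer/setitimer are the POSIX calls the diff uses):

    #include <sys/time.h>
    #include <stdbool.h>
    #include <stddef.h>

    /* Re-arm ITIMER_REAL only if no timer is pending or the pending one fires later. */
    static void arm_timer_if_sooner(struct timeval delay) {
        struct itimerval cur = { { 0, 0 }, { 0, 0 } };
        getitimer(ITIMER_REAL, &cur);
        bool none_pending = (cur.it_value.tv_sec == 0 && cur.it_value.tv_usec == 0);
        bool ours_sooner  = (cur.it_value.tv_sec >  delay.tv_sec) ||
                            (cur.it_value.tv_sec == delay.tv_sec && cur.it_value.tv_usec > delay.tv_usec);
        if (none_pending || ours_sooner) {
            struct itimerval next = { { 0, 0 }, delay };   /* one-shot: it_interval stays zero */
            setitimer(ITIMER_REAL, &next, NULL);
        }
    }

The rest of the hunk is the matching data change: the alarm_node_t constructors now take a relative Duration, store it in initial, and compute the absolute timeval themselves, and register_self arms the timer for every newly inserted node instead of only when the list was previously empty.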
libcfa/src/concurrency/alarm.hfa
r5407cdc r8d66610 46 46 47 47 struct alarm_node_t { 48 Time alarm;// time when alarm goes off49 Duration period; // if > 0 => period of alarm48 Duration initial; // time when alarm goes off 49 Duration period; // if > 0 => period of alarm 50 50 51 DLISTED_MGD_IMPL_IN(alarm_node_t)51 inline dlink(alarm_node_t); 52 52 53 53 union { 54 $thread * thrd; // thrd who created event55 processor * proc; // proc who created event56 Alarm_Callback callback; // callback to handle event54 $thread * thrd; // thrd who created event 55 processor * proc; // proc who created event 56 Alarm_Callback callback; // callback to handle event 57 57 }; 58 58 59 bool set :1; // whether or not the alarm has be registered 60 enum alarm_type type; // true if this is not a user defined alarm 59 Time timeval; // actual time at which the alarm goes off 60 enum alarm_type type; // true if this is not a user defined alarm 61 bool set :1; // whether or not the alarm has be registered 61 62 }; 62 DLISTED_MGD_IMPL_OUT(alarm_node_t)63 P9_EMBEDDED( alarm_node_t, dlink(alarm_node_t) ) 63 64 64 void ?{}( alarm_node_t & this, $thread * thrd, Timealarm, Duration period );65 void ?{}( alarm_node_t & this, processor * proc, Timealarm, Duration period );66 void ?{}( alarm_node_t & this, Alarm_Callback callback, Timealarm, Duration period );65 void ?{}( alarm_node_t & this, $thread * thrd, Duration alarm, Duration period ); 66 void ?{}( alarm_node_t & this, processor * proc, Duration alarm, Duration period ); 67 void ?{}( alarm_node_t & this, Alarm_Callback callback, Duration alarm, Duration period ); 67 68 void ^?{}( alarm_node_t & this ); 68 69 69 typedef dlist(alarm_node_t , alarm_node_t) alarm_list_t;70 typedef dlist(alarm_node_t) alarm_list_t; 70 71 71 72 void insert( alarm_list_t * this, alarm_node_t * n ); -
libcfa/src/concurrency/clib/cfathread.cfa
r5407cdc r8d66610 27 27 extern void __cfactx_invoke_thread(void (*main)(void *), void * this); 28 28 } 29 30 extern Time __kernel_get_time(); 29 31 30 32 //================================================================================ … … 265 267 int cfathread_cond_timedwait(cfathread_cond_t *restrict cond, cfathread_mutex_t *restrict mut, const struct timespec *restrict abstime) __attribute__((nonnull (1,2,3))) { 266 268 Time t = { *abstime }; 267 if( wait( (*cond)->impl, (*mut)->impl, t ) ) { 269 timespec curr; 270 clock_gettime( CLOCK_REALTIME, &curr ); 271 Time c = { curr }; 272 if( wait( (*cond)->impl, (*mut)->impl, t - c ) ) { 268 273 return 0; 269 274 } -
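cfathread_cond_timedwait above adapts to the Duration-based wait(): the pthread-style absolute deadline is converted to a relative delay by reading CLOCK_REALTIME and subtracting. A small plain-C sketch of that conversion (the helper name is hypothetical):

    #include <time.h>

    /* Convert an absolute CLOCK_REALTIME deadline into a relative timeout,
       clamped to zero if the deadline has already passed. */
    static struct timespec abstime_to_relative(const struct timespec * abstime) {
        struct timespec now, rel;
        clock_gettime(CLOCK_REALTIME, &now);
        rel.tv_sec  = abstime->tv_sec  - now.tv_sec;
        rel.tv_nsec = abstime->tv_nsec - now.tv_nsec;
        if (rel.tv_nsec < 0) { rel.tv_sec -= 1; rel.tv_nsec += 1000000000L; }
        if (rel.tv_sec  < 0) { rel.tv_sec  = 0; rel.tv_nsec  = 0; }
        return rel;
    }

The WARNING added to cfathread.h below points at the limitation this creates: supporting pthread_condattr_setclock would require storing the chosen clock in the attribute and reading it at this conversion point.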
libcfa/src/concurrency/clib/cfathread.h
r5407cdc r8d66610 80 80 81 81 typedef struct cfathread_cond_attr { 82 // WARNING: adding support for pthread_condattr_setclock would require keeping track of the clock 83 // and reading it in cond_timedwait 82 84 } cfathread_condattr_t; 83 85 typedef struct cfathread_condition * cfathread_cond_t; -
libcfa/src/concurrency/invoke.h
r5407cdc r8d66610 146 146 struct __thread_desc_link { 147 147 struct $thread * next; 148 struct $thread * prev;149 148 volatile unsigned long long ts; 150 unsigned preferred;151 149 }; 152 150 … … 155 153 // context that is switch during a __cfactx_switch 156 154 struct __stack_context_t context; 155 156 // Link lists fields 157 // instrusive link field for threads 158 struct __thread_desc_link link; 157 159 158 160 // current execution status for coroutine … … 170 172 struct cluster * curr_cluster; 171 173 172 // Link lists fields 173 // instrusive link field for threads 174 struct __thread_desc_link link; 174 // preferred ready-queue 175 unsigned preferred; 175 176 176 177 // coroutine body used to store context -
libcfa/src/concurrency/io.cfa
r5407cdc r8d66610 138 138 /* paranoid */ verify( proc->io.ctx ); 139 139 140 __attribute__((unused)) cluster * cltr = proc->cltr; 140 141 $io_context & ctx = *proc->io.ctx; 142 143 // for(i; 2) { 144 // unsigned idx = proc->rdq.id + i; 145 // cltr->ready_queue.lanes.tscs[idx].tv = -1ull; 146 // } 141 147 142 148 __ioarbiter_flush( ctx ); … … 151 157 // Update statistics 152 158 __STATS__( false, io.calls.errors.busy ++; ) 159 // for(i; 2) { 160 // unsigned idx = proc->rdq.id + i; 161 // cltr->ready_queue.lanes.tscs[idx].tv = rdtscl(); 162 // } 153 163 return; 154 164 default: … … 172 182 173 183 ctx.proc->io.pending = false; 184 185 ready_schedule_lock(); 186 __cfa_io_drain( proc ); 187 ready_schedule_unlock(); 188 // for(i; 2) { 189 // unsigned idx = proc->rdq.id + i; 190 // cltr->ready_queue.lanes.tscs[idx].tv = rdtscl(); 191 // } 174 192 } 175 193 -
libcfa/src/concurrency/kernel.cfa
r5407cdc r8d66610 163 163 #if !defined(__CFA_NO_STATISTICS__) 164 164 if( this->print_halts ) { 165 __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this-> id, this->name, (void*)this);165 __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->unique_id, this->name, (void*)this); 166 166 } 167 167 #endif … … 170 170 // Setup preemption data 171 171 preemption_scope scope = { this }; 172 173 __STATS( unsigned long long last_tally = rdtscl(); )174 172 175 173 // if we need to run some special setup, now is the time to do it. … … 184 182 MAIN_LOOP: 185 183 for() { 184 #define OLD_MAIN 1 185 #if OLD_MAIN 186 186 // Check if there is pending io 187 187 __maybe_io_drain( this ); … … 223 223 #if !defined(__CFA_NO_STATISTICS__) 224 224 if(this->print_halts) { 225 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this-> id, rdtscl());225 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl()); 226 226 } 227 227 #endif … … 236 236 #if !defined(__CFA_NO_STATISTICS__) 237 237 if(this->print_halts) { 238 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this-> id, rdtscl());238 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl()); 239 239 } 240 240 #endif … … 258 258 if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 259 259 260 #if !defined(__CFA_NO_STATISTICS__)261 unsigned long long curr = rdtscl();262 if(curr > (last_tally + 500000000)) {263 __tally_stats(this->cltr->stats, __cfaabi_tls.this_stats);264 last_tally = curr;265 }266 #endif267 268 260 if(this->io.pending && !this->io.dirty) { 269 261 __cfa_io_flush( this ); 270 262 } 271 263 272 // SEARCH: { 273 // /* paranoid */ verify( ! __preemption_enabled() ); 274 // /* paranoid */ verify( kernelTLS().this_proc_id ); 275 276 // // First, lock the scheduler since we are searching for a thread 277 278 // // Try to get the next thread 279 // ready_schedule_lock(); 280 // readyThread = pop_fast( this->cltr ); 281 // ready_schedule_unlock(); 282 // if(readyThread) { break SEARCH; } 283 284 // // If we can't find a thread, might as well flush any outstanding I/O 285 // if(this->io.pending) { __cfa_io_flush( this ); } 286 287 // // Spin a little on I/O, just in case 288 // for(25) { 289 // __maybe_io_drain( this ); 290 // ready_schedule_lock(); 291 // readyThread = pop_fast( this->cltr ); 292 // ready_schedule_unlock(); 293 // if(readyThread) { break SEARCH; } 294 // } 295 296 // // no luck, try stealing a few times 297 // for(25) { 298 // if( __maybe_io_drain( this ) ) { 299 // ready_schedule_lock(); 300 // readyThread = pop_fast( this->cltr ); 301 // } else { 302 // ready_schedule_lock(); 303 // readyThread = pop_slow( this->cltr ); 304 // } 305 // ready_schedule_unlock(); 306 // if(readyThread) { break SEARCH; } 307 // } 308 309 // // still no luck, search for a thread 310 // ready_schedule_lock(); 311 // readyThread = pop_search( this->cltr ); 312 // ready_schedule_unlock(); 313 // if(readyThread) { break SEARCH; } 314 315 // // Don't block if we are done 316 // if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 317 318 // __STATS( __tls_stats()->ready.sleep.halts++; ) 319 320 // // Push self to idle stack 321 // mark_idle(this->cltr->procs, * this); 322 323 // // Confirm the ready-queue is empty 324 // __maybe_io_drain( this ); 325 // ready_schedule_lock(); 326 // readyThread = pop_search( this->cltr ); 327 // ready_schedule_unlock(); 328 329 // if( readyThread ) { 
330 // // A thread was found, cancel the halt 331 // mark_awake(this->cltr->procs, * this); 332 333 // __STATS( __tls_stats()->ready.sleep.cancels++; ) 334 335 // // continue the main loop 336 // break SEARCH; 337 // } 338 339 // __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl()); ) 340 // __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle); 341 342 // // __disable_interrupts_hard(); 343 // eventfd_t val; 344 // eventfd_read( this->idle, &val ); 345 // // __enable_interrupts_hard(); 346 347 // __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl()); ) 348 349 // // We were woken up, remove self from idle 350 // mark_awake(this->cltr->procs, * this); 351 352 // // DON'T just proceed, start looking again 353 // continue MAIN_LOOP; 354 // } 355 356 // RUN_THREAD: 357 // /* paranoid */ verify( kernelTLS().this_proc_id ); 358 // /* paranoid */ verify( ! __preemption_enabled() ); 359 // /* paranoid */ verify( readyThread ); 360 361 // // Reset io dirty bit 362 // this->io.dirty = false; 363 364 // // We found a thread run it 365 // __run_thread(this, readyThread); 366 367 // // Are we done? 368 // if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 369 370 // #if !defined(__CFA_NO_STATISTICS__) 371 // unsigned long long curr = rdtscl(); 372 // if(curr > (last_tally + 500000000)) { 373 // __tally_stats(this->cltr->stats, __cfaabi_tls.this_stats); 374 // last_tally = curr; 375 // } 376 // #endif 377 378 // if(this->io.pending && !this->io.dirty) { 379 // __cfa_io_flush( this ); 380 // } 381 382 // // Check if there is pending io 383 // __maybe_io_drain( this ); 264 #else 265 #warning new kernel loop 266 SEARCH: { 267 /* paranoid */ verify( ! 
__preemption_enabled() ); 268 269 // First, lock the scheduler since we are searching for a thread 270 ready_schedule_lock(); 271 272 // Try to get the next thread 273 readyThread = pop_fast( this->cltr ); 274 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 275 276 // If we can't find a thread, might as well flush any outstanding I/O 277 if(this->io.pending) { __cfa_io_flush( this ); } 278 279 // Spin a little on I/O, just in case 280 for(5) { 281 __maybe_io_drain( this ); 282 readyThread = pop_fast( this->cltr ); 283 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 284 } 285 286 // no luck, try stealing a few times 287 for(5) { 288 if( __maybe_io_drain( this ) ) { 289 readyThread = pop_fast( this->cltr ); 290 } else { 291 readyThread = pop_slow( this->cltr ); 292 } 293 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 294 } 295 296 // still no luck, search for a thread 297 readyThread = pop_search( this->cltr ); 298 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 299 300 // Don't block if we are done 301 if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 302 303 __STATS( __tls_stats()->ready.sleep.halts++; ) 304 305 // Push self to idle stack 306 ready_schedule_unlock(); 307 mark_idle(this->cltr->procs, * this); 308 ready_schedule_lock(); 309 310 // Confirm the ready-queue is empty 311 __maybe_io_drain( this ); 312 readyThread = pop_search( this->cltr ); 313 ready_schedule_unlock(); 314 315 if( readyThread ) { 316 // A thread was found, cancel the halt 317 mark_awake(this->cltr->procs, * this); 318 319 __STATS( __tls_stats()->ready.sleep.cancels++; ) 320 321 // continue the main loop 322 break SEARCH; 323 } 324 325 __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl()); ) 326 __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle); 327 328 // __disable_interrupts_hard(); 329 eventfd_t val; 330 eventfd_read( this->idle, &val ); 331 // __enable_interrupts_hard(); 332 333 __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl()); ) 334 335 // We were woken up, remove self from idle 336 mark_awake(this->cltr->procs, * this); 337 338 // DON'T just proceed, start looking again 339 continue MAIN_LOOP; 340 } 341 342 RUN_THREAD: 343 /* paranoid */ verify( ! __preemption_enabled() ); 344 /* paranoid */ verify( readyThread ); 345 346 // Reset io dirty bit 347 this->io.dirty = false; 348 349 // We found a thread run it 350 __run_thread(this, readyThread); 351 352 // Are we done? 353 if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 354 355 if(this->io.pending && !this->io.dirty) { 356 __cfa_io_flush( this ); 357 } 358 359 ready_schedule_lock(); 360 __maybe_io_drain( this ); 361 ready_schedule_unlock(); 362 #endif 384 363 } 385 364 … … 390 369 391 370 post( this->terminated ); 392 393 371 394 372 if(this == mainProcessor) { … … 553 531 static void __schedule_thread( $thread * thrd ) { 554 532 /* paranoid */ verify( ! 
__preemption_enabled() ); 555 /* paranoid */ verify( kernelTLS().this_proc_id );556 533 /* paranoid */ verify( ready_schedule_islocked()); 557 534 /* paranoid */ verify( thrd ); … … 567 544 /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary ); 568 545 569 546 const bool local = thrd->state != Start; 570 547 if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready; 571 548 … … 575 552 576 553 // push the thread to the cluster ready-queue 577 push( cl, thrd );554 push( cl, thrd, local ); 578 555 579 556 // variable thrd is no longer safe to use … … 611 588 static inline $thread * __next_thread(cluster * this) with( *this ) { 612 589 /* paranoid */ verify( ! __preemption_enabled() ); 613 /* paranoid */ verify( kernelTLS().this_proc_id );614 590 615 591 ready_schedule_lock(); … … 617 593 ready_schedule_unlock(); 618 594 619 /* paranoid */ verify( kernelTLS().this_proc_id );620 595 /* paranoid */ verify( ! __preemption_enabled() ); 621 596 return thrd; … … 625 600 static inline $thread * __next_thread_slow(cluster * this) with( *this ) { 626 601 /* paranoid */ verify( ! __preemption_enabled() ); 627 /* paranoid */ verify( kernelTLS().this_proc_id );628 602 629 603 ready_schedule_lock(); … … 638 612 ready_schedule_unlock(); 639 613 640 /* paranoid */ verify( kernelTLS().this_proc_id );641 614 /* paranoid */ verify( ! __preemption_enabled() ); 642 615 return thrd; … … 895 868 unsigned tail = *ctx->cq.tail; 896 869 if(head == tail) return false; 870 #if OLD_MAIN 897 871 ready_schedule_lock(); 898 872 ret = __cfa_io_drain( proc ); 899 873 ready_schedule_unlock(); 874 #else 875 ret = __cfa_io_drain( proc ); 876 #endif 900 877 #endif 901 878 return ret; … … 926 903 } 927 904 905 static void crawl_list( cluster * cltr, dlist(processor) & list, unsigned count ) { 906 /* paranoid */ verify( cltr->stats ); 907 908 processor * it = &list`first; 909 for(unsigned i = 0; i < count; i++) { 910 /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count); 911 /* paranoid */ verify( it->local_data->this_stats ); 912 __tally_stats( cltr->stats, it->local_data->this_stats ); 913 it = &(*it)`next; 914 } 915 } 916 917 void crawl_cluster_stats( cluster & this ) { 918 // Stop the world, otherwise stats could get really messed-up 919 // this doesn't solve all problems but does solve many 920 // so it's probably good enough 921 uint_fast32_t last_size = ready_mutate_lock(); 922 923 crawl_list(&this, this.procs.actives, this.procs.total - this.procs.idle); 924 crawl_list(&this, this.procs.idles , this.procs.idle ); 925 926 // Unlock the RWlock 927 ready_mutate_unlock( last_size ); 928 } 929 930 928 931 void print_stats_now( cluster & this, int flags ) { 932 crawl_cluster_stats( this ); 929 933 __print_stats( this.stats, this.print_stats, "Cluster", this.name, (void*)&this ); 930 934 } -
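Most of the kernel.cfa churn above is the alternative main loop (currently disabled behind OLD_MAIN) plus the idle-sleep handshake: a processor publishes itself on the idle list, re-checks the ready queue to close the race with a concurrent wake-up, and only then blocks on its eventfd. A plain-C sketch of just that handshake (mark_idle, mark_awake and pop_search are stand-ins for the CFA helpers):

    #include <sys/eventfd.h>
    #include <stddef.h>

    extern void   mark_idle(void);          /* stand-in: add self to the cluster's idle list */
    extern void   mark_awake(void);         /* stand-in: remove self from the idle list */
    extern void * pop_search(void);         /* stand-in: exhaustive search of the ready queue */

    /* Returns a thread if one appeared while going to sleep, NULL after a real wake-up. */
    static void * idle_sleep(int idle_fd) {
        mark_idle();                         /* 1. advertise that we are about to sleep */
        void * thrd = pop_search();          /* 2. re-check; a push may have missed the idle list */
        if (thrd) { mark_awake(); return thrd; }

        eventfd_t val;
        eventfd_read(idle_fd, &val);         /* 3. block until another processor writes the eventfd */

        mark_awake();                        /* 4. leave the idle list and start searching again */
        return NULL;
    }

The hunk also replaces this->id with this->unique_id in the halt tracing, drops the periodic stats tally from the hot loop (stats are now gathered on demand by crawl_cluster_stats further down), and has __schedule_thread pass a local flag to push.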
libcfa/src/concurrency/kernel.hfa
r5407cdc r8d66610 49 49 50 50 // Processor id, required for scheduling threads 51 struct __processor_id_t { 52 unsigned id:24; 53 54 #if !defined(__CFA_NO_STATISTICS__) 55 struct __stats_t * stats; 56 #endif 57 }; 51 58 52 59 53 coroutine processorCtx_t { … … 63 57 // Wrapper around kernel threads 64 58 struct __attribute__((aligned(128))) processor { 65 // Main state66 inline __processor_id_t;67 68 59 // Cluster from which to get threads 69 60 struct cluster * cltr; … … 90 81 pthread_t kernel_thread; 91 82 83 // Unique id for the processor (not per cluster) 84 unsigned unique_id; 85 92 86 struct { 93 87 $io_context * ctx; … … 113 107 114 108 // Link lists fields 115 DLISTED_MGD_IMPL_IN(processor)109 inline dlink(processor); 116 110 117 111 // special init fields … … 123 117 } init; 124 118 119 struct KernelThreadData * local_data; 120 125 121 #if !defined(__CFA_NO_STATISTICS__) 126 122 int print_stats; … … 133 129 #endif 134 130 }; 131 P9_EMBEDDED( processor, dlink(processor) ) 135 132 136 133 void ?{}(processor & this, const char name[], struct cluster & cltr); … … 140 137 static inline void ?{}(processor & this, struct cluster & cltr) { this{ "Anonymous Processor", cltr}; } 141 138 static inline void ?{}(processor & this, const char name[]) { this{name, *mainCluster}; } 142 143 DLISTED_MGD_IMPL_OUT(processor)144 139 145 140 //----------------------------------------------------------------------------- … … 152 147 153 148 // Aligned timestamps which are used by the relaxed ready queue 154 struct __attribute__((aligned(128))) __timestamp_t; 155 void ?{}(__timestamp_t & this); 156 void ^?{}(__timestamp_t & this); 149 struct __attribute__((aligned(128))) __timestamp_t { 150 volatile unsigned long long tv; 151 }; 152 153 static inline void ?{}(__timestamp_t & this) { this.tv = 0; } 154 static inline void ^?{}(__timestamp_t & this) {} 157 155 158 156 //TODO adjust cache size to ARCHITECTURE … … 177 175 void ?{}(__ready_queue_t & this); 178 176 void ^?{}(__ready_queue_t & this); 177 #if !defined(__CFA_NO_STATISTICS__) 178 unsigned cnt(const __ready_queue_t & this, unsigned idx); 179 #endif 179 180 180 181 // Idle Sleep … … 190 191 191 192 // List of idle processors 192 dlist(processor , processor) idles;193 dlist(processor) idles; 193 194 194 195 // List of active processors 195 dlist(processor , processor) actives;196 dlist(processor) actives; 196 197 }; 197 198 -
libcfa/src/concurrency/kernel/fwd.hfa
r5407cdc r8d66610 38 38 struct $thread * volatile this_thread; 39 39 struct processor * volatile this_processor; 40 struct __processor_id_t * volatile this_proc_id; 41 struct __stats_t * volatile this_stats; 40 volatile bool sched_lock; 42 41 43 42 struct { … … 56 55 uint64_t bck_seed; 57 56 } ready_rng; 57 58 struct __stats_t * volatile this_stats; 59 60 61 #ifdef __CFA_WITH_VERIFY__ 62 // Debug, check if the rwlock is owned for reading 63 bool in_sched_lock; 64 unsigned sched_id; 65 #endif 58 66 } __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" ))); 59 67 -
libcfa/src/concurrency/kernel/startup.cfa
r5407cdc r8d66610 77 77 static void doregister( struct cluster & cltr ); 78 78 static void unregister( struct cluster & cltr ); 79 static void register_tls( processor * this ); 80 static void unregister_tls( processor * this ); 79 81 static void ?{}( $coroutine & this, current_stack_info_t * info); 80 82 static void ?{}( $thread & this, current_stack_info_t * info); … … 123 125 NULL, // cannot use 0p 124 126 NULL, 127 false, 128 { 1, false, false }, 129 0, 130 { 0, 0 }, 125 131 NULL, 126 NULL, 127 { 1, false, false }, 132 #ifdef __CFA_WITH_VERIFY__ 133 false, 134 0, 135 #endif 128 136 }; 129 137 … … 210 218 (*mainProcessor){}; 211 219 220 register_tls( mainProcessor ); 221 212 222 //initialize the global state variables 213 223 __cfaabi_tls.this_processor = mainProcessor; 214 __cfaabi_tls.this_proc_id = (__processor_id_t*)mainProcessor;215 224 __cfaabi_tls.this_thread = mainThread; 216 225 … … 219 228 __init_stats( __cfaabi_tls.this_stats ); 220 229 #endif 230 mainProcessor->local_data = &__cfaabi_tls; 221 231 222 232 // Enable preemption … … 273 283 #endif 274 284 285 mainProcessor->local_data = 0p; 286 287 unregister_tls( mainProcessor ); 288 275 289 // Destroy the main processor and its context in reverse order of construction 276 290 // These were manually constructed so we need manually destroy them … … 316 330 processor * proc = (processor *) arg; 317 331 __cfaabi_tls.this_processor = proc; 318 __cfaabi_tls.this_proc_id = (__processor_id_t*)proc;319 332 __cfaabi_tls.this_thread = 0p; 320 333 __cfaabi_tls.preemption_state.[enabled, disable_count] = [false, 1]; 334 proc->local_data = &__cfaabi_tls; 335 336 register_tls( proc ); 337 321 338 // SKULLDUGGERY: We want to create a context for the processor coroutine 322 339 // which is needed for the 2-step context switch. However, there is no reason … … 355 372 #endif 356 373 #endif 374 375 proc->local_data = 0p; 376 377 unregister_tls( proc ); 357 378 358 379 return 0p; … … 446 467 self_mon_p = &self_mon; 447 468 link.next = 0p; 448 link. 
prev = 0p;449 link.preferred = -1u;469 link.ts = 0; 470 preferred = -1u; 450 471 last_proc = 0p; 451 472 #if defined( __CFA_WITH_VERIFY__ ) … … 475 496 this.rdq.id = -1u; 476 497 this.rdq.target = -1u; 477 this.rdq.cutoff = -1ull;498 this.rdq.cutoff = 0ull; 478 499 do_terminate = false; 479 500 preemption_alarm = 0p; … … 485 506 486 507 this.init.thrd = initT; 508 509 this.local_data = 0p; 487 510 488 511 this.idle = eventfd(0, 0); … … 496 519 #endif 497 520 498 // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue499 uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);500 this.cltr->procs.total += 1u;501 insert_last(this.cltr->procs.actives, this);502 503 // Adjust the ready queue size504 ready_queue_grow( cltr );505 506 // Unlock the RWlock507 ready_mutate_unlock( last_size );508 509 521 __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this); 510 522 } … … 512 524 // Not a ctor, it just preps the destruction but should not destroy members 513 525 static void deinit(processor & this) { 514 // Lock the RWlock so no-one pushes/pops while we are changing the queue515 uint_fast32_t last_size = ready_mutate_lock();516 this.cltr->procs.total -= 1u;517 remove(this);518 519 // Adjust the ready queue size520 ready_queue_shrink( this.cltr );521 522 // Unlock the RWlock and unregister: we don't need the read_lock any more523 ready_mutate_unregister((__processor_id_t*)&this, last_size );524 525 526 close(this.idle); 526 527 } … … 656 657 cltr->nthreads -= 1; 657 658 unlock(cltr->thread_list_lock); 659 } 660 661 static void register_tls( processor * this ) { 662 // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue 663 uint_fast32_t last_size; 664 [this->unique_id, last_size] = ready_mutate_register(); 665 666 this->cltr->procs.total += 1u; 667 insert_last(this->cltr->procs.actives, *this); 668 669 // Adjust the ready queue size 670 ready_queue_grow( this->cltr ); 671 672 // Unlock the RWlock 673 ready_mutate_unlock( last_size ); 674 } 675 676 677 static void unregister_tls( processor * this ) { 678 // Lock the RWlock so no-one pushes/pops while we are changing the queue 679 uint_fast32_t last_size = ready_mutate_lock(); 680 this->cltr->procs.total -= 1u; 681 remove(*this); 682 683 // clear the cluster so nothing gets pushed to local queues 684 cluster * cltr = this->cltr; 685 this->cltr = 0p; 686 687 // Adjust the ready queue size 688 ready_queue_shrink( cltr ); 689 690 // Unlock the RWlock and unregister: we don't need the read_lock any more 691 ready_mutate_unregister( this->unique_id, last_size ); 658 692 } 659 693 -
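kernel/startup.cfa above moves processor registration out of the constructor and destructor into register_tls/unregister_tls, called from the kernel thread itself once its TLS block exists. The ordering is the point: claim the reader-lock slot first, then grow the ready queue under the write lock; on tear-down, clear the cluster pointer before shrinking so nothing can push to the disappearing local lanes, and hand the slot back only after unlocking. A plain-C outline (the extern signatures mirror names in the diff; the structs are simplified):

    #include <stdint.h>

    struct cluster;
    struct processor { unsigned unique_id; struct cluster * cltr; };

    extern unsigned      register_proc_id(void);           /* claim a slot in the scheduler RW-lock */
    extern void          unregister_proc_id(unsigned id);
    extern uint_fast32_t ready_mutate_lock(void);           /* take the write side of the RW-lock */
    extern void          ready_mutate_unlock(uint_fast32_t last);
    extern void          ready_queue_grow(struct cluster *);
    extern void          ready_queue_shrink(struct cluster *);

    static void register_tls_sketch(struct processor * p) {
        p->unique_id = register_proc_id();                  /* 1. get an id before locking */
        uint_fast32_t last = ready_mutate_lock();            /* 2. stop-the-world on the ready queue */
        /* insert p into p->cltr->procs.actives, bump procs.total */
        ready_queue_grow(p->cltr);                           /* 3. add this processor's lanes */
        ready_mutate_unlock(last);
    }

    static void unregister_tls_sketch(struct processor * p) {
        uint_fast32_t last = ready_mutate_lock();
        /* remove p from the active list, drop procs.total */
        struct cluster * cltr = p->cltr;
        p->cltr = 0;                                         /* no more pushes to our local lanes */
        ready_queue_shrink(cltr);
        ready_mutate_unlock(last);
        unregister_proc_id(p->unique_id);                    /* hand the slot back after unlocking */
    }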
libcfa/src/concurrency/kernel_private.hfa
r5407cdc r8d66610 25 25 // Scheduler 26 26 27 struct __attribute__((aligned(128))) __scheduler_lock_id_t;28 27 29 28 extern "C" { … … 80 79 // Lock-Free registering/unregistering of threads 81 80 // Register a processor to a given cluster and get its unique id in return 82 void register_proc_id( struct __processor_id_t *);81 unsigned register_proc_id( void ); 83 82 84 83 // Unregister a processor from a given cluster using its id, getting back the original pointer 85 void unregister_proc_id( struct __processor_id_t * proc);84 void unregister_proc_id( unsigned ); 86 85 87 86 //======================================================================= … … 112 111 } 113 112 114 // Cells use by the reader writer lock 115 // while not generic it only relies on a opaque pointer 116 struct __attribute__((aligned(128))) __scheduler_lock_id_t { 117 // Spin lock used as the underlying lock 118 volatile bool lock; 119 120 // Handle pointing to the proc owning this cell 121 // Used for allocating cells and debugging 122 __processor_id_t * volatile handle; 123 124 #ifdef __CFA_WITH_VERIFY__ 125 // Debug, check if this is owned for reading 126 bool owned; 127 #endif 128 }; 129 130 static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t)); 113 114 115 131 116 132 117 //----------------------------------------------------------------------- … … 147 132 148 133 // writer lock 149 volatile bool lock;134 volatile bool write_lock; 150 135 151 136 // data pointer 152 __scheduler_lock_id_t* data;137 volatile bool * volatile * data; 153 138 }; 154 139 … … 163 148 static inline void ready_schedule_lock(void) with(*__scheduler_lock) { 164 149 /* paranoid */ verify( ! __preemption_enabled() ); 165 /* paranoid */ verify( kernelTLS().this_proc_id ); 166 167 unsigned iproc = kernelTLS().this_proc_id->id; 168 /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id); 169 /*paranoid*/ verify(iproc < ready); 150 /* paranoid */ verify( ! kernelTLS().in_sched_lock ); 151 /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock ); 152 /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id ); 170 153 171 154 // Step 1 : make sure no writer are in the middle of the critical section 172 while(__atomic_load_n(& lock, (int)__ATOMIC_RELAXED))155 while(__atomic_load_n(&write_lock, (int)__ATOMIC_RELAXED)) 173 156 Pause(); 174 157 … … 179 162 180 163 // Step 2 : acquire our local lock 181 __atomic_acquire( & data[iproc].lock );182 /*paranoid*/ verify( data[iproc].lock);164 __atomic_acquire( &kernelTLS().sched_lock ); 165 /*paranoid*/ verify(kernelTLS().sched_lock); 183 166 184 167 #ifdef __CFA_WITH_VERIFY__ 185 168 // Debug, check if this is owned for reading 186 data[iproc].owned= true;169 kernelTLS().in_sched_lock = true; 187 170 #endif 188 171 } … … 190 173 static inline void ready_schedule_unlock(void) with(*__scheduler_lock) { 191 174 /* paranoid */ verify( ! 
__preemption_enabled() ); 192 /* paranoid */ verify( kernelTLS().this_proc_id ); 193 194 unsigned iproc = kernelTLS().this_proc_id->id; 195 /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id); 196 /*paranoid*/ verify(iproc < ready); 197 /*paranoid*/ verify(data[iproc].lock); 198 /*paranoid*/ verify(data[iproc].owned); 175 /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock ); 176 /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id ); 177 /* paranoid */ verify( kernelTLS().sched_lock ); 178 /* paranoid */ verify( kernelTLS().in_sched_lock ); 199 179 #ifdef __CFA_WITH_VERIFY__ 200 180 // Debug, check if this is owned for reading 201 data[iproc].owned= false;181 kernelTLS().in_sched_lock = false; 202 182 #endif 203 __atomic_unlock(& data[iproc].lock);183 __atomic_unlock(&kernelTLS().sched_lock); 204 184 } 205 185 … … 207 187 static inline bool ready_schedule_islocked(void) { 208 188 /* paranoid */ verify( ! __preemption_enabled() ); 209 /*paranoid*/ verify( kernelTLS().this_proc_id ); 210 __processor_id_t * proc = kernelTLS().this_proc_id; 211 return __scheduler_lock->data[proc->id].owned; 189 /* paranoid */ verify( (!kernelTLS().in_sched_lock) || kernelTLS().sched_lock ); 190 return kernelTLS().sched_lock; 212 191 } 213 192 214 193 static inline bool ready_mutate_islocked() { 215 return __scheduler_lock-> lock;194 return __scheduler_lock->write_lock; 216 195 } 217 196 #endif … … 228 207 // Register a processor to a given cluster and get its unique id in return 229 208 // For convenience, also acquires the lock 230 static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) { 231 register_proc_id( proc ); 232 return ready_mutate_lock(); 209 static inline [unsigned, uint_fast32_t] ready_mutate_register() { 210 unsigned id = register_proc_id(); 211 uint_fast32_t last = ready_mutate_lock(); 212 return [id, last]; 233 213 } 234 214 235 215 // Unregister a processor from a given cluster using its id, getting back the original pointer 236 216 // assumes the lock is acquired 237 static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {217 static inline void ready_mutate_unregister( unsigned id, uint_fast32_t last_s ) { 238 218 ready_mutate_unlock( last_s ); 239 unregister_proc_id( proc);219 unregister_proc_id( id ); 240 220 } 241 221 … … 281 261 // push thread onto a ready queue for a cluster 282 262 // returns true if the list was previously empty, false otherwise 283 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd );263 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool local); 284 264 285 265 //----------------------------------------------------------------------- -
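kernel_private.hfa above replaces the per-processor __scheduler_lock_id_t cells with plain pointers to a thread-local flag (kernelTLS().sched_lock): to read-lock, a processor waits for the global write_lock to clear and then acquires only its own flag; to write-lock, ready_mutate_lock takes write_lock and then sweeps every registered flag. A minimal C11-atomics sketch of that shape (the fixed-size array and the names are simplifications; registration of the TLS flag is omitted):

    #include <stdatomic.h>
    #include <stdbool.h>

    #define MAX_PROCS 128

    static atomic_bool               write_lock;                /* held by the single writer */
    static atomic_bool * _Atomic     readers[MAX_PROCS];         /* slot i -> processor i's TLS flag */
    static _Thread_local atomic_bool my_flag;                    /* this processor's reader flag */

    void read_lock(void) {
        /* Step 1: make sure no writer is in the middle of the critical section. */
        while (atomic_load_explicit(&write_lock, memory_order_relaxed)) ;
        /* Step 2: acquire only our own flag; a concurrent writer will wait on it. */
        while (atomic_exchange_explicit(&my_flag, true, memory_order_acquire)) ;
    }

    void read_unlock(void) {
        atomic_store_explicit(&my_flag, false, memory_order_release);
    }

    void write_lock_all(unsigned nready) {
        while (atomic_exchange_explicit(&write_lock, true, memory_order_acquire)) ;
        for (unsigned i = 0; i < nready; i++) {                  /* drain every registered reader */
            atomic_bool * f = atomic_load(&readers[i]);
            if (f) while (atomic_exchange_explicit(f, true, memory_order_acquire)) ;
        }
    }

    void write_unlock_all(unsigned nready) {
        for (unsigned i = 0; i < nready; i++) {
            atomic_bool * f = atomic_load(&readers[i]);
            if (f) atomic_store_explicit(f, false, memory_order_release);
        }
        atomic_store_explicit(&write_lock, false, memory_order_release);
    }

The read side now touches only thread-local state plus one shared flag instead of indexing a shared cell array through this_proc_id, which is why the __processor_id_t type and its id field can disappear from kernel.hfa.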
libcfa/src/concurrency/locks.cfa
r5407cdc r8d66610 188 188 alarm_node_t alarm_node; 189 189 condition_variable(L) * cond; 190 info_thread(L) * i ;190 info_thread(L) * info_thd; 191 191 }; 192 192 193 void ?{}( alarm_node_wrap(L) & this, Timealarm, Duration period, Alarm_Callback callback, condition_variable(L) * c, info_thread(L) * i ) {193 void ?{}( alarm_node_wrap(L) & this, Duration alarm, Duration period, Alarm_Callback callback, condition_variable(L) * c, info_thread(L) * i ) { 194 194 this.alarm_node{ callback, alarm, period }; 195 195 this.cond = c; 196 this.i = i;196 this.info_thd = i; 197 197 } 198 198 … … 206 206 // may still be called after a thread has been removed from the queue but 207 207 // before the alarm is unregistered 208 if ( listed(i ) ) { // is thread on queue209 i ->signalled = false;208 if ( listed(info_thd) ) { // is thread on queue 209 info_thd->signalled = false; 210 210 // remove this thread O(1) 211 remove( cond->blocked_threads, *i );211 remove( cond->blocked_threads, *info_thd ); 212 212 cond->count--; 213 if( i ->lock ) {213 if( info_thd->lock ) { 214 214 // call lock's on_notify if a lock was passed 215 on_notify(*i ->lock, i->t);215 on_notify(*info_thd->lock, info_thd->t); 216 216 } else { 217 217 // otherwise wake thread 218 unpark( i ->t );218 unpark( info_thd->t ); 219 219 } 220 220 } … … 313 313 314 314 // helper for wait()'s' with a timeout 315 void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Time t) with(this) {315 void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Duration t, Alarm_Callback callback ) with(this) { 316 316 lock( lock __cfaabi_dbg_ctx2 ); 317 317 size_t recursion_count = queue_and_get_recursion(this, &info); 318 alarm_node_wrap(L) node_wrap = { t, 0`s, alarm_node_wrap_cast, &this, &info };318 alarm_node_wrap(L) node_wrap = { t, 0`s, callback, &this, &info }; 319 319 register_self( &node_wrap.alarm_node ); 320 320 unlock( lock ); … … 332 332 #define WAIT_TIME( u, l, t ) \ 333 333 info_thread( L ) i = { active_thread(), u, l }; \ 334 queue_info_thread_timeout(this, i, t ); \334 queue_info_thread_timeout(this, i, t, alarm_node_wrap_cast ); \ 335 335 return i.signalled; 336 336 … … 340 340 void wait( condition_variable(L) & this, L & l, uintptr_t info ) with(this) { WAIT( info, &l ) } 341 341 342 bool wait( condition_variable(L) & this, Duration duration ) with(this) { WAIT_TIME( 0 , 0p , __kernel_get_time() + duration ) } 343 bool wait( condition_variable(L) & this, uintptr_t info, Duration duration ) with(this) { WAIT_TIME( info, 0p , __kernel_get_time() + duration ) } 344 bool wait( condition_variable(L) & this, Time time ) with(this) { WAIT_TIME( 0 , 0p , time ) } 345 bool wait( condition_variable(L) & this, uintptr_t info, Time time ) with(this) { WAIT_TIME( info, 0p , time ) } 346 bool wait( condition_variable(L) & this, L & l, Duration duration ) with(this) { WAIT_TIME( 0 , &l , __kernel_get_time() + duration ) } 347 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration ) with(this) { WAIT_TIME( info, &l , __kernel_get_time() + duration ) } 348 bool wait( condition_variable(L) & this, L & l, Time time ) with(this) { WAIT_TIME( 0 , &l , time ) } 349 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Time time ) with(this) { WAIT_TIME( info, &l , time ) } 342 bool wait( condition_variable(L) & this, Duration duration ) with(this) { WAIT_TIME( 0 , 0p , duration ) } 343 bool wait( condition_variable(L) & this, uintptr_t info, Duration duration ) with(this) 
{ WAIT_TIME( info, 0p , duration ) } 344 bool wait( condition_variable(L) & this, L & l, Duration duration ) with(this) { WAIT_TIME( 0 , &l , duration ) } 345 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration ) with(this) { WAIT_TIME( info, &l , duration ) } 350 346 } 351 347 -
libcfa/src/concurrency/locks.hfa
r5407cdc r8d66610 290 290 bool wait( condition_variable(L) & this, Duration duration ); 291 291 bool wait( condition_variable(L) & this, uintptr_t info, Duration duration ); 292 bool wait( condition_variable(L) & this, Time time );293 bool wait( condition_variable(L) & this, uintptr_t info, Time time );294 292 295 293 void wait( condition_variable(L) & this, L & l ); … … 297 295 bool wait( condition_variable(L) & this, L & l, Duration duration ); 298 296 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration ); 299 bool wait( condition_variable(L) & this, L & l, Time time ); 300 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Time time ); 301 } 297 } -
libcfa/src/concurrency/preemption.cfa
r5407cdc r8d66610 18 18 19 19 #include "preemption.hfa" 20 20 21 #include <assert.h> 21 22 … … 26 27 #include <limits.h> // PTHREAD_STACK_MIN 27 28 29 #include "bits/debug.hfa" 28 30 #include "bits/signal.hfa" 29 31 #include "kernel_private.hfa" … … 105 107 static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) { 106 108 if( ! & (*alarms)`first ) return 0p; // If no alarms return null 107 if( (*alarms)`first. alarm>= currtime ) return 0p; // If alarms head not expired return null109 if( (*alarms)`first.timeval >= currtime ) return 0p; // If alarms head not expired return null 108 110 return pop(alarms); // Otherwise just pop head 109 111 } … … 141 143 if( period > 0 ) { 142 144 __cfadbg_print_buffer_local( preemption, " KERNEL: alarm period is %lu.\n", period`ns ); 143 node-> alarm = currtime + period;// Alarm is periodic, add currtime to it (used cached current time)145 node->timeval = currtime + period; // Alarm is periodic, add currtime to it (used cached current time) 144 146 insert( alarms, node ); // Reinsert the node for the next time it triggers 145 147 } … … 148 150 // If there are still alarms pending, reset the timer 149 151 if( & (*alarms)`first ) { 150 Duration delta = (*alarms)`first.alarm - currtime; 151 Duration capped = max(delta, 50`us); 152 __kernel_set_timer( capped ); 152 Duration delta = (*alarms)`first.timeval - currtime; 153 __kernel_set_timer( delta ); 153 154 } 154 155 } … … 160 161 // Alarms need to be enabled 161 162 if ( duration > 0 && ! alarm->set ) { 162 alarm-> alarm = __kernel_get_time() +duration;163 alarm->period = duration;163 alarm->initial = duration; 164 alarm->period = duration; 164 165 register_self( alarm ); 165 166 } … … 167 168 else if ( duration == 0 && alarm->set ) { 168 169 unregister_self( alarm ); 169 alarm-> alarm= 0;170 alarm->period = 0;170 alarm->initial = 0; 171 alarm->period = 0; 171 172 } 172 173 // If alarm is different from previous, change it 173 174 else if ( duration > 0 && alarm->period != duration ) { 174 175 unregister_self( alarm ); 175 alarm-> alarm = __kernel_get_time() +duration;176 alarm->period = duration;176 alarm->initial = duration; 177 alarm->period = duration; 177 178 register_self( alarm ); 178 179 } … … 599 600 600 601 // Notify the alarm thread of the shutdown 601 sigval val = { 1 }; 602 sigval val; 603 val.sival_int = 0; 602 604 pthread_sigqueue( alarm_thread, SIGALRM, val ); 603 605 … … 619 621 // Used by thread to control when they want to receive preemption signals 620 622 void ?{}( preemption_scope & this, processor * proc ) { 621 (this.alarm){ proc, (Time){ 0 }, 0`s };623 (this.alarm){ proc, 0`s, 0`s }; 622 624 this.proc = proc; 623 625 this.proc->preemption_alarm = &this.alarm; … … 687 689 // Waits on SIGALRM and send SIGUSR1 to whom ever needs it 688 690 static void * alarm_loop( __attribute__((unused)) void * args ) { 689 __processor_id_t id; 690 register_proc_id(&id); 691 __cfaabi_tls.this_proc_id = &id; 692 691 unsigned id = register_proc_id(); 693 692 694 693 // Block sigalrms to control when they arrive … … 707 706 siginfo_t info; 708 707 int sig = sigwaitinfo( &mask, &info ); 708 709 __cfadbg_print_buffer_decl ( preemption, " KERNEL: sigwaitinfo returned %d, c: %d, v: %d\n", sig, info.si_code, info.si_value.sival_int ); 710 __cfadbg_print_buffer_local( preemption, " KERNEL: SI_QUEUE %d, SI_TIMER %d, SI_KERNEL %d\n", SI_QUEUE, SI_TIMER, SI_KERNEL ); 709 711 710 712 if( sig < 0 ) { … … 714 716 case EAGAIN : 715 717 case EINTR : 716 {__cfa abi_dbg_print_buffer_decl(" KERNEL: 
Spurious wakeup %d.\n", err );}718 {__cfadbg_print_buffer_local( preemption, " KERNEL: Spurious wakeup %d.\n", err );} 717 719 continue; 718 720 case EINVAL : … … 726 728 assertf(sig == SIGALRM, "Kernel Internal Error, sigwait: Unexpected signal %d (%d : %d)\n", sig, info.si_code, info.si_value.sival_int); 727 729 728 // __cfaabi_dbg_print_safe( "Kernel : Caught alarm from %d with %d\n", info.si_code, info.si_value.sival_int );729 730 // Switch on the code (a.k.a. the sender) to 730 731 switch( info.si_code ) 731 732 { 733 // Signal was not sent by the kernel but by an other thread 734 case SI_QUEUE: 735 // other threads may signal the alarm thread to shut it down 736 // or to manual cause the preemption tick 737 // use info.si_value and handle the case here 738 switch( info.si_value.sival_int ) { 739 case 0: 740 goto EXIT; 741 default: 742 abort( "SI_QUEUE with val %d", info.si_value.sival_int); 743 } 744 // fallthrough 732 745 // Timers can apparently be marked as sent for the kernel 733 746 // In either case, tick preemption … … 739 752 unlock( event_kernel->lock ); 740 753 break; 741 // Signal was not sent by the kernel but by an other thread742 case SI_QUEUE:743 // For now, other thread only signal the alarm thread to shut it down744 // If this needs to change use info.si_value and handle the case here745 goto EXIT;746 754 } 747 755 } … … 749 757 EXIT: 750 758 __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" ); 751 register_proc_id(&id);759 unregister_proc_id(id); 752 760 753 761 return 0p; -
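The preemption.cfa changes above make the alarm pthread's signal dispatch explicit: SI_QUEUE now carries a command in sival_int (0 means shut down, anything else aborts), while SI_TIMER and SI_KERNEL mean the ITIMER_REAL tick fired and expired alarm nodes should be processed. A plain-C sketch of that dispatch (tick_preemption stands in for the real expiry handling):

    #include <signal.h>
    #include <stdlib.h>

    static void alarm_loop_sketch(const sigset_t * mask) {
        for (;;) {
            siginfo_t info;
            int sig = sigwaitinfo(mask, &info);
            if (sig < 0) continue;                          /* EINTR/EAGAIN: spurious wakeup, retry */

            switch (info.si_code) {
            case SI_QUEUE:                                  /* sent by pthread_sigqueue from another thread */
                if (info.si_value.sival_int == 0) return;   /* command 0: stop the alarm thread */
                abort();                                    /* any other command is unexpected for now */
            case SI_TIMER:                                  /* timer expiry, sometimes reported as SI_KERNEL */
            case SI_KERNEL:
                /* tick_preemption(): pop expired nodes, run or re-arm them, reset the timer */
                break;
            }
        }
    }

The hunk also drops the 50us floor when re-arming the timer for the next pending alarm (the 1us clamp now lives in __kernel_set_timer) and switches the per-processor preemption alarm to the Duration-based constructor.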
libcfa/src/concurrency/ready_queue.cfa
r5407cdc r8d66610 17 17 // #define __CFA_DEBUG_PRINT_READY_QUEUE__ 18 18 19 // #define USE_MPSC20 19 21 20 #define USE_RELAXED_FIFO … … 93 92 this.alloc = 0; 94 93 this.ready = 0; 95 this.lock = false;96 94 this.data = alloc(this.max); 97 98 /*paranoid*/ verify( 0 == (((uintptr_t)(this.data )) % 64) ); 99 /*paranoid*/ verify( 0 == (((uintptr_t)(this.data + 1)) % 64) ); 95 this.write_lock = false; 96 100 97 /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.alloc), &this.alloc)); 101 98 /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.ready), &this.ready)); … … 106 103 } 107 104 108 void ?{}( __scheduler_lock_id_t & this, __processor_id_t * proc ) {109 this.handle = proc;110 this.lock = false;111 #ifdef __CFA_WITH_VERIFY__112 this.owned = false;113 #endif114 }115 105 116 106 //======================================================================= 117 107 // Lock-Free registering/unregistering of threads 118 void register_proc_id( struct __processor_id_t * proc) with(*__scheduler_lock) {108 unsigned register_proc_id( void ) with(*__scheduler_lock) { 119 109 __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc); 110 bool * handle = (bool *)&kernelTLS().sched_lock; 120 111 121 112 // Step - 1 : check if there is already space in the data … … 124 115 // Check among all the ready 125 116 for(uint_fast32_t i = 0; i < s; i++) { 126 __processor_id_t * null = 0p; // Re-write every loop since compare thrashes it 127 if( __atomic_load_n(&data[i].handle, (int)__ATOMIC_RELAXED) == null 128 && __atomic_compare_exchange_n( &data[i].handle, &null, proc, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 129 /*paranoid*/ verify(i < ready); 130 /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size)); 131 /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0); 132 proc->id = i; 117 bool * volatile * cell = (bool * volatile *)&data[i]; // Cforall is bugged and the double volatiles causes problems 118 /* paranoid */ verify( handle != *cell ); 119 120 bool * null = 0p; // Re-write every loop since compare thrashes it 121 if( __atomic_load_n(cell, (int)__ATOMIC_RELAXED) == null 122 && __atomic_compare_exchange_n( cell, &null, handle, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 123 /* paranoid */ verify(i < ready); 124 /* paranoid */ verify( (kernelTLS().sched_id = i, true) ); 125 return i; 133 126 } 134 127 } … … 141 134 142 135 // Step - 3 : Mark space as used and then publish it. 143 __scheduler_lock_id_t * storage = (__scheduler_lock_id_t *)&data[n]; 144 (*storage){ proc }; 136 data[n] = handle; 145 137 while() { 146 138 unsigned copy = n; … … 154 146 155 147 // Return new spot. 
156 /*paranoid*/ verify(n < ready); 157 /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size)); 158 /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0); 159 proc->id = n; 160 } 161 162 void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) { 163 unsigned id = proc->id; 164 /*paranoid*/ verify(id < ready); 165 /*paranoid*/ verify(proc == __atomic_load_n(&data[id].handle, __ATOMIC_RELAXED)); 166 __atomic_store_n(&data[id].handle, 0p, __ATOMIC_RELEASE); 148 /* paranoid */ verify(n < ready); 149 /* paranoid */ verify( (kernelTLS().sched_id = n, true) ); 150 return n; 151 } 152 153 void unregister_proc_id( unsigned id ) with(*__scheduler_lock) { 154 /* paranoid */ verify(id < ready); 155 /* paranoid */ verify(id == kernelTLS().sched_id); 156 /* paranoid */ verify(data[id] == &kernelTLS().sched_lock); 157 158 bool * volatile * cell = (bool * volatile *)&data[id]; // Cforall is bugged and the double volatiles causes problems 159 160 __atomic_store_n(cell, 0p, __ATOMIC_RELEASE); 167 161 168 162 __cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc); … … 174 168 uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) { 175 169 /* paranoid */ verify( ! __preemption_enabled() ); 170 /* paranoid */ verify( ! kernelTLS().sched_lock ); 176 171 177 172 // Step 1 : lock global lock 178 173 // It is needed to avoid processors that register mid Critical-Section 179 174 // to simply lock their own lock and enter. 180 __atomic_acquire( & lock );175 __atomic_acquire( &write_lock ); 181 176 182 177 // Step 2 : lock per-proc lock … … 186 181 uint_fast32_t s = ready; 187 182 for(uint_fast32_t i = 0; i < s; i++) { 188 __atomic_acquire( &data[i].lock ); 183 volatile bool * llock = data[i]; 184 if(llock) __atomic_acquire( llock ); 189 185 } 190 186 … … 203 199 // Alternative solution : return s in write_lock and pass it to write_unlock 204 200 for(uint_fast32_t i = 0; i < last_s; i++) { 205 v erify(data[i].lock);206 __atomic_store_n(&data[i].lock, (bool)false, __ATOMIC_RELEASE);201 volatile bool * llock = data[i]; 202 if(llock) __atomic_store_n(llock, (bool)false, __ATOMIC_RELEASE); 207 203 } 208 204 209 205 // Step 2 : release global lock 210 /*paranoid*/ assert(true == lock);211 __atomic_store_n(& lock, (bool)false, __ATOMIC_RELEASE);206 /*paranoid*/ assert(true == write_lock); 207 __atomic_store_n(&write_lock, (bool)false, __ATOMIC_RELEASE); 212 208 213 209 /* paranoid */ verify( ! 
__preemption_enabled() ); … … 253 249 } 254 250 255 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd ) with (cltr->ready_queue) {251 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) { 256 252 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr); 257 253 258 const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);254 const bool external = !push_local || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr); 259 255 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count ); 260 261 // write timestamp262 thrd->link.ts = rdtscl();263 256 264 257 bool local; … … 280 273 #endif 281 274 282 #if defined(USE_MPSC)283 // mpsc always succeeds284 } while( false );285 #else286 275 // If we can't lock it retry 287 276 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 288 #endif289 277 290 278 // Actually push it 291 279 push(lanes.data[i], thrd); 292 280 293 #if !defined(USE_MPSC) 294 // Unlock and return 295 __atomic_unlock( &lanes.data[i].lock ); 296 #endif 281 // Unlock and return 282 __atomic_unlock( &lanes.data[i].lock ); 297 283 298 284 // Mark the current index in the tls rng instance as having an item … … 350 336 #endif 351 337 #if defined(USE_WORK_STEALING) 352 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd ) with (cltr->ready_queue) {338 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) { 353 339 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr); 354 340 355 const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr); 341 // #define USE_PREFERRED 342 #if !defined(USE_PREFERRED) 343 const bool external = !push_local || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr); 356 344 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count ); 357 358 // write timestamp 359 thrd->link.ts = rdtscl(); 345 #else 346 unsigned preferred = thrd->preferred; 347 const bool external = push_local || (!kernelTLS().this_processor) || preferred == -1u || thrd->curr_cluster != cltr; 348 /* paranoid */ verifyf(external || preferred < lanes.count, "Invalid preferred queue %u for %u lanes", preferred, lanes.count ); 349 350 unsigned r = preferred % READYQ_SHARD_FACTOR; 351 const unsigned start = preferred - r; 352 #endif 360 353 361 354 // Try to pick a lane and lock it … … 371 364 } 372 365 else { 373 processor * proc = kernelTLS().this_processor; 374 unsigned r = proc->rdq.its++; 375 i = proc->rdq.id + (r % READYQ_SHARD_FACTOR); 366 #if !defined(USE_PREFERRED) 367 processor * proc = kernelTLS().this_processor; 368 unsigned r = proc->rdq.its++; 369 i = proc->rdq.id + (r % READYQ_SHARD_FACTOR); 370 #else 371 i = start + (r++ % READYQ_SHARD_FACTOR); 372 #endif 376 373 } 377 378 379 #if defined(USE_MPSC)380 // mpsc always succeeds381 } while( false );382 #else383 374 // If we can't lock it retry 384 375 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 385 #endif386 376 387 377 // Actually push it 388 378 push(lanes.data[i], thrd); 389 379 390 #if !defined(USE_MPSC) 391 // Unlock and return 392 __atomic_unlock( &lanes.data[i].lock ); 393 #endif 380 // Unlock and return 381 __atomic_unlock( &lanes.data[i].lock ); 394 382 395 383 #if !defined(__CFA_NO_STATISTICS__) … … 410 398 
411 399 if(proc->rdq.target == -1u) { 400 unsigned long long min = ts(lanes.data[proc->rdq.id]); 401 for(int i = 0; i < READYQ_SHARD_FACTOR; i++) { 402 unsigned long long tsc = ts(lanes.data[proc->rdq.id + i]); 403 if(tsc < min) min = tsc; 404 } 405 proc->rdq.cutoff = min; 412 406 proc->rdq.target = __tls_rand() % lanes.count; 413 unsigned it1 = proc->rdq.itr;414 unsigned it2 = proc->rdq.itr + 1;415 unsigned idx1 = proc->rdq.id + (it1 % READYQ_SHARD_FACTOR);416 unsigned idx2 = proc->rdq.id + (it2 % READYQ_SHARD_FACTOR);417 unsigned long long tsc1 = ts(lanes.data[idx1]);418 unsigned long long tsc2 = ts(lanes.data[idx2]);419 proc->rdq.cutoff = min(tsc1, tsc2);420 if(proc->rdq.cutoff == 0) proc->rdq.cutoff = -1ull;421 407 } 422 408 else { 423 409 unsigned target = proc->rdq.target; 424 410 proc->rdq.target = -1u; 425 if(lanes.tscs[target].tv < proc->rdq.cutoff) { 411 const unsigned long long bias = 0; //2_500_000_000; 412 const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff; 413 if(lanes.tscs[target].tv < cutoff && ts(lanes.data[target]) < cutoff) { 426 414 $thread * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help)); 427 415 if(t) return t; … … 430 418 431 419 for(READYQ_SHARD_FACTOR) { 432 unsigned i = proc->rdq.id + ( --proc->rdq.itr% READYQ_SHARD_FACTOR);420 unsigned i = proc->rdq.id + (proc->rdq.itr++ % READYQ_SHARD_FACTOR); 433 421 if($thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t; 434 422 } … … 462 450 // If list looks empty retry 463 451 if( is_empty(lane) ) { 464 __STATS( stats.espec++; )465 452 return 0p; 466 453 } … … 468 455 // If we can't get the lock retry 469 456 if( !__atomic_try_acquire(&lane.lock) ) { 470 __STATS( stats.elock++; )471 457 return 0p; 472 458 } … … 475 461 if( is_empty(lane) ) { 476 462 __atomic_unlock(&lane.lock); 477 __STATS( stats.eempty++; )478 463 return 0p; 479 464 } … … 481 466 // Actually pop the list 482 467 struct $thread * thrd; 483 thrd = pop(lane); 468 unsigned long long tsv; 469 [thrd, tsv] = pop(lane); 484 470 485 471 /* paranoid */ verify(thrd); 472 /* paranoid */ verify(tsv); 486 473 /* paranoid */ verify(lane.lock); 487 474 … … 493 480 494 481 #if defined(USE_WORK_STEALING) 495 lanes.tscs[w].tv = t hrd->link.ts;482 lanes.tscs[w].tv = tsv; 496 483 #endif 484 485 thrd->preferred = w; 497 486 498 487 // return the popped thread … … 522 511 // Check that all the intrusive queues in the data structure are still consistent 523 512 static void check( __ready_queue_t & q ) with (q) { 524 #if defined(__CFA_WITH_VERIFY__) && !defined(USE_MPSC)513 #if defined(__CFA_WITH_VERIFY__) 525 514 { 526 515 for( idx ; lanes.count ) { … … 528 517 assert(!lanes.data[idx].lock); 529 518 530 assert(head(sl)->link.prev == 0p ); 531 assert(head(sl)->link.next->link.prev == head(sl) ); 532 assert(tail(sl)->link.next == 0p ); 533 assert(tail(sl)->link.prev->link.next == tail(sl) ); 534 535 if(is_empty(sl)) { 536 assert(tail(sl)->link.prev == head(sl)); 537 assert(head(sl)->link.next == tail(sl)); 538 } else { 539 assert(tail(sl)->link.prev != head(sl)); 540 assert(head(sl)->link.next != tail(sl)); 541 } 519 if(is_empty(sl)) { 520 assert( sl.anchor.next == 0p ); 521 assert( sl.anchor.ts == 0 ); 522 assert( mock_head(sl) == sl.prev ); 523 } else { 524 assert( sl.anchor.next != 0p ); 525 assert( sl.anchor.ts != 0 ); 526 assert( mock_head(sl) != sl.prev ); 527 } 542 528 } 543 529 } … … 560 546 // fixes the list so that the pointers back to anchors aren't left dangling 561 547 
static inline void fix(__intrusive_lane_t & ll) { 562 #if !defined(USE_MPSC) 563 // if the list is not empty then follow he pointer and fix its reverse 564 if(!is_empty(ll)) { 565 head(ll)->link.next->link.prev = head(ll); 566 tail(ll)->link.prev->link.next = tail(ll); 567 } 568 // Otherwise just reset the list 569 else { 570 verify(tail(ll)->link.next == 0p); 571 tail(ll)->link.prev = head(ll); 572 head(ll)->link.next = tail(ll); 573 verify(head(ll)->link.prev == 0p); 574 } 575 #endif 576 } 577 578 static void assign_list(unsigned & value, dlist(processor, processor) & list, unsigned count) { 548 if(is_empty(ll)) { 549 verify(ll.anchor.next == 0p); 550 ll.prev = mock_head(ll); 551 } 552 } 553 554 static void assign_list(unsigned & value, dlist(processor) & list, unsigned count) { 579 555 processor * it = &list`first; 580 556 for(unsigned i = 0; i < count; i++) { … … 597 573 lanes.tscs = alloc(lanes.count, lanes.tscs`realloc); 598 574 for(i; lanes.count) { 599 lanes.tscs[i].tv = ts(lanes.data[i]); 575 unsigned long long tsc = ts(lanes.data[i]); 576 lanes.tscs[i].tv = tsc != 0 ? tsc : rdtscl(); 600 577 } 601 578 #endif … … 686 663 while(!is_empty(lanes.data[idx])) { 687 664 struct $thread * thrd; 688 thrd = pop(lanes.data[idx]); 689 690 push(cltr, thrd); 665 unsigned long long _; 666 [thrd, _] = pop(lanes.data[idx]); 667 668 push(cltr, thrd, true); 691 669 692 670 // for printing count the number of displaced threads … … 725 703 /* paranoid */ verify( ready_mutate_islocked() ); 726 704 } 705 706 #if !defined(__CFA_NO_STATISTICS__) 707 unsigned cnt(const __ready_queue_t & this, unsigned idx) { 708 /* paranoid */ verify(this.lanes.count > idx); 709 return this.lanes.data[idx].cnt; 710 } 711 #endif -
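The bulk of ready_queue.cfa above re-tunes the work-stealing pop: instead of sampling two of its own lanes, a processor now takes the minimum timestamp across all of its lanes as the cutoff, picks one random victim lane, and steals only when the victim's oldest work is older than that cutoff (minus an optional bias); otherwise it keeps cycling through its own lanes. A plain-C sketch of the heuristic (READYQ_SHARD_FACTOR, the lane layout and the helpers are simplified stand-ins):

    #include <stdint.h>
    #include <stddef.h>

    #define SHARD 2                                    /* lanes owned per processor (READYQ_SHARD_FACTOR) */

    struct lane { uint64_t ts; /* intrusive queue ... */ };
    struct proc { unsigned id, itr, target; uint64_t cutoff; };

    extern struct lane lanes[];                        /* cluster-wide lane array */
    extern void *   try_pop_lane(unsigned idx);        /* stand-in: returns a thread or NULL */
    extern unsigned random_lane(void);                 /* stand-in: uniform over all lanes */

    void * pop_with_stealing(struct proc * p) {
        if (p->target == (unsigned)-1) {
            uint64_t min = lanes[p->id].ts;            /* oldest work among our own lanes */
            for (unsigned i = 1; i < SHARD; i++)
                if (lanes[p->id + i].ts < min) min = lanes[p->id + i].ts;
            p->cutoff = min;
            p->target = random_lane();                 /* victim for the next call */
        } else {
            unsigned t = p->target;
            p->target  = (unsigned)-1;
            if (lanes[t].ts < p->cutoff) {             /* victim's work is older than all of ours */
                void * thrd = try_pop_lane(t);
                if (thrd) return thrd;
            }
        }
        for (unsigned i = 0; i < SHARD; i++) {         /* stay local: round-robin over our own lanes */
            void * thrd = try_pop_lane(p->id + (p->itr++ % SHARD));
            if (thrd) return thrd;
        }
        return NULL;
    }

The same file also makes pop return the element's timestamp alongside the thread, records it in lanes.tscs and in thrd->preferred, and changes push to take an explicit local flag now that the MPSC variant is gone.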
libcfa/src/concurrency/ready_subqueue.hfa
r5407cdc r8d66610 7 7 // Intrusives lanes which are used by the relaxed ready queue 8 8 struct __attribute__((aligned(128))) __intrusive_lane_t { 9 10 #if defined(USE_MPSC) 11 mpsc_queue($thread) queue; 12 __attribute__((aligned(128))) 13 #else 14 // anchor for the head and the tail of the queue 15 __attribute__((aligned(128))) struct __sentinel_t { 16 // Link lists fields 17 // instrusive link field for threads 18 // must be exactly as in $thread 19 __thread_desc_link link; 20 } before, after; 21 #endif 9 struct $thread * prev; 22 10 23 11 // spin lock protecting the queue 24 12 volatile bool lock; 25 13 26 // Optional statistic counters 27 #if !defined(__CFA_NO_SCHED_STATS__) 28 struct __attribute__((aligned(64))) { 29 // difference between number of push and pops 30 ssize_t diff; 14 __thread_desc_link anchor; 31 15 32 // total number of pushes and pops 33 size_t push; 34 size_t pop ; 35 } stat; 16 #if !defined(__CFA_NO_STATISTICS__) 17 unsigned cnt; 36 18 #endif 37 19 }; 38 20 39 void ?{}(__intrusive_lane_t & this);40 void ^?{}(__intrusive_lane_t & this);41 42 21 // Get the head pointer (one before the first element) from the anchor 43 static inline $thread * head(const __intrusive_lane_t & this) { 44 #if defined(USE_MPSC) 45 return this.queue.head; 46 #else 47 $thread * rhead = ($thread *)( 48 (uintptr_t)( &this.before ) - offsetof( $thread, link ) 49 ); 50 /* paranoid */ verify(rhead); 51 return rhead; 52 #endif 53 } 54 55 // Get the tail pointer (one after the last element) from the anchor 56 static inline $thread * tail(const __intrusive_lane_t & this) { 57 #if defined(USE_MPSC) 58 return this.queue.tail; 59 #else 60 $thread * rtail = ($thread *)( 61 (uintptr_t)( &this.after ) - offsetof( $thread, link ) 62 ); 63 /* paranoid */ verify(rtail); 64 return rtail; 65 #endif 22 static inline $thread * mock_head(const __intrusive_lane_t & this) { 23 $thread * rhead = ($thread *)( 24 (uintptr_t)( &this.anchor ) - __builtin_offsetof( $thread, link ) 25 ); 26 return rhead; 66 27 } 67 28 … … 69 30 void ?{}( __intrusive_lane_t & this ) { 70 31 this.lock = false; 32 this.prev = mock_head(this); 33 this.anchor.next = 0p; 34 this.anchor.ts = 0; 35 #if !defined(__CFA_NO_STATISTICS__) 36 this.cnt = 0; 37 #endif 71 38 72 #if !defined(USE_MPSC) 73 this.before.link.prev = 0p; 74 this.before.link.next = tail(this); 75 this.before.link.ts = 0; 76 77 this.after .link.prev = head(this); 78 this.after .link.next = 0p; 79 this.after .link.ts = 0; 80 81 #if !defined(__CFA_NO_SCHED_STATS__) 82 this.stat.diff = 0; 83 this.stat.push = 0; 84 this.stat.pop = 0; 85 #endif 86 87 // We add a boat-load of assertions here because the anchor code is very fragile 88 /* paranoid */ verify(((uintptr_t)( head(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.before)); 89 /* paranoid */ verify(((uintptr_t)( tail(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.after )); 90 /* paranoid */ verify(head(this)->link.prev == 0p ); 91 /* paranoid */ verify(head(this)->link.next == tail(this) ); 92 /* paranoid */ verify(tail(this)->link.next == 0p ); 93 /* paranoid */ verify(tail(this)->link.prev == head(this) ); 94 /* paranoid */ verify(&head(this)->link.prev == &this.before.link.prev ); 95 /* paranoid */ verify(&head(this)->link.next == &this.before.link.next ); 96 /* paranoid */ verify(&tail(this)->link.prev == &this.after .link.prev ); 97 /* paranoid */ verify(&tail(this)->link.next == &this.after .link.next ); 98 /* paranoid */ verify(__alignof__(__intrusive_lane_t) == 128); 99 /* paranoid */ 
verify(__alignof__(this) == 128); 100 /* paranoid */ verifyf(((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128)); 101 #endif 39 // We add a boat-load of assertions here because the anchor code is very fragile 40 /* paranoid */ _Static_assert( offsetof( $thread, link ) == offsetof(__intrusive_lane_t, anchor) ); 41 /* paranoid */ verify( offsetof( $thread, link ) == offsetof(__intrusive_lane_t, anchor) ); 42 /* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.anchor) ); 43 /* paranoid */ verify( &mock_head(this)->link.next == &this.anchor.next ); 44 /* paranoid */ verify( &mock_head(this)->link.ts == &this.anchor.ts ); 45 /* paranoid */ verify( mock_head(this)->link.next == 0p ); 46 /* paranoid */ verify( mock_head(this)->link.ts == 0 ); 47 /* paranoid */ verify( mock_head(this) == this.prev ); 48 /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 ); 49 /* paranoid */ verify( __alignof__(this) == 128 ); 50 /* paranoid */ verifyf( ((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128) ); 102 51 } 103 52 104 53 // Dtor is trivial 105 54 void ^?{}( __intrusive_lane_t & this ) { 106 #if !defined(USE_MPSC) 107 // Make sure the list is empty 108 /* paranoid */ verify(head(this)->link.prev == 0p ); 109 /* paranoid */ verify(head(this)->link.next == tail(this) ); 110 /* paranoid */ verify(tail(this)->link.next == 0p ); 111 /* paranoid */ verify(tail(this)->link.prev == head(this) ); 112 #endif 55 // Make sure the list is empty 56 /* paranoid */ verify( this.anchor.next == 0p ); 57 /* paranoid */ verify( this.anchor.ts == 0 ); 58 /* paranoid */ verify( mock_head(this) == this.prev ); 113 59 } 114 60 115 61 // Push a thread onto this lane 116 62 // returns true of lane was empty before push, false otherwise 117 bool push(__intrusive_lane_t & this, $thread * node) { 118 #if defined(USE_MPSC) 119 inline $thread * volatile & ?`next ( $thread * this ) __attribute__((const)) { 120 return this->link.next; 121 } 122 push(this.queue, node); 123 #else 124 #if defined(__CFA_WITH_VERIFY__) 125 /* paranoid */ verify(this.lock); 126 /* paranoid */ verify(node->link.ts != 0); 127 /* paranoid */ verify(node->link.next == 0p); 128 /* paranoid */ verify(node->link.prev == 0p); 129 /* paranoid */ verify(tail(this)->link.next == 0p); 130 /* paranoid */ verify(head(this)->link.prev == 0p); 63 static inline void push( __intrusive_lane_t & this, $thread * node ) { 64 /* paranoid */ verify( this.lock ); 65 /* paranoid */ verify( node->link.next == 0p ); 66 /* paranoid */ verify( node->link.ts == 0 ); 67 /* paranoid */ verify( this.prev->link.next == 0p ); 68 /* paranoid */ verify( this.prev->link.ts == 0 ); 69 if( this.anchor.next == 0p ) { 70 /* paranoid */ verify( this.anchor.next == 0p ); 71 /* paranoid */ verify( this.anchor.ts == 0 ); 72 /* paranoid */ verify( this.prev == mock_head( this ) ); 73 } else { 74 /* paranoid */ verify( this.anchor.next != 0p ); 75 /* paranoid */ verify( this.anchor.ts != 0 ); 76 /* paranoid */ verify( this.prev != mock_head( this ) ); 77 } 131 78 132 if(this.before.link.ts == 0l) { 133 /* paranoid */ verify(tail(this)->link.prev == head(this)); 134 /* paranoid */ verify(head(this)->link.next == tail(this)); 135 } else { 136 /* paranoid */ verify(tail(this)->link.prev != head(this)); 137 /* paranoid */ verify(head(this)->link.next != tail(this)); 138 } 139 #endif 140 141 // Get the relevant nodes locally 
142 $thread * tail = tail(this); 143 $thread * prev = tail->link.prev; 144 145 // Do the push 146 node->link.next = tail; 147 node->link.prev = prev; 148 prev->link.next = node; 149 tail->link.prev = node; 150 151 // Update stats 152 #if !defined(__CFA_NO_SCHED_STATS__) 153 this.stat.diff++; 154 this.stat.push++; 155 #endif 156 157 verify(node->link.next == tail(this)); 158 159 // Check if the queue used to be empty 160 if(this.before.link.ts == 0l) { 161 this.before.link.ts = node->link.ts; 162 /* paranoid */ verify(node->link.prev == head(this)); 163 return true; 164 } 165 return false; 79 // Get the relevant nodes locally 80 this.prev->link.next = node; 81 this.prev->link.ts = rdtscl(); 82 this.prev = node; 83 #if !defined(__CFA_NO_STATISTICS__) 84 this.cnt++; 166 85 #endif 167 86 } … … 170 89 // returns popped 171 90 // returns true of lane was empty before push, false otherwise 172 $thread * pop(__intrusive_lane_t & this) { 173 /* paranoid */ verify(this.lock); 174 #if defined(USE_MPSC) 175 inline $thread * volatile & ?`next ( $thread * this ) __attribute__((const)) { 176 return this->link.next; 177 } 178 return pop(this.queue); 179 #else 180 /* paranoid */ verify(this.before.link.ts != 0ul); 91 static inline [* $thread, unsigned long long] pop( __intrusive_lane_t & this ) { 92 /* paranoid */ verify( this.lock ); 93 /* paranoid */ verify( this.anchor.next != 0p ); 94 /* paranoid */ verify( this.anchor.ts != 0 ); 181 95 182 // Get anchors locally 183 $thread * head = head(this); 184 $thread * tail = tail(this); 96 // Get the relevant nodes locally 97 unsigned long long ts = this.anchor.ts; 98 $thread * node = this.anchor.next; 99 this.anchor.next = node->link.next; 100 this.anchor.ts = node->link.ts; 101 bool is_empty = this.anchor.ts == 0; 102 node->link.next = 0p; 103 node->link.ts = 0; 104 #if !defined(__CFA_NO_STATISTICS__) 105 this.cnt--; 106 #endif 185 107 186 // Get the relevant nodes locally 187 $thread * node = head->link.next; 188 $thread * next = node->link.next; 108 // Update head time stamp 109 if(is_empty) this.prev = mock_head( this ); 189 110 190 /* paranoid */ verify(node != tail); 191 /* paranoid */ verify(node); 192 193 // Do the pop 194 head->link.next = next; 195 next->link.prev = head; 196 node->link.next = 0p; 197 node->link.prev = 0p; 198 199 // Update head time stamp 200 this.before.link.ts = next->link.ts; 201 202 // Update stats 203 #ifndef __CFA_NO_SCHED_STATS__ 204 this.stat.diff--; 205 this.stat.pop ++; 206 #endif 207 208 // Check if we emptied list and return accordingly 209 /* paranoid */ verify(tail(this)->link.next == 0p); 210 /* paranoid */ verify(head(this)->link.prev == 0p); 211 if(next == tail) { 212 /* paranoid */ verify(this.before.link.ts == 0); 213 /* paranoid */ verify(tail(this)->link.prev == head(this)); 214 /* paranoid */ verify(head(this)->link.next == tail(this)); 215 return node; 216 } 217 else { 218 /* paranoid */ verify(next->link.ts != 0); 219 /* paranoid */ verify(tail(this)->link.prev != head(this)); 220 /* paranoid */ verify(head(this)->link.next != tail(this)); 221 /* paranoid */ verify(this.before.link.ts != 0); 222 return node; 223 } 224 #endif 111 /* paranoid */ verify( node->link.next == 0p ); 112 /* paranoid */ verify( node->link.ts == 0 ); 113 return [node, ts]; 225 114 } 226 115 227 116 // Check whether or not list is empty 228 117 static inline bool is_empty(__intrusive_lane_t & this) { 229 #if defined(USE_MPSC) 230 return this.queue.head == 0p; 231 #else 232 // Cannot verify here since it may not be locked 233 return 
this.before.link.ts == 0; 234 #endif 118 return this.anchor.ts == 0; 235 119 } 236 120 237 121 // Return the timestamp 238 122 static inline unsigned long long ts(__intrusive_lane_t & this) { 239 #if defined(USE_MPSC) 240 $thread * tl = this.queue.head; 241 if(!tl) return -1ull; 242 return tl->link.ts; 243 #else 244 // Cannot verify here since it may not be locked 245 return this.before.link.ts; 246 #endif 123 // Cannot verify here since it may not be locked 124 return this.anchor.ts; 247 125 } 248 249 // Aligned timestamps which are used by the relaxed ready queue250 struct __attribute__((aligned(128))) __timestamp_t {251 volatile unsigned long long tv;252 };253 254 void ?{}(__timestamp_t & this) { this.tv = 0; }255 void ^?{}(__timestamp_t & this) {} -
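For readers following the ready_subqueue.hfa rewrite: the two sentinel nodes (before/after) are replaced by a single anchor embedded in the lane, and mock_head() subtracts offsetof($thread, link) from the anchor's address so the anchor can be treated as the link field of a fictitious head thread. Push therefore always appends through prev->link.next, pop always detaches anchor.next, and emptiness is encoded as anchor.ts == 0. The self-contained C sketch below demonstrates the same offsetof trick with simplified node and lane types; it illustrates the idea and is not the CFA declarations themselves.

#include <assert.h>
#include <stddef.h>

struct link { struct node * next; unsigned long long ts; };
struct node { int value; struct link link; };

struct lane {
	struct node * prev;   /* last node in the lane, or the mock head when empty */
	struct link anchor;   /* plays the role of the head node's link field */
};

/* Fabricate a node pointer whose .link member aliases the lane's anchor. */
static struct node * mock_head(struct lane * l) {
	return (struct node *)((char *)&l->anchor - offsetof(struct node, link));
}

static void lane_init(struct lane * l) {
	l->anchor.next = NULL;
	l->anchor.ts   = 0;
	l->prev        = mock_head(l);   /* empty lane: the tail is the mock head */
}

static void lane_push(struct lane * l, struct node * n, unsigned long long now) {
	n->link.next = NULL;
	n->link.ts   = 0;
	l->prev->link.next = n;    /* valid whether prev is a real node or the mock head */
	l->prev->link.ts   = now;  /* stamping the anchor's ts also marks the lane non-empty */
	l->prev = n;
}

static struct node * lane_pop(struct lane * l) {
	struct node * n = l->anchor.next;
	assert(n != NULL);                /* caller checks anchor.ts != 0 first */
	l->anchor.next = n->link.next;
	l->anchor.ts   = n->link.ts;      /* last node carries ts == 0, marking the lane empty */
	if (l->anchor.ts == 0) l->prev = mock_head(l);
	n->link.next = NULL;
	n->link.ts   = 0;
	return n;
}

Only the link member of the pointer returned by mock_head() may ever be touched; dereferencing any other field of the fake node would read outside the lane object, which is why the changeset adds the offsetof assertions in the constructor.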
libcfa/src/concurrency/stats.cfa
r5407cdc r8d66610 19 19 stats->ready.pop.local .attempt = 0; 20 20 stats->ready.pop.local .success = 0; 21 stats->ready.pop.local .elock = 0;22 stats->ready.pop.local .eempty = 0;23 stats->ready.pop.local .espec = 0;24 21 stats->ready.pop.help .attempt = 0; 25 22 stats->ready.pop.help .success = 0; 26 stats->ready.pop.help .elock = 0;27 stats->ready.pop.help .eempty = 0;28 stats->ready.pop.help .espec = 0;29 23 stats->ready.pop.steal .attempt = 0; 30 24 stats->ready.pop.steal .success = 0; 31 stats->ready.pop.steal .elock = 0;32 stats->ready.pop.steal .eempty = 0;33 stats->ready.pop.steal .espec = 0;34 25 stats->ready.pop.search.attempt = 0; 35 26 stats->ready.pop.search.success = 0; 36 stats->ready.pop.search.elock = 0;37 stats->ready.pop.search.eempty = 0;38 stats->ready.pop.search.espec = 0;39 27 stats->ready.threads.migration = 0; 40 28 stats->ready.threads.extunpark = 0; 41 29 stats->ready.threads.threads = 0; 30 stats->ready.threads.cthreads = 0; 42 31 stats->ready.sleep.halts = 0; 43 32 stats->ready.sleep.cancels = 0; … … 59 48 stats->io.calls.completed = 0; 60 49 stats->io.calls.errors.busy = 0; 61 stats->io.poller.sleeps = 0;62 50 #endif 63 51 … … 68 56 } 69 57 58 static inline void tally_one( volatile uint64_t * agg, volatile uint64_t * val) { 59 uint64_t add = __atomic_exchange_n(val, 0_l64u, __ATOMIC_RELAXED); 60 __atomic_fetch_add(agg, add, __ATOMIC_RELAXED); 61 } 62 63 static inline void tally_one( volatile int64_t * agg, volatile int64_t * val) { 64 int64_t add = __atomic_exchange_n(val, 0_l64, __ATOMIC_RELAXED); 65 __atomic_fetch_add(agg, add, __ATOMIC_RELAXED); 66 } 67 70 68 void __tally_stats( struct __stats_t * cltr, struct __stats_t * proc ) { 71 __atomic_fetch_add( &cltr->ready.push.local.attempt, proc->ready.push.local.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.local.attempt = 0; 72 __atomic_fetch_add( &cltr->ready.push.local.success, proc->ready.push.local.success, __ATOMIC_SEQ_CST ); proc->ready.push.local.success = 0; 73 __atomic_fetch_add( &cltr->ready.push.share.attempt, proc->ready.push.share.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.share.attempt = 0; 74 __atomic_fetch_add( &cltr->ready.push.share.success, proc->ready.push.share.success, __ATOMIC_SEQ_CST ); proc->ready.push.share.success = 0; 75 __atomic_fetch_add( &cltr->ready.push.extrn.attempt, proc->ready.push.extrn.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.extrn.attempt = 0; 76 __atomic_fetch_add( &cltr->ready.push.extrn.success, proc->ready.push.extrn.success, __ATOMIC_SEQ_CST ); proc->ready.push.extrn.success = 0; 77 __atomic_fetch_add( &cltr->ready.pop.local .attempt, proc->ready.pop.local .attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.local .attempt = 0; 78 __atomic_fetch_add( &cltr->ready.pop.local .success, proc->ready.pop.local .success, __ATOMIC_SEQ_CST ); proc->ready.pop.local .success = 0; 79 __atomic_fetch_add( &cltr->ready.pop.local .elock , proc->ready.pop.local .elock , __ATOMIC_SEQ_CST ); proc->ready.pop.local .elock = 0; 80 __atomic_fetch_add( &cltr->ready.pop.local .eempty , proc->ready.pop.local .eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.local .eempty = 0; 81 __atomic_fetch_add( &cltr->ready.pop.local .espec , proc->ready.pop.local .espec , __ATOMIC_SEQ_CST ); proc->ready.pop.local .espec = 0; 82 __atomic_fetch_add( &cltr->ready.pop.help .attempt, proc->ready.pop.help .attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.help .attempt = 0; 83 __atomic_fetch_add( &cltr->ready.pop.help .success, proc->ready.pop.help .success, __ATOMIC_SEQ_CST ); proc->ready.pop.help .success = 0; 84 
__atomic_fetch_add( &cltr->ready.pop.help .elock , proc->ready.pop.help .elock , __ATOMIC_SEQ_CST ); proc->ready.pop.help .elock = 0; 85 __atomic_fetch_add( &cltr->ready.pop.help .eempty , proc->ready.pop.help .eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.help .eempty = 0; 86 __atomic_fetch_add( &cltr->ready.pop.help .espec , proc->ready.pop.help .espec , __ATOMIC_SEQ_CST ); proc->ready.pop.help .espec = 0; 87 __atomic_fetch_add( &cltr->ready.pop.steal .attempt, proc->ready.pop.steal .attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.steal .attempt = 0; 88 __atomic_fetch_add( &cltr->ready.pop.steal .success, proc->ready.pop.steal .success, __ATOMIC_SEQ_CST ); proc->ready.pop.steal .success = 0; 89 __atomic_fetch_add( &cltr->ready.pop.steal .elock , proc->ready.pop.steal .elock , __ATOMIC_SEQ_CST ); proc->ready.pop.steal .elock = 0; 90 __atomic_fetch_add( &cltr->ready.pop.steal .eempty , proc->ready.pop.steal .eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.steal .eempty = 0; 91 __atomic_fetch_add( &cltr->ready.pop.steal .espec , proc->ready.pop.steal .espec , __ATOMIC_SEQ_CST ); proc->ready.pop.steal .espec = 0; 92 __atomic_fetch_add( &cltr->ready.pop.search.attempt, proc->ready.pop.search.attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.search.attempt = 0; 93 __atomic_fetch_add( &cltr->ready.pop.search.success, proc->ready.pop.search.success, __ATOMIC_SEQ_CST ); proc->ready.pop.search.success = 0; 94 __atomic_fetch_add( &cltr->ready.pop.search.elock , proc->ready.pop.search.elock , __ATOMIC_SEQ_CST ); proc->ready.pop.search.elock = 0; 95 __atomic_fetch_add( &cltr->ready.pop.search.eempty , proc->ready.pop.search.eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.search.eempty = 0; 96 __atomic_fetch_add( &cltr->ready.pop.search.espec , proc->ready.pop.search.espec , __ATOMIC_SEQ_CST ); proc->ready.pop.search.espec = 0; 97 __atomic_fetch_add( &cltr->ready.threads.migration , proc->ready.threads.migration , __ATOMIC_SEQ_CST ); proc->ready.threads.migration = 0; 98 __atomic_fetch_add( &cltr->ready.threads.extunpark , proc->ready.threads.extunpark , __ATOMIC_SEQ_CST ); proc->ready.threads.extunpark = 0; 99 __atomic_fetch_add( &cltr->ready.threads.threads , proc->ready.threads.threads , __ATOMIC_SEQ_CST ); proc->ready.threads.threads = 0; 100 __atomic_fetch_add( &cltr->ready.sleep.halts , proc->ready.sleep.halts , __ATOMIC_SEQ_CST ); proc->ready.sleep.halts = 0; 101 __atomic_fetch_add( &cltr->ready.sleep.cancels , proc->ready.sleep.cancels , __ATOMIC_SEQ_CST ); proc->ready.sleep.cancels = 0; 102 __atomic_fetch_add( &cltr->ready.sleep.wakes , proc->ready.sleep.wakes , __ATOMIC_SEQ_CST ); proc->ready.sleep.wakes = 0; 103 __atomic_fetch_add( &cltr->ready.sleep.exits , proc->ready.sleep.exits , __ATOMIC_SEQ_CST ); proc->ready.sleep.exits = 0; 69 tally_one( &cltr->ready.push.local.attempt, &proc->ready.push.local.attempt ); 70 tally_one( &cltr->ready.push.local.success, &proc->ready.push.local.success ); 71 tally_one( &cltr->ready.push.share.attempt, &proc->ready.push.share.attempt ); 72 tally_one( &cltr->ready.push.share.success, &proc->ready.push.share.success ); 73 tally_one( &cltr->ready.push.extrn.attempt, &proc->ready.push.extrn.attempt ); 74 tally_one( &cltr->ready.push.extrn.success, &proc->ready.push.extrn.success ); 75 tally_one( &cltr->ready.pop.local .attempt, &proc->ready.pop.local .attempt ); 76 tally_one( &cltr->ready.pop.local .success, &proc->ready.pop.local .success ); 77 tally_one( &cltr->ready.pop.help .attempt, &proc->ready.pop.help .attempt ); 78 tally_one( &cltr->ready.pop.help .success, 
&proc->ready.pop.help .success ); 79 tally_one( &cltr->ready.pop.steal .attempt, &proc->ready.pop.steal .attempt ); 80 tally_one( &cltr->ready.pop.steal .success, &proc->ready.pop.steal .success ); 81 tally_one( &cltr->ready.pop.search.attempt, &proc->ready.pop.search.attempt ); 82 tally_one( &cltr->ready.pop.search.success, &proc->ready.pop.search.success ); 83 tally_one( &cltr->ready.threads.migration , &proc->ready.threads.migration ); 84 tally_one( &cltr->ready.threads.extunpark , &proc->ready.threads.extunpark ); 85 tally_one( &cltr->ready.threads.threads , &proc->ready.threads.threads ); 86 tally_one( &cltr->ready.threads.cthreads , &proc->ready.threads.cthreads ); 87 tally_one( &cltr->ready.sleep.halts , &proc->ready.sleep.halts ); 88 tally_one( &cltr->ready.sleep.cancels , &proc->ready.sleep.cancels ); 89 tally_one( &cltr->ready.sleep.wakes , &proc->ready.sleep.wakes ); 90 tally_one( &cltr->ready.sleep.exits , &proc->ready.sleep.exits ); 104 91 105 92 #if defined(CFA_HAVE_LINUX_IO_URING_H) 106 __atomic_fetch_add( &cltr->io.alloc.fast , proc->io.alloc.fast , __ATOMIC_SEQ_CST ); proc->io.alloc.fast = 0; 107 __atomic_fetch_add( &cltr->io.alloc.slow , proc->io.alloc.slow , __ATOMIC_SEQ_CST ); proc->io.alloc.slow = 0; 108 __atomic_fetch_add( &cltr->io.alloc.fail , proc->io.alloc.fail , __ATOMIC_SEQ_CST ); proc->io.alloc.fail = 0; 109 __atomic_fetch_add( &cltr->io.alloc.revoke , proc->io.alloc.revoke , __ATOMIC_SEQ_CST ); proc->io.alloc.revoke = 0; 110 __atomic_fetch_add( &cltr->io.alloc.block , proc->io.alloc.block , __ATOMIC_SEQ_CST ); proc->io.alloc.block = 0; 111 __atomic_fetch_add( &cltr->io.submit.fast , proc->io.submit.fast , __ATOMIC_SEQ_CST ); proc->io.submit.fast = 0; 112 __atomic_fetch_add( &cltr->io.submit.slow , proc->io.submit.slow , __ATOMIC_SEQ_CST ); proc->io.submit.slow = 0; 113 __atomic_fetch_add( &cltr->io.flush.external , proc->io.flush.external , __ATOMIC_SEQ_CST ); proc->io.flush.external = 0; 114 __atomic_fetch_add( &cltr->io.calls.flush , proc->io.calls.flush , __ATOMIC_SEQ_CST ); proc->io.calls.flush = 0; 115 __atomic_fetch_add( &cltr->io.calls.submitted , proc->io.calls.submitted , __ATOMIC_SEQ_CST ); proc->io.calls.submitted = 0; 116 __atomic_fetch_add( &cltr->io.calls.drain , proc->io.calls.drain , __ATOMIC_SEQ_CST ); proc->io.calls.drain = 0; 117 __atomic_fetch_add( &cltr->io.calls.completed , proc->io.calls.completed , __ATOMIC_SEQ_CST ); proc->io.calls.completed = 0; 118 __atomic_fetch_add( &cltr->io.calls.errors.busy, proc->io.calls.errors.busy, __ATOMIC_SEQ_CST ); proc->io.calls.errors.busy = 0; 119 __atomic_fetch_add( &cltr->io.poller.sleeps , proc->io.poller.sleeps , __ATOMIC_SEQ_CST ); proc->io.poller.sleeps = 0; 93 tally_one( &cltr->io.alloc.fast , &proc->io.alloc.fast ); 94 tally_one( &cltr->io.alloc.slow , &proc->io.alloc.slow ); 95 tally_one( &cltr->io.alloc.fail , &proc->io.alloc.fail ); 96 tally_one( &cltr->io.alloc.revoke , &proc->io.alloc.revoke ); 97 tally_one( &cltr->io.alloc.block , &proc->io.alloc.block ); 98 tally_one( &cltr->io.submit.fast , &proc->io.submit.fast ); 99 tally_one( &cltr->io.submit.slow , &proc->io.submit.slow ); 100 tally_one( &cltr->io.flush.external , &proc->io.flush.external ); 101 tally_one( &cltr->io.calls.flush , &proc->io.calls.flush ); 102 tally_one( &cltr->io.calls.submitted , &proc->io.calls.submitted ); 103 tally_one( &cltr->io.calls.drain , &proc->io.calls.drain ); 104 tally_one( &cltr->io.calls.completed , &proc->io.calls.completed ); 105 tally_one( &cltr->io.calls.errors.busy, &proc->io.calls.errors.busy ); 
120 106 #endif 121 107 } … … 130 116 if( flags & CFA_STATS_READY_Q ) { 131 117 132 sstr | "----- " | type | " \"" | name | "\" (" | "" | id | "" | ") - Ready Q Stats -----";118 sstr | "----- " | type | " \"" | name | "\" (" | "" | id | "" | ") - Ready Q Stats -----"; 133 119 134 120 uint64_t totalR = ready.pop.local.success + ready.pop.help.success + ready.pop.steal.success + ready.pop.search.success; 135 121 uint64_t totalS = ready.push.local.success + ready.push.share.success + ready.push.extrn.success; 136 sstr | "- totals : " | eng3(totalR) | "run," | eng3(totalS) | "schd (" | eng3(ready.push.extrn.success) | "ext," | eng3(ready.threads.migration) | "mig," | eng3(ready.threads.extunpark) | " eupk)"; 122 sstr | "- totals : " | eng3(totalR) | "run," | eng3(totalS) | "schd (" | eng3(ready.push.extrn.success) | "ext," 123 | eng3(ready.threads.migration) | "mig," | eng3(ready.threads.extunpark) | " eupk," | ready.threads.threads | " t," | ready.threads.cthreads | " cthr)"; 137 124 138 125 double push_len = ((double)ready.push.local.attempt + ready.push.share.attempt + ready.push.extrn.attempt) / totalS; … … 147 134 double rLcl_pc = (100.0 * (double)ready.pop.local .success) / totalR; 148 135 sstr | "- local : " | eng3(ready.pop.local .success) | "-"| ws(3, 3, rLcl_pc) | '%' 149 | " (" | eng3(ready.pop.local .attempt) | " try ," | eng3(ready.pop.local .espec) | " spc," | eng3(ready.pop.local .elock) | " lck," | eng3(ready.pop.local .eempty) | " ept)";136 | " (" | eng3(ready.pop.local .attempt) | " try)"; 150 137 double rHlp_pc = (100.0 * (double)ready.pop.help .success) / totalR; 151 138 sstr | "- help : " | eng3(ready.pop.help .success) | "-"| ws(3, 3, rHlp_pc) | '%' 152 | " (" | eng3(ready.pop.help .attempt) | " try ," | eng3(ready.pop.help .espec) | " spc," | eng3(ready.pop.help .elock) | " lck," | eng3(ready.pop.help .eempty) | " ept)";139 | " (" | eng3(ready.pop.help .attempt) | " try)"; 153 140 double rStl_pc = (100.0 * (double)ready.pop.steal .success) / totalR; 154 141 sstr | "- steal : " | eng3(ready.pop.steal .success) | "-"| ws(3, 3, rStl_pc) | '%' 155 | " (" | eng3(ready.pop.steal .attempt) | " try ," | eng3(ready.pop.steal .espec) | " spc," | eng3(ready.pop.steal .elock) | " lck," | eng3(ready.pop.steal .eempty) | " ept)";142 | " (" | eng3(ready.pop.steal .attempt) | " try)"; 156 143 double rSch_pc = (100.0 * (double)ready.pop.search.success) / totalR; 157 144 sstr | "- search : " | eng3(ready.pop.search.success) | "-"| ws(3, 3, rSch_pc) | '%' 158 | " (" | eng3(ready.pop.search.attempt) | " try ," | eng3(ready.pop.search.espec) | " spc," | eng3(ready.pop.search.elock) | " lck," | eng3(ready.pop.search.eempty) | " ept)";145 | " (" | eng3(ready.pop.search.attempt) | " try)"; 159 146 160 147 sstr | "- Idle Slp : " | eng3(ready.sleep.halts) | "halt," | eng3(ready.sleep.cancels) | "cancel," | eng3(ready.sleep.wakes) | "wake," | eng3(ready.sleep.exits) | "exit"; … … 164 151 #if defined(CFA_HAVE_LINUX_IO_URING_H) 165 152 if( flags & CFA_STATS_IO ) { 166 sstr | "----- " | type | " \"" | name | "\" (" | "" | id | "" | ") - I/O Stats -----";153 sstr | "----- " | type | " \"" | name | "\" (" | "" | id | "" | ") - I/O Stats -----"; 167 154 168 155 uint64_t total_allocs = io.alloc.fast + io.alloc.slow; 169 double avgfasta = (100.0 * (double)io.alloc.fast) / total_allocs;170 sstr | "- total allocations : " | eng3(io.alloc.fast) | "fast," | eng3(io.alloc.slow) | "slow (" | ws(3, 3, avgfasta) | "%)";171 sstr | "- failures : " | eng3(io.alloc.fail) | "oom, " | eng3(io.alloc.revoke) | "rvk, " | 
eng3(io.alloc.block) | "blk";172 156 173 157 uint64_t total_submits = io.submit.fast + io.submit.slow; 174 double avgfasts = (100.0 * (double)io.submit.fast) / total_submits; 175 sstr | "- total submits : " | eng3(io.submit.fast) | "fast," | eng3(io.submit.slow) | "slow (" | ws(3, 3, avgfasts) | "%)"; 176 sstr | "- flush external : " | eng3(io.flush.external); 177 178 sstr | "- io_uring_enter : " | eng3(io.calls.flush) | " (" | eng3(io.calls.drain) | ", " | eng3(io.calls.errors.busy) | " EBUSY)"; 158 sstr | "- totals : allc" | eng3(io.alloc .fast) | nonl; 159 if(io.alloc.slow) { 160 double avgfasta = (100.0 * (double)io.alloc.fast) / total_allocs; 161 sstr | "fast," | eng3(io.alloc .slow) | "slow (" | ws(3, 3, avgfasta) | "%)" | nonl; 162 } 163 sstr | " - subm" | eng3(io.submit.fast) | nonl; 164 if(io.alloc.slow) { 165 double avgfasts = (100.0 * (double)io.submit.fast) / total_submits; 166 sstr | "fast," | eng3(io.submit.slow) | "slow (" | ws(3, 3, avgfasts) | "%)" | nonl; 167 } 168 sstr | nl; 169 170 if(io.alloc.fail || io.alloc.revoke || io.alloc.block) 171 sstr | "- failures : " | eng3(io.alloc.fail) | "oom, " | eng3(io.alloc.revoke) | "rvk, " | eng3(io.alloc.block) | "blk"; 172 if(io.flush.external) 173 sstr | "- flush external : " | eng3(io.flush.external); 179 174 180 175 double avgsubs = ((double)io.calls.submitted) / io.calls.flush; 181 176 double avgcomp = ((double)io.calls.completed) / io.calls.drain; 182 sstr | "- submits : " | eng3(io.calls.submitted) | "(" | ws(3, 3, avgsubs) | "/flush)";183 sstr | "- completes : " | eng3(io.calls.completed) | "(" | ws(3, 3, avgcomp) | "/drain)";184 185 sstr | "- poller sleeping : " | eng3(io.poller.sleeps);177 sstr | "- syscll : " 178 | " sub " | eng3(io.calls.flush) | "/" | eng3(io.calls.submitted) | "(" | ws(3, 3, avgsubs) | "/flush)" 179 | " - cmp " | eng3(io.calls.drain) | "/" | eng3(io.calls.completed) | "(" | ws(3, 3, avgcomp) | "/drain)" 180 | " - " | eng3(io.calls.errors.busy) | " EBUSY"; 186 181 sstr | nl; 187 182 } -
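The stats.cfa change above folds the repetitive fetch-add-then-reset pairs into a tally_one() helper: an atomic exchange reads and zeroes the per-processor counter in a single step, and the drained value is then added to the cluster-wide aggregate. The standalone C example below shows the same drain-and-accumulate pattern with the same GCC builtins; the counter names are invented for the example.

#include <stdint.h>
#include <stdio.h>

/* Drain a per-processor counter and fold it into the shared aggregate.
   The exchange returns the drained value and resets the counter to zero
   in one atomic step. */
static inline void tally_one(volatile uint64_t * agg, volatile uint64_t * val) {
	uint64_t add = __atomic_exchange_n(val, 0, __ATOMIC_RELAXED);
	__atomic_fetch_add(agg, add, __ATOMIC_RELAXED);
}

int main(void) {
	/* invented counters: one per-processor tally and the cluster-wide total */
	volatile uint64_t proc_pops = 0, cltr_pops = 0;

	for (int i = 0; i < 5; i++)
		__atomic_fetch_add(&proc_pops, 1, __ATOMIC_RELAXED);

	tally_one(&cltr_pops, &proc_pops);
	printf("cluster=%llu proc=%llu\n",
	       (unsigned long long)cltr_pops, (unsigned long long)proc_pops);   /* cluster=5 proc=0 */
	return 0;
}

Relaxed ordering appears sufficient here because the counters are monotonic tallies read only for reporting, not used to synchronize other data, which is presumably why the changeset also drops the SEQ_CST ordering of the old code.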
libcfa/src/concurrency/stats.hfa
r5407cdc r8d66610 2 2 3 3 // #define CFA_STATS_ARRAY 10000 4 // #define __CFA_NO_STATISTICS__ 4 5 5 6 #include <stdint.h> … … 22 23 // number of successes at poping 23 24 volatile uint64_t success; 24 25 // number of attempts failed due to the lock being held26 volatile uint64_t elock;27 28 // number of attempts failed due to the queue being empty (lock held)29 volatile uint64_t eempty;30 31 // number of attempts failed due to the queue looking empty (lock not held)32 volatile uint64_t espec;33 25 }; 34 26 … … 71 63 volatile uint64_t migration; 72 64 volatile uint64_t extunpark; 73 volatile int64_t threads; // number of threads in the system, includes only local change 65 volatile int64_t threads; // number of threads in the system, includes only local change 66 volatile int64_t cthreads; // number of threads in the system, includes only local change 74 67 } threads; 75 68 struct { -
libcfa/src/concurrency/thread.cfa
r5407cdc r8d66610 38 38 curr_cluster = &cl; 39 39 link.next = 0p; 40 link. prev = 0p;41 link.preferred = -1u;40 link.ts = 0; 41 preferred = -1u; 42 42 last_proc = 0p; 43 43 #if defined( __CFA_WITH_VERIFY__ )