Changeset df6cc9d for libcfa/src/concurrency
- Timestamp:
- Oct 19, 2022, 4:43:26 PM (3 years ago)
- Branches:
- ADT, ast-experimental, master
- Children:
- 1a45263
- Parents:
- 9cd5bd2 (diff), 135143ba (diff)
 Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
 Use the(diff)links above to see all the changes relative to each parent.
- Location:
- libcfa/src/concurrency
- Files:
- 
      - 12 edited
 
 - 
          
  alarm.cfa (modified) (5 diffs)
- 
          
  alarm.hfa (modified) (1 diff)
- 
          
  io.cfa (modified) (11 diffs)
- 
          
  io/types.hfa (modified) (2 diffs)
- 
          
  kernel.hfa (modified) (3 diffs)
- 
          
  kernel/cluster.cfa (modified) (3 diffs)
- 
          
  kernel/cluster.hfa (modified) (3 diffs)
- 
          
  kernel/fwd.hfa (modified) (1 diff)
- 
          
  kernel/private.hfa (modified) (2 diffs)
- 
          
  kernel/startup.cfa (modified) (3 diffs)
- 
          
  preemption.cfa (modified) (7 diffs)
- 
          
  ready_queue.cfa (modified) (4 diffs)
 
Legend:
- Unmodified
- Added
- Removed
- 
      libcfa/src/concurrency/alarm.cfar9cd5bd2 rdf6cc9d 55 55 this.period = period; 56 56 this.thrd = thrd; 57 this. timeval= __kernel_get_time() + alarm;57 this.deadline = __kernel_get_time() + alarm; 58 58 set = false; 59 59 type = User; … … 64 64 this.period = period; 65 65 this.proc = proc; 66 this. timeval= __kernel_get_time() + alarm;66 this.deadline = __kernel_get_time() + alarm; 67 67 set = false; 68 68 type = Kernel; … … 72 72 this.initial = alarm; 73 73 this.period = period; 74 this. timeval= __kernel_get_time() + alarm;74 this.deadline = __kernel_get_time() + alarm; 75 75 set = false; 76 76 type = Callback; … … 85 85 void insert( alarm_list_t * this, alarm_node_t * n ) { 86 86 alarm_node_t * it = & (*this)`first; 87 while( it && (n-> timeval > it->timeval) ) {87 while( it && (n->deadline > it->deadline) ) { 88 88 it = & (*it)`next; 89 89 } … … 116 116 117 117 Time curr = __kernel_get_time(); 118 __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu -> %lu).\n", this, curr.tn, this-> timeval.tn );118 __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu -> %lu).\n", this, curr.tn, this->deadline.tn ); 119 119 insert( &alarms, this ); 120 __kernel_set_timer( this-> timeval- curr);120 __kernel_set_timer( this->deadline - curr); 121 121 this->set = true; 122 122 } 
- 
      libcfa/src/concurrency/alarm.hfar9cd5bd2 rdf6cc9d 57 57 }; 58 58 59 Time timeval;// actual time at which the alarm goes off59 Time deadline; // actual time at which the alarm goes off 60 60 enum alarm_type type; // true if this is not a user defined alarm 61 61 bool set :1; // whether or not the alarm has be registered 
- 
      libcfa/src/concurrency/io.cfar9cd5bd2 rdf6cc9d 201 201 __atomic_unlock(&ctx->cq.lock); 202 202 203 touch_tsc( cltr->sched.io.tscs, ctx->cq.id, ts_prev, ts_next );203 touch_tsc( cltr->sched.io.tscs, ctx->cq.id, ts_prev, ts_next, false ); 204 204 205 205 return true; 206 206 } 207 207 208 bool __cfa_io_drain( processor * proc ) {208 bool __cfa_io_drain( struct processor * proc ) { 209 209 bool local = false; 210 210 bool remote = false; … … 243 243 /* paranoid */ verify( io.tscs[target].t.tv != ULLONG_MAX ); 244 244 HELP: if(target < ctxs_count) { 245 const unsigned long long cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io);246 const unsigned long long age = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma);245 const __readyQ_avg_t cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io, false); 246 const __readyQ_avg_t age = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma, false); 247 247 __cfadbg_print_safe(io, "Kernel I/O: Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, ctx->cq.id, age, cutoff, age > cutoff ? "yes" : "no"); 248 248 if(age <= cutoff) break HELP; … … 273 273 } 274 274 275 bool __cfa_io_flush( processor * proc ) {275 bool __cfa_io_flush( struct processor * proc ) { 276 276 /* paranoid */ verify( ! __preemption_enabled() ); 277 277 /* paranoid */ verify( proc ); … … 353 353 354 354 disable_interrupts(); 355 processor * proc = __cfaabi_tls.this_processor;355 struct processor * proc = __cfaabi_tls.this_processor; 356 356 io_context$ * ctx = proc->io.ctx; 357 357 /* paranoid */ verify( __cfaabi_tls.this_processor ); … … 433 433 disable_interrupts(); 434 434 __STATS__( true, if(!lazy) io.submit.eagr += 1; ) 435 processor * proc = __cfaabi_tls.this_processor;435 struct processor * proc = __cfaabi_tls.this_processor; 436 436 io_context$ * ctx = proc->io.ctx; 437 437 /* paranoid */ verify( __cfaabi_tls.this_processor ); … … 551 551 enqueue(this.pending, (__outstanding_io&)pa); 552 552 553 wait( pa. sem);553 wait( pa.waitctx ); 554 554 555 555 return pa.ctx; … … 578 578 pa.ctx = ctx; 579 579 580 post( pa. sem);580 post( pa.waitctx ); 581 581 } 582 582 … … 613 613 } 614 614 615 wait( ei. sem);615 wait( ei.waitctx ); 616 616 617 617 __cfadbg_print_safe(io, "Kernel I/O : %u submitted from arbiter\n", have); … … 631 631 __submit_only(&ctx, ei.idxs, ei.have); 632 632 633 post( ei. sem);633 post( ei.waitctx ); 634 634 } 635 635 … … 641 641 642 642 #if defined(CFA_WITH_IO_URING_IDLE) 643 bool __kernel_read( processor * proc, io_future_t & future, iovec & iov, int fd) {643 bool __kernel_read(struct processor * proc, io_future_t & future, iovec & iov, int fd) { 644 644 io_context$ * ctx = proc->io.ctx; 645 645 /* paranoid */ verify( ! __preemption_enabled() ); … … 692 692 } 693 693 694 void __cfa_io_idle( processor * proc ) {694 void __cfa_io_idle( struct processor * proc ) { 695 695 iovec iov; 696 696 __atomic_acquire( &proc->io.ctx->cq.lock ); 
- 
      libcfa/src/concurrency/io/types.hfar9cd5bd2 rdf6cc9d 107 107 struct __outstanding_io { 108 108 inline Colable; 109 single_sem sem;109 oneshot waitctx; 110 110 }; 111 111 static inline __outstanding_io *& Next( __outstanding_io * n ) { return (__outstanding_io *)Next( (Colable *)n ); } … … 127 127 struct __attribute__((aligned(64))) io_context$ { 128 128 io_arbiter$ * arbiter; 129 processor * proc;129 struct processor * proc; 130 130 131 131 __outstanding_io_queue ext_sq; 
- 
      libcfa/src/concurrency/kernel.hfar9cd5bd2 rdf6cc9d 136 136 137 137 // Link lists fields 138 inline dlink(processor);138 dlink(processor) link; 139 139 140 140 // special init fields … … 158 158 #endif 159 159 }; 160 P9_EMBEDDED( processor, dlink(processor) ) 160 // P9_EMBEDDED( processor, dlink(processor) ) 161 static inline tytagref( dlink(processor), dlink(processor) ) ?`inner( processor & this ) { 162 dlink(processor) & b = this.link; 163 tytagref( dlink(processor), dlink(processor) ) result = { b }; 164 return result; 165 } 161 166 162 167 void ?{}(processor & this, const char name[], struct cluster & cltr); … … 176 181 177 182 // Aligned timestamps which are used by the ready queue and io subsystem 178 union __attribute__((aligned(64))) __timestamp_t { 179 struct { 180 volatile unsigned long long tv; 181 volatile unsigned long long ma; 182 } t; 183 char __padding[192]; 184 }; 185 186 static inline void ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; } 187 static inline void ^?{}(__timestamp_t &) {} 183 union __attribute__((aligned(64))) __timestamp_t; 184 185 void ?{}(__timestamp_t & this); 186 void ^?{}(__timestamp_t &); 188 187 189 188 
- 
      libcfa/src/concurrency/kernel/cluster.cfar9cd5bd2 rdf6cc9d 221 221 static const unsigned __readyq_single_shard = 2; 222 222 223 void ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; } 224 void ^?{}(__timestamp_t &) {} 225 223 226 //----------------------------------------------------------------------- 224 227 // Check that all the intrusive queues in the data structure are still consistent … … 254 257 } 255 258 256 static void assign_list(unsigned & valrq, unsigned & valio, dlist( processor) & list, unsigned count) {257 processor * it = &list`first;259 static void assign_list(unsigned & valrq, unsigned & valio, dlist(struct processor) & list, unsigned count) { 260 struct processor * it = &list`first; 258 261 for(unsigned i = 0; i < count; i++) { 259 262 /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count); … … 278 281 279 282 #if defined(CFA_HAVE_LINUX_IO_URING_H) 280 static void assign_io(io_context$ ** data, size_t count, dlist( processor) & list) {281 processor * it = &list`first;283 static void assign_io(io_context$ ** data, size_t count, dlist(struct processor) & list) { 284 struct processor * it = &list`first; 282 285 while(it) { 283 286 /* paranoid */ verifyf( it, "Unexpected null iterator\n"); 
- 
      libcfa/src/concurrency/kernel/cluster.hfar9cd5bd2 rdf6cc9d 18 18 #include "device/cpu.hfa" 19 19 #include "kernel/private.hfa" 20 #include "math.hfa" 20 21 21 22 #include <limits.h> 23 #include <inttypes.h> 24 25 #include "clock.hfa" 26 27 #if defined(READYQ_USE_LINEAR_AVG) 28 29 // no conversion needed in this case 30 static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return intsc; } 31 32 // warn normally all ints 33 #define warn_large_before warnf( !strict || old_avg < 33_000_000_000, "Suspiciously large previous average: %'llu (%llx), %'" PRId64 "ms \n", old_avg, old_avg, program()`ms ) 34 #define warn_large_after warnf( !strict || ret < 33_000_000_000, "Suspiciously large new average after %'" PRId64 "ms cputime: %'llu (%llx) from %'llu-%'llu (%'llu, %'llu) and %'llu\n", program()`ms, ret, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ) 35 36 // 8X linear factor is just 8 * x 37 #define AVG_FACTOR( x ) (8 * (x)) 38 39 #elif defined(READYQ_USE_LOGDBL_AVG) 40 41 // convert to log2 scale but using double 42 static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { if(unlikely(0 == intsc)) return 0.0; else return log2(intsc); } 43 44 #define warn_large_before warnf( !strict || old_avg < 35.0, "Suspiciously large previous average: %'lf, %'" PRId64 "ms \n", old_avg, program()`ms ) 45 #define warn_large_after warnf( !strict || ret < 35.3, "Suspiciously large new average after %'" PRId64 "ms cputime: %'lf from %'llu-%'llu (%'llu, %'llu) and %'lf\n", program()`ms, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ); \ 46 verify(ret >= 0) 47 48 // 8X factor in logscale is log2(8X) = log2(8) + log2(X) = 3 + log2(X) 49 #define AVG_FACTOR( x ) (3.0 + (x)) 50 51 // we need to overload the __atomic_load_n because they don't support double 52 static inline double __atomic_load_n(volatile double * ptr, int mem) { 53 volatile uint64_t * uptr = (volatile uint64_t *)ptr; 54 _Static_assert(sizeof(*uptr) == sizeof(*ptr)); 55 uint64_t ret = 0; 56 ret = __atomic_load_n(uptr, mem); 57 uint64_t *rp = &ret; 58 double ret = *(volatile double *)rp; 59 /* paranoid */ verify( ret == 0 || ret > 3e-100 ); 60 return ret; 61 } 62 63 // we need to overload the __atomic_store_n because they don't support double 64 static inline void __atomic_store_n(volatile double * ptr, double val, int mem) { 65 /* paranoid */ verify( val == 0 || val > 3e-100 ); 66 volatile uint64_t * uptr = (volatile uint64_t *)ptr; 67 _Static_assert(sizeof(*uptr) == sizeof(*ptr)); 68 uint64_t * valp = (uint64_t *)&val; 69 __atomic_store_n(uptr, *valp, mem); 70 } 71 72 #elif defined(READYQ_USE_LOGDBL_AVG) 73 74 //convert to log2 scale but with fix point u32.32 values 75 static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return ulog2_32_32(tsc); } 76 77 // 8X factor, +3 in logscale (see above) is + 0x3.00000000 78 #define AVG_FACTOR( x ) (0x3_00000000ull + (x)) 79 80 #else 81 #error must pick a scheme for averaging 82 #endif 22 83 23 84 //----------------------------------------------------------------------- 24 85 // Calc moving average based on existing average, before and current time. 25 static inline unsigned long long moving_average(unsigned long long currtsc, unsigned long long instsc, unsigned long long old_avg) { 26 /* paranoid */ verifyf( old_avg < 15000000000000, "Suspiciously large previous average: %'llu (%llx)\n", old_avg, old_avg ); 86 static inline __readyQ_avg_t moving_average(unsigned long long currtsc, unsigned long long intsc, __readyQ_avg_t old_avg, bool strict) { 87 (void)strict; // disable the warning around the fact this is unused in release. 88 /* paranoid */ warn_large_before; 27 89 28 const unsigned long long new_val = currtsc > instsc ? currtsc - instsc : 0; 29 const unsigned long long total_weight = 16; 30 const unsigned long long new_weight = 4; 31 const unsigned long long old_weight = total_weight - new_weight; 32 const unsigned long long ret = ((new_weight * new_val) + (old_weight * old_avg)) / total_weight; 90 const unsigned long long new_val = currtsc > intsc ? currtsc - intsc : 0; 91 const __readyQ_avg_t total_weight = 16; 92 const __readyQ_avg_t new_weight = 12; 93 const __readyQ_avg_t old_weight = total_weight - new_weight; 94 const __readyQ_avg_t ret = ((new_weight * __to_readyQ_avg(new_val)) + (old_weight * old_avg)) / total_weight; 95 96 /* paranoid */ warn_large_after; 33 97 return ret; 34 98 } 35 99 36 static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next ) {100 static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next, bool strict) { 37 101 if (ts_next == ULLONG_MAX) return; 38 102 unsigned long long now = rdtscl(); 39 unsigned long longpma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);103 __readyQ_avg_t pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED); 40 104 __atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED); 41 __atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma ), __ATOMIC_RELAXED);105 __atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma, strict), __ATOMIC_RELAXED); 42 106 } 43 107 … … 45 109 // Calc age a timestamp should be before needing help. 46 110 forall(Data_t * | { unsigned long long ts(Data_t & this); }) 47 static inline unsigned long longcalc_cutoff(111 static inline __readyQ_avg_t calc_cutoff( 48 112 const unsigned long long ctsc, 49 113 unsigned procid, … … 51 115 Data_t * data, 52 116 __timestamp_t * tscs, 53 const unsigned shard_factor 117 const unsigned shard_factor, 118 bool strict 54 119 ) { 55 120 unsigned start = procid; 56 unsigned long longmax = 0;121 __readyQ_avg_t max = 0; 57 122 for(i; shard_factor) { 58 123 unsigned long long ptsc = ts(data[start + i]); 59 124 if(ptsc != ULLONG_MAX) { 60 125 /* paranoid */ verify( start + i < count ); 61 unsigned long long tsc = moving_average(ctsc, ptsc, tscs[start + i].t.ma);62 if( tsc > max) max = tsc;126 __readyQ_avg_t avg = moving_average(ctsc, ptsc, tscs[start + i].t.ma, strict); 127 if(avg > max) max = avg; 63 128 } 64 129 } 65 return 8 * max;130 return AVG_FACTOR( max ); 66 131 } 67 132 
- 
      libcfa/src/concurrency/kernel/fwd.hfar9cd5bd2 rdf6cc9d 276 276 // intented to be use by wait, wait_any, waitfor, etc. rather than used directly 277 277 bool retract( future_t & this, oneshot & wait_ctx ) { 278 struct oneshot * expected = this.ptr;278 struct oneshot * expected = &wait_ctx; 279 279 280 280 // attempt to remove the context so it doesn't get consumed. 
- 
      libcfa/src/concurrency/kernel/private.hfar9cd5bd2 rdf6cc9d 50 50 #endif 51 51 #endif 52 // #define READYQ_USE_LINEAR_AVG 53 #define READYQ_USE_LOGDBL_AVG 54 // #define READYQ_USE_LOGINT_AVG 55 56 #if defined(READYQ_USE_LINEAR_AVG) 57 typedef unsigned long long __readyQ_avg_t; 58 #elif defined(READYQ_USE_LOGDBL_AVG) 59 typedef double __readyQ_avg_t; 60 #elif defined(READYQ_USE_LOGDBL_AVG) 61 typedef unsigned long long __readyQ_avg_t; 62 #else 63 #error must pick a scheme for averaging 64 #endif 52 65 53 66 extern "C" { … … 65 78 //----------------------------------------------------------------------------- 66 79 // Scheduler 80 union __attribute__((aligned(64))) __timestamp_t { 81 struct { 82 volatile unsigned long long tv; 83 volatile __readyQ_avg_t ma; 84 } t; 85 char __padding[192]; 86 }; 87 67 88 extern "C" { 68 89 void disable_interrupts() OPTIONAL_THREAD; 
- 
      libcfa/src/concurrency/kernel/startup.cfar9cd5bd2 rdf6cc9d 184 184 185 185 186 extern void heapManagerCtor(); 187 extern void heapManagerDtor(); 188 186 189 //============================================================================================= 187 190 // Kernel Setup logic … … 374 377 proc->local_data = &__cfaabi_tls; 375 378 379 heapManagerCtor(); // initialize heap 380 376 381 __cfa_io_start( proc ); 377 382 register_tls( proc ); … … 425 430 unregister_tls( proc ); 426 431 __cfa_io_stop( proc ); 432 433 heapManagerDtor(); // de-initialize heap 427 434 428 435 return 0p; 
- 
      libcfa/src/concurrency/preemption.cfar9cd5bd2 rdf6cc9d 104 104 static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) { 105 105 if( ! & (*alarms)`first ) return 0p; // If no alarms return null 106 if( (*alarms)`first. timeval>= currtime ) return 0p; // If alarms head not expired return null106 if( (*alarms)`first.deadline >= currtime ) return 0p; // If alarms head not expired return null 107 107 return pop(alarms); // Otherwise just pop head 108 108 } … … 140 140 if( period > 0 ) { 141 141 __cfadbg_print_buffer_local( preemption, " KERNEL: alarm period is %lu.\n", period`ns ); 142 node-> timeval= currtime + period; // Alarm is periodic, add currtime to it (used cached current time)142 node->deadline = currtime + period; // Alarm is periodic, add currtime to it (used cached current time) 143 143 insert( alarms, node ); // Reinsert the node for the next time it triggers 144 144 } … … 147 147 // If there are still alarms pending, reset the timer 148 148 if( & (*alarms)`first ) { 149 Duration delta = (*alarms)`first. timeval- currtime;149 Duration delta = (*alarms)`first.deadline - currtime; 150 150 __kernel_set_timer( delta ); 151 151 } … … 232 232 // available. 233 233 234 //-----------------------------------------------------------------------------235 // Some assembly required236 #define __cfaasm_label(label, when) when: asm volatile goto(".global __cfaasm_" #label "_" #when "\n" "__cfaasm_" #label "_" #when ":":::"memory":when)237 238 234 //---------- 239 235 // special case for preemption since used often 240 __attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() libcfa_nopreempt libcfa_public { 241 // create a assembler label before 242 // marked as clobber all to avoid movement 243 __cfaasm_label(check, before); 244 236 bool __preemption_enabled() libcfa_nopreempt libcfa_public { 245 237 // access tls as normal 246 bool enabled = __cfaabi_tls.preemption_state.enabled; 247 248 // Check if there is a pending preemption 249 processor * proc = __cfaabi_tls.this_processor; 250 bool pending = proc ? proc->pending_preemption : false; 251 if( enabled && pending ) proc->pending_preemption = false; 252 253 // create a assembler label after 254 // marked as clobber all to avoid movement 255 __cfaasm_label(check, after); 256 257 // If we can preempt and there is a pending one 258 // this is a good time to yield 259 if( enabled && pending ) { 260 force_yield( __POLL_PREEMPTION ); 261 } 262 return enabled; 263 } 264 265 struct asm_region { 266 void * before; 267 void * after; 268 }; 269 270 static inline bool __cfaasm_in( void * ip, struct asm_region & region ) { 271 return ip >= region.before && ip <= region.after; 238 return __cfaabi_tls.preemption_state.enabled; 272 239 } 273 240 … … 293 260 uintptr_t __cfatls_get( unsigned long int offset ) libcfa_nopreempt libcfa_public; //no inline to avoid problems 294 261 uintptr_t __cfatls_get( unsigned long int offset ) { 295 // create a assembler label before296 // marked as clobber all to avoid movement297 __cfaasm_label(get, before);298 299 262 // access tls as normal (except for pointer arithmetic) 300 263 uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset); 301 264 302 // create a assembler label after303 // marked as clobber all to avoid movement304 __cfaasm_label(get, after);305 306 265 // This is used everywhere, to avoid cost, we DO NOT poll pending preemption 307 266 return val; … … 310 269 extern "C" { 311 270 // Disable interrupts by incrementing the counter 312 void disable_interrupts() libcfa_nopreempt libcfa_public { 313 // create a assembler label before 314 // marked as clobber all to avoid movement 315 __cfaasm_label(dsable, before); 316 317 with( __cfaabi_tls.preemption_state ) { 318 #if GCC_VERSION > 50000 319 static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free"); 320 #endif 321 322 // Set enabled flag to false 323 // should be atomic to avoid preemption in the middle of the operation. 324 // use memory order RELAXED since there is no inter-thread on this variable requirements 325 __atomic_store_n(&enabled, false, __ATOMIC_RELAXED); 326 327 // Signal the compiler that a fence is needed but only for signal handlers 328 __atomic_signal_fence(__ATOMIC_ACQUIRE); 329 330 __attribute__((unused)) unsigned short new_val = disable_count + 1; 331 disable_count = new_val; 332 verify( new_val < 65_000u ); // If this triggers someone is disabling interrupts without enabling them 333 } 334 335 // create a assembler label after 336 // marked as clobber all to avoid movement 337 __cfaasm_label(dsable, after); 338 271 void disable_interrupts() libcfa_nopreempt libcfa_public with( __cfaabi_tls.preemption_state ) { 272 #if GCC_VERSION > 50000 273 static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free"); 274 #endif 275 276 // Set enabled flag to false 277 // should be atomic to avoid preemption in the middle of the operation. 278 // use memory order RELAXED since there is no inter-thread on this variable requirements 279 __atomic_store_n(&enabled, false, __ATOMIC_RELAXED); 280 281 // Signal the compiler that a fence is needed but only for signal handlers 282 __atomic_signal_fence(__ATOMIC_ACQUIRE); 283 284 __attribute__((unused)) unsigned short new_val = disable_count + 1; 285 disable_count = new_val; 286 verify( new_val < 65_000u ); // If this triggers someone is disabling interrupts without enabling them 339 287 } 340 288 … … 379 327 // i.e. on a real processor and not in the kernel 380 328 // (can return true even if no preemption was pending) 381 bool poll_interrupts() libcfa_ public {329 bool poll_interrupts() libcfa_nopreempt libcfa_public { 382 330 // Cache the processor now since interrupts can start happening after the atomic store 383 processor * proc = publicTLS_get( this_processor );331 processor * proc = __cfaabi_tls.this_processor; 384 332 if ( ! proc ) return false; 385 if ( ! __preemption_enabled() ) return false; 386 387 with( __cfaabi_tls.preemption_state ){ 388 // Signal the compiler that a fence is needed but only for signal handlers 389 __atomic_signal_fence(__ATOMIC_RELEASE); 390 if( proc->pending_preemption ) { 391 proc->pending_preemption = false; 392 force_yield( __POLL_PREEMPTION ); 393 } 333 if ( ! __cfaabi_tls.preemption_state.enabled ) return false; 334 335 // Signal the compiler that a fence is needed but only for signal handlers 336 __atomic_signal_fence(__ATOMIC_RELEASE); 337 if( unlikely( proc->pending_preemption ) ) { 338 proc->pending_preemption = false; 339 force_yield( __POLL_PREEMPTION ); 394 340 } 395 341 
- 
      libcfa/src/concurrency/ready_queue.cfar9cd5bd2 rdf6cc9d 62 62 //----------------------------------------------------------------------- 63 63 __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->sched) { 64 processor * const proc = kernelTLS().this_processor;64 struct processor * const proc = kernelTLS().this_processor; 65 65 const bool external = (!proc) || (cltr != proc->cltr); 66 66 const bool remote = hint == UNPARK_REMOTE; … … 116 116 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes_count ); 117 117 118 processor * const proc = kernelTLS().this_processor;118 struct processor * const proc = kernelTLS().this_processor; 119 119 unsigned this = proc->rdq.id; 120 120 /* paranoid */ verify( this < lanes_count ); … … 139 139 /* paranoid */ verify( readyQ.tscs[target].t.tv != ULLONG_MAX ); 140 140 if(target < lanes_count) { 141 const unsigned long long cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq);142 const unsigned long long age = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma);141 const __readyQ_avg_t cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq, true); 142 const __readyQ_avg_t age = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma, false); 143 143 __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no"); 144 144 if(age > cutoff) { … … 214 214 __STATS( stats.success++; ) 215 215 216 touch_tsc(readyQ.tscs, w, ts_prev, ts_next );216 touch_tsc(readyQ.tscs, w, ts_prev, ts_next, true); 217 217 218 218 thrd->preferred = w / __shard_factor.readyq; 
  Note:
 See   TracChangeset
 for help on using the changeset viewer.
  