Timestamp:
Oct 19, 2022, 4:43:26 PM
Author:
Thierry Delisle <tdelisle@…>
Branches:
ADT, ast-experimental, master
Children:
1a45263
Parents:
9cd5bd2 (diff), 135143ba (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' into pthread-emulation

Location:
libcfa/src/concurrency
Files:
12 edited

  • libcfa/src/concurrency/alarm.cfa

    r9cd5bd2 rdf6cc9d  
    5555        this.period  = period;
    5656        this.thrd = thrd;
    57         this.timeval = __kernel_get_time() + alarm;
     57        this.deadline = __kernel_get_time() + alarm;
    5858        set = false;
    5959        type = User;
     
    6464        this.period  = period;
    6565        this.proc = proc;
    66         this.timeval = __kernel_get_time() + alarm;
     66        this.deadline = __kernel_get_time() + alarm;
    6767        set = false;
    6868        type = Kernel;
     
    7272        this.initial = alarm;
    7373        this.period  = period;
    74         this.timeval = __kernel_get_time() + alarm;
     74        this.deadline = __kernel_get_time() + alarm;
    7575        set = false;
    7676        type = Callback;
     
    8585void insert( alarm_list_t * this, alarm_node_t * n ) {
    8686        alarm_node_t * it = & (*this)`first;
    87         while( it && (n->timeval > it->timeval) ) {
     87        while( it && (n->deadline > it->deadline) ) {
    8888                it = & (*it)`next;
    8989        }
     
    116116
    117117                Time curr = __kernel_get_time();
    118                 __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu -> %lu).\n", this, curr.tn, this->timeval.tn );
     118                __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu -> %lu).\n", this, curr.tn, this->deadline.tn );
    119119                insert( &alarms, this );
    120                 __kernel_set_timer( this->timeval - curr);
     120                __kernel_set_timer( this->deadline - curr);
    121121                this->set = true;
    122122        }
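
The rename from timeval to deadline makes explicit that the field stores the absolute time at which the alarm fires, and insert() above keeps the list ordered by that deadline so the head is always the next alarm due. A minimal plain-C sketch of the same ordered insert (names here are illustrative, not the libcfa definitions):

    struct alarm_sketch {
        unsigned long long deadline;                 // absolute time at which the alarm fires
        struct alarm_sketch * next;
    };

    // walk past nodes with earlier deadlines, then splice in front of the first later one
    static void insert_sorted( struct alarm_sketch ** head, struct alarm_sketch * n ) {
        struct alarm_sketch ** it = head;
        while( *it && n->deadline > (*it)->deadline )
            it = &(*it)->next;
        n->next = *it;
        *it = n;
    }
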
  • libcfa/src/concurrency/alarm.hfa

    r9cd5bd2 rdf6cc9d  
    5757        };
    5858
    59         Time timeval;           // actual time at which the alarm goes off
     59        Time deadline;          // actual time at which the alarm goes off
    6060        enum alarm_type type;   // kind of alarm: User, Kernel, or Callback
    6161        bool set                :1;     // whether or not the alarm has been registered
  • libcfa/src/concurrency/io.cfa

    r9cd5bd2 rdf6cc9d  
    201201                __atomic_unlock(&ctx->cq.lock);
    202202
    203                 touch_tsc( cltr->sched.io.tscs, ctx->cq.id, ts_prev, ts_next );
     203                touch_tsc( cltr->sched.io.tscs, ctx->cq.id, ts_prev, ts_next, false );
    204204
    205205                return true;
    206206        }
    207207
    208         bool __cfa_io_drain( processor * proc ) {
     208        bool __cfa_io_drain( struct processor * proc ) {
    209209                bool local = false;
    210210                bool remote = false;
     
    243243                                /* paranoid */ verify( io.tscs[target].t.tv != ULLONG_MAX );
    244244                                HELP: if(target < ctxs_count) {
    245                                         const unsigned long long cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io);
    246                                         const unsigned long long age = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma);
     245                                        const __readyQ_avg_t cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io, false);
     246                                        const __readyQ_avg_t age = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma, false);
    247247                                        __cfadbg_print_safe(io, "Kernel I/O: Help attempt on %u from %u, age %'lf vs cutoff %'lf, %s\n", target, ctx->cq.id, age, cutoff, age > cutoff ? "yes" : "no");
    248248                                        if(age <= cutoff) break HELP;
     
    273273        }
    274274
    275         bool __cfa_io_flush( processor * proc ) {
     275        bool __cfa_io_flush( struct processor * proc ) {
    276276                /* paranoid */ verify( ! __preemption_enabled() );
    277277                /* paranoid */ verify( proc );
     
    353353
    354354                disable_interrupts();
    355                 processor * proc = __cfaabi_tls.this_processor;
     355                struct processor * proc = __cfaabi_tls.this_processor;
    356356                io_context$ * ctx = proc->io.ctx;
    357357                /* paranoid */ verify( __cfaabi_tls.this_processor );
     
    433433                disable_interrupts();
    434434                __STATS__( true, if(!lazy) io.submit.eagr += 1; )
    435                 processor * proc = __cfaabi_tls.this_processor;
     435                struct processor * proc = __cfaabi_tls.this_processor;
    436436                io_context$ * ctx = proc->io.ctx;
    437437                /* paranoid */ verify( __cfaabi_tls.this_processor );
     
    551551                enqueue(this.pending, (__outstanding_io&)pa);
    552552
    553                 wait( pa.sem );
     553                wait( pa.waitctx );
    554554
    555555                return pa.ctx;
     
    578578                                pa.ctx = ctx;
    579579
    580                                 post( pa.sem );
     580                                post( pa.waitctx );
    581581                        }
    582582
     
    613613                }
    614614
    615                 wait( ei.sem );
     615                wait( ei.waitctx );
    616616
    617617                __cfadbg_print_safe(io, "Kernel I/O : %u submitted from arbiter\n", have);
     
    631631                                        __submit_only(&ctx, ei.idxs, ei.have);
    632632
    633                                         post( ei.sem );
     633                                        post( ei.waitctx );
    634634                                }
    635635
     
    641641
    642642        #if defined(CFA_WITH_IO_URING_IDLE)
    643                 bool __kernel_read(processor * proc, io_future_t & future, iovec & iov, int fd) {
     643                bool __kernel_read(struct processor * proc, io_future_t & future, iovec & iov, int fd) {
    644644                        io_context$ * ctx = proc->io.ctx;
    645645                        /* paranoid */ verify( ! __preemption_enabled() );
     
    692692                }
    693693
    694                 void __cfa_io_idle( processor * proc ) {
     694                void __cfa_io_idle( struct processor * proc ) {
    695695                        iovec iov;
    696696                        __atomic_acquire( &proc->io.ctx->cq.lock );
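
Several hunks above only add the struct keyword in front of processor. A plausible motivation, hedged since the commit message does not say: with only a forward declaration of the aggregate in scope, the tag form is the one spelling that always names the type. A minimal C illustration:

    struct processor;                            // forward declaration: incomplete type
    static void use( struct processor * p );     // OK: the tag form always works
    // static void use( processor * p );         // error in C unless a 'processor' alias is visible
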
  • libcfa/src/concurrency/io/types.hfa

    r9cd5bd2 rdf6cc9d  
    107107        struct __outstanding_io {
    108108                inline Colable;
    109                 single_sem sem;
     109                oneshot waitctx;
    110110        };
    111111        static inline __outstanding_io *& Next( __outstanding_io * n ) { return (__outstanding_io *)Next( (Colable *)n ); }
     
    127127        struct __attribute__((aligned(64))) io_context$ {
    128128                io_arbiter$ * arbiter;
    129                 processor * proc;
     129                struct processor * proc;
    130130
    131131                __outstanding_io_queue ext_sq;
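
The __outstanding_io wait field changes from a single_sem to a oneshot: a single-use handoff in which exactly one wait() pairs with exactly one post(), which is all this queue needs. A minimal sketch of those semantics using C11 atomics (libcfa parks the waiting thread instead of spinning; this is illustrative only):

    #include <stdatomic.h>

    struct oneshot_sketch { atomic_int posted; };    // 0 = empty, 1 = posted

    static void post_sketch( struct oneshot_sketch * this ) {
        atomic_store_explicit( &this->posted, 1, memory_order_release );
    }

    static void wait_sketch( struct oneshot_sketch * this ) {
        while( atomic_load_explicit( &this->posted, memory_order_acquire ) == 0 )
            ;                                        // the real oneshot parks the thread here
    }
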
  • libcfa/src/concurrency/kernel.hfa

    r9cd5bd2 rdf6cc9d  
    136136
    137137        // Link lists fields
    138         inline dlink(processor);
     138        dlink(processor) link;
    139139
    140140        // special init fields
     
    158158#endif
    159159};
    160 P9_EMBEDDED( processor, dlink(processor) )
     160// P9_EMBEDDED( processor, dlink(processor) )
     161static inline tytagref( dlink(processor), dlink(processor) ) ?`inner( processor & this ) {
     162    dlink(processor) & b = this.link;
     163    tytagref( dlink(processor), dlink(processor) ) result = { b };
     164    return result;
     165}
    161166
    162167void  ?{}(processor & this, const char name[], struct cluster & cltr);
     
    176181
    177182// Aligned timestamps which are used by the ready queue and io subsystem
    178 union __attribute__((aligned(64))) __timestamp_t {
    179         struct {
    180                 volatile unsigned long long tv;
    181                 volatile unsigned long long ma;
    182         } t;
    183         char __padding[192];
    184 };
    185 
    186 static inline void  ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; }
    187 static inline void ^?{}(__timestamp_t &) {}
     183union __attribute__((aligned(64))) __timestamp_t;
     184
     185void  ?{}(__timestamp_t & this);
     186void ^?{}(__timestamp_t &);
    188187
    189188
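
Here the anonymous inline dlink(processor) member becomes a named link field, and the P9_EMBEDDED macro is replaced by a hand-written ?`inner conversion over that field, leaving the dlist(processor) interface unchanged. In plain-C terms the pattern looks roughly like this (illustrative names, not the libcfa definitions):

    struct link_sketch { struct proc_sketch * next, * prev; };

    struct proc_sketch {
        /* ... other processor state ... */
        struct link_sketch link;                 // explicitly named embedded list node
    };

    // what ?`inner provides: hand the list code a reference to the embedded node
    static struct link_sketch * inner_sketch( struct proc_sketch * p ) { return &p->link; }
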
  • libcfa/src/concurrency/kernel/cluster.cfa

    r9cd5bd2 rdf6cc9d  
    221221static const unsigned __readyq_single_shard = 2;
    222222
     223void  ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; }
     224void ^?{}(__timestamp_t &) {}
     225
    223226//-----------------------------------------------------------------------
    224227// Check that all the intrusive queues in the data structure are still consistent
     
    254257}
    255258
    256 static void assign_list(unsigned & valrq, unsigned & valio, dlist(processor) & list, unsigned count) {
    257         processor * it = &list`first;
     259static void assign_list(unsigned & valrq, unsigned & valio, dlist(struct processor) & list, unsigned count) {
     260        struct processor * it = &list`first;
    258261        for(unsigned i = 0; i < count; i++) {
    259262                /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
     
    278281
    279282#if defined(CFA_HAVE_LINUX_IO_URING_H)
    280         static void assign_io(io_context$ ** data, size_t count, dlist(processor) & list) {
    281                 processor * it = &list`first;
     283        static void assign_io(io_context$ ** data, size_t count, dlist(struct processor) & list) {
     284                struct processor * it = &list`first;
    282285                while(it) {
    283286                        /* paranoid */ verifyf( it, "Unexpected null iterator\n");
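
Together with the kernel.hfa hunk that reduces __timestamp_t to a forward declaration, moving the constructor and destructor bodies here keeps the union's layout (which now depends on __readyQ_avg_t) out of the public header. The same split in plain C, as a sketch:

    /* public header: layout stays hidden */
    union timestamp_sketch;
    void timestamp_init( union timestamp_sketch * );
    void timestamp_fini( union timestamp_sketch * );

    /* private implementation */
    union timestamp_sketch {
        struct { unsigned long long tv, ma; } t;
        char __padding[192];                     // cache-line padding, as in the real union
    };
    void timestamp_init( union timestamp_sketch * this ) { this->t.tv = 0; this->t.ma = 0; }
    void timestamp_fini( union timestamp_sketch * this ) { (void)this; }
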
  • libcfa/src/concurrency/kernel/cluster.hfa

    r9cd5bd2 rdf6cc9d  
    1818#include "device/cpu.hfa"
    1919#include "kernel/private.hfa"
     20#include "math.hfa"
    2021
    2122#include <limits.h>
     23#include <inttypes.h>
     24
     25#include "clock.hfa"
     26
     27#if   defined(READYQ_USE_LINEAR_AVG)
     28
     29// no conversion needed in this case
     30static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return intsc; }
     31
     32// warning helpers: in the linear scheme all values are plain integers
     33#define warn_large_before warnf( !strict || old_avg < 33_000_000_000, "Suspiciously large previous average: %'llu (%llx), %'" PRId64 "ms \n", old_avg, old_avg, program()`ms )
     34#define warn_large_after warnf( !strict || ret < 33_000_000_000, "Suspiciously large new average after %'" PRId64 "ms cputime: %'llu (%llx) from %'llu-%'llu (%'llu, %'llu) and %'llu\n", program()`ms, ret, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg )
     35
     36// 8X linear factor is just 8 * x
     37#define AVG_FACTOR( x ) (8 * (x))
     38
     39#elif defined(READYQ_USE_LOGDBL_AVG)
     40
     41// convert to log2 scale but using double
     42static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { if(unlikely(0 == intsc)) return 0.0; else return log2(intsc); }
     43
     44#define warn_large_before warnf( !strict || old_avg < 35.0, "Suspiciously large previous average: %'lf, %'" PRId64 "ms \n", old_avg, program()`ms )
     45#define warn_large_after warnf( !strict || ret < 35.3, "Suspiciously large new average after %'" PRId64 "ms cputime: %'lf from %'llu-%'llu (%'llu, %'llu) and %'lf\n", program()`ms, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ); \
     46verify(ret >= 0)
     47
     48// 8X factor in logscale is log2(8X) = log2(8) + log2(X) = 3 + log2(X)
     49#define AVG_FACTOR( x ) (3.0 + (x))
     50
     51// we need to overload the __atomic_load_n because they don't support double
     52static inline double __atomic_load_n(volatile double * ptr, int mem) {
     53        volatile uint64_t * uptr = (volatile uint64_t *)ptr;
     54        _Static_assert(sizeof(*uptr) == sizeof(*ptr));
      55        uint64_t bits = 0;
      56        bits = __atomic_load_n(uptr, mem);
      57        uint64_t * bp = &bits;
      58        double ret = *(volatile double *)bp;
     59        /* paranoid */ verify( ret == 0 || ret > 3e-100 );
     60        return ret;
     61}
     62
     63// we need to overload the __atomic_store_n because they don't support double
     64static inline void __atomic_store_n(volatile double * ptr, double val, int mem) {
     65        /* paranoid */ verify( val == 0 || val > 3e-100 );
     66        volatile uint64_t * uptr = (volatile uint64_t *)ptr;
     67        _Static_assert(sizeof(*uptr) == sizeof(*ptr));
     68        uint64_t * valp = (uint64_t *)&val;
     69        __atomic_store_n(uptr, *valp, mem);
     70}
     71
      72#elif defined(READYQ_USE_LOGINT_AVG)
      73
      74// convert to log2 scale but with fixed-point u32.32 values
      75static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return ulog2_32_32(intsc); }
     76
     77// 8X factor, +3 in logscale (see above) is + 0x3.00000000
     78#define AVG_FACTOR( x ) (0x3_00000000ull + (x))
     79
     80#else
     81#error must pick a scheme for averaging
     82#endif
    2283
    2384//-----------------------------------------------------------------------
    2485// Calc moving average based on existing average, before and current time.
    25 static inline unsigned long long moving_average(unsigned long long currtsc, unsigned long long instsc, unsigned long long old_avg) {
    26         /* paranoid */ verifyf( old_avg < 15000000000000, "Suspiciously large previous average: %'llu (%llx)\n", old_avg, old_avg );
     86static inline __readyQ_avg_t moving_average(unsigned long long currtsc, unsigned long long intsc, __readyQ_avg_t old_avg, bool strict) {
      87        (void)strict; // suppress the unused-parameter warning in release builds, where the checks compile away
     88        /* paranoid */ warn_large_before;
    2789
    28         const unsigned long long new_val = currtsc > instsc ? currtsc - instsc : 0;
    29         const unsigned long long total_weight = 16;
    30         const unsigned long long new_weight   = 4;
    31         const unsigned long long old_weight = total_weight - new_weight;
    32         const unsigned long long ret = ((new_weight * new_val) + (old_weight * old_avg)) / total_weight;
     90        const unsigned long long new_val = currtsc > intsc ? currtsc - intsc : 0;
     91        const __readyQ_avg_t total_weight = 16;
     92        const __readyQ_avg_t new_weight   = 12;
     93        const __readyQ_avg_t old_weight = total_weight - new_weight;
     94        const __readyQ_avg_t ret = ((new_weight * __to_readyQ_avg(new_val)) + (old_weight * old_avg)) / total_weight;
     95
     96        /* paranoid */ warn_large_after;
    3397        return ret;
    3498}
    3599
    36 static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next) {
     100static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next, bool strict) {
    37101        if (ts_next == ULLONG_MAX) return;
    38102        unsigned long long now = rdtscl();
    39         unsigned long long pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);
     103        __readyQ_avg_t pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);
    40104        __atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED);
    41         __atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma), __ATOMIC_RELAXED);
     105        __atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma, strict), __ATOMIC_RELAXED);
    42106}
    43107
     
    45109// Calc age a timestamp should be before needing help.
    46110forall(Data_t * | { unsigned long long ts(Data_t & this); })
    47 static inline unsigned long long calc_cutoff(
     111static inline __readyQ_avg_t calc_cutoff(
    48112        const unsigned long long ctsc,
    49113        unsigned procid,
     
    51115        Data_t * data,
    52116        __timestamp_t * tscs,
    53         const unsigned shard_factor
     117        const unsigned shard_factor,
     118        bool strict
    54119) {
    55120        unsigned start = procid;
    56         unsigned long long max = 0;
     121        __readyQ_avg_t max = 0;
    57122        for(i; shard_factor) {
    58123                unsigned long long ptsc = ts(data[start + i]);
    59124                if(ptsc != ULLONG_MAX) {
    60125                        /* paranoid */ verify( start + i < count );
    61                         unsigned long long tsc = moving_average(ctsc, ptsc, tscs[start + i].t.ma);
    62                         if(tsc > max) max = tsc;
     126                        __readyQ_avg_t avg = moving_average(ctsc, ptsc, tscs[start + i].t.ma, strict);
     127                        if(avg > max) max = avg;
    63128                }
    64129        }
    65         return 8 * max;
     130        return AVG_FACTOR( max );
    66131}
    67132
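
Besides switching to __readyQ_avg_t and optionally to log2 scale, this hunk changes the exponential moving average's weights: a new sample now contributes 12/16 instead of 4/16, so the average tracks recent latencies more aggressively. A standalone worked example of the linear-scheme arithmetic, with illustrative values:

    #include <stdio.h>

    int main(void) {
        unsigned long long old_avg = 1000;       // previous average, in tsc ticks
        unsigned long long new_val = 2000;       // latest observed delay
        // new average = (12 * new + (16 - 12) * old) / 16
        unsigned long long avg = (12ull * new_val + 4ull * old_avg) / 16ull;
        printf( "%llu\n", avg );                 // prints 1750
        return 0;
    }

In the log2 schemes the 8x helping cutoff becomes the additive constant encoded by AVG_FACTOR, since log2(8 * x) = 3 + log2(x).
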
  • libcfa/src/concurrency/kernel/fwd.hfa

    r9cd5bd2 rdf6cc9d  
    276276                        // intended to be used by wait, wait_any, waitfor, etc. rather than directly
    277277                        bool retract( future_t & this, oneshot & wait_ctx ) {
    278                                 struct oneshot * expected = this.ptr;
     278                                struct oneshot * expected = &wait_ctx;
    279279
    280280                                // attempt to remove the context so it doesn't get consumed.
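
This one-line change is a bug fix rather than a rename: retract must only succeed if the waiter's own context is still registered in the future, so the compare-and-swap has to expect &wait_ctx; loading this.ptr as the expected value could observe a concurrently fulfilled future and wrongly retract it. A minimal sketch of the corrected logic with C11 atomics (the real future_t also encodes fulfilled/used states in ptr):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct oneshot;
    struct future_sketch { _Atomic(struct oneshot *) ptr; };

    // succeeds only if wait_ctx is still the registered waiter
    static bool retract_sketch( struct future_sketch * this, struct oneshot * wait_ctx ) {
        struct oneshot * expected = wait_ctx;    // expect OUR context, not whatever ptr holds
        return atomic_compare_exchange_strong( &this->ptr, &expected, NULL );
    }
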
  • libcfa/src/concurrency/kernel/private.hfa

    r9cd5bd2 rdf6cc9d  
    5050        #endif
    5151#endif
     52// #define READYQ_USE_LINEAR_AVG
     53#define READYQ_USE_LOGDBL_AVG
     54// #define READYQ_USE_LOGINT_AVG
     55
     56#if   defined(READYQ_USE_LINEAR_AVG)
     57typedef unsigned long long __readyQ_avg_t;
     58#elif defined(READYQ_USE_LOGDBL_AVG)
     59typedef double __readyQ_avg_t;
      60#elif defined(READYQ_USE_LOGINT_AVG)
     61typedef unsigned long long __readyQ_avg_t;
     62#else
     63#error must pick a scheme for averaging
     64#endif
    5265
    5366extern "C" {
     
    6578//-----------------------------------------------------------------------------
    6679// Scheduler
     80union __attribute__((aligned(64))) __timestamp_t {
     81        struct {
     82                volatile unsigned long long tv;
     83                volatile __readyQ_avg_t ma;
     84        } t;
     85        char __padding[192];
     86};
     87
    6788extern "C" {
    6889        void disable_interrupts() OPTIONAL_THREAD;
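
The typedef indirection lets scheduler code compute in __readyQ_avg_t without knowing which representation was compiled in. A condensed sketch of the selection pattern:

    #if   defined(READYQ_USE_LINEAR_AVG)
    typedef unsigned long long avg_sketch_t;     // raw tsc ticks
    #elif defined(READYQ_USE_LOGDBL_AVG)
    typedef double avg_sketch_t;                 // log2 of ticks, stored as double
    #else
    typedef unsigned long long avg_sketch_t;     // log2 of ticks, as u32.32 fixed point
    #endif

    static avg_sketch_t max_avg( avg_sketch_t a, avg_sketch_t b ) {
        return a > b ? a : b;                    // identical source for either representation
    }
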
  • libcfa/src/concurrency/kernel/startup.cfa

    r9cd5bd2 rdf6cc9d  
    184184
    185185
     186extern void heapManagerCtor();
     187extern void heapManagerDtor();
     188
    186189//=============================================================================================
    187190// Kernel Setup logic
     
    374377        proc->local_data = &__cfaabi_tls;
    375378
     379        heapManagerCtor();                                                                      // initialize heap
     380
    376381        __cfa_io_start( proc );
    377382        register_tls( proc );
     
    425430        unregister_tls( proc );
    426431        __cfa_io_stop( proc );
     432
     433        heapManagerDtor();                                                                      // de-initialize heap
    427434
    428435        return 0p;
  • libcfa/src/concurrency/preemption.cfa

    r9cd5bd2 rdf6cc9d  
    104104static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) {
    105105        if( ! & (*alarms)`first ) return 0p;                                            // If no alarms return null
    106         if( (*alarms)`first.timeval >= currtime ) return 0p;    // If alarms head not expired return null
     106        if( (*alarms)`first.deadline >= currtime ) return 0p;   // If alarms head not expired return null
    107107        return pop(alarms);                                                                     // Otherwise just pop head
    108108}
     
    140140                if( period > 0 ) {
    141141                        __cfadbg_print_buffer_local( preemption, " KERNEL: alarm period is %lu.\n", period`ns );
    142                         node->timeval = currtime + period;  // Alarm is periodic, add currtime to it (used cached current time)
     142                        node->deadline = currtime + period;  // Alarm is periodic, add period to currtime (uses cached current time)
    143143                        insert( alarms, node );             // Reinsert the node for the next time it triggers
    144144                }
     
    147147        // If there are still alarms pending, reset the timer
    148148        if( & (*alarms)`first ) {
    149                 Duration delta = (*alarms)`first.timeval - currtime;
     149                Duration delta = (*alarms)`first.deadline - currtime;
    150150                __kernel_set_timer( delta );
    151151        }
     
    232232// available.
    233233
    234 //-----------------------------------------------------------------------------
    235 // Some assembly required
    236 #define __cfaasm_label(label, when) when: asm volatile goto(".global __cfaasm_" #label "_" #when "\n" "__cfaasm_" #label "_" #when ":":::"memory":when)
    237 
    238234//----------
    239235// special case for preemption since used often
    240 __attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() libcfa_nopreempt libcfa_public {
    241         // create a assembler label before
    242         // marked as clobber all to avoid movement
    243         __cfaasm_label(check, before);
    244 
     236bool __preemption_enabled() libcfa_nopreempt libcfa_public {
    245237        // access tls as normal
    246         bool enabled = __cfaabi_tls.preemption_state.enabled;
    247 
    248         // Check if there is a pending preemption
    249         processor   * proc = __cfaabi_tls.this_processor;
    250         bool pending = proc ? proc->pending_preemption : false;
    251         if( enabled && pending ) proc->pending_preemption = false;
    252 
    253         // create a assembler label after
    254         // marked as clobber all to avoid movement
    255         __cfaasm_label(check, after);
    256 
    257         // If we can preempt and there is a pending one
    258         // this is a good time to yield
    259         if( enabled && pending ) {
    260                 force_yield( __POLL_PREEMPTION );
    261         }
    262         return enabled;
    263 }
    264 
    265 struct asm_region {
    266         void * before;
    267         void * after;
    268 };
    269 
    270 static inline bool __cfaasm_in( void * ip, struct asm_region & region ) {
    271         return ip >= region.before && ip <= region.after;
     238        return __cfaabi_tls.preemption_state.enabled;
    272239}
    273240
     
    293260uintptr_t __cfatls_get( unsigned long int offset ) libcfa_nopreempt libcfa_public; //no inline to avoid problems
    294261uintptr_t __cfatls_get( unsigned long int offset ) {
    295         // create a assembler label before
    296         // marked as clobber all to avoid movement
    297         __cfaasm_label(get, before);
    298 
    299262        // access tls as normal (except for pointer arithmetic)
    300263        uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
    301264
    302         // create a assembler label after
    303         // marked as clobber all to avoid movement
    304         __cfaasm_label(get, after);
    305 
    306265        // This is used everywhere, to avoid cost, we DO NOT poll pending preemption
    307266        return val;
     
    310269extern "C" {
    311270        // Disable interrupts by incrementing the counter
    312         void disable_interrupts() libcfa_nopreempt libcfa_public {
    313                 // create a assembler label before
    314                 // marked as clobber all to avoid movement
    315                 __cfaasm_label(dsable, before);
    316 
    317                 with( __cfaabi_tls.preemption_state ) {
    318                         #if GCC_VERSION > 50000
    319                         static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
    320                         #endif
    321 
    322                         // Set enabled flag to false
    323                         // should be atomic to avoid preemption in the middle of the operation.
    324                         // use memory order RELAXED since there is no inter-thread on this variable requirements
    325                         __atomic_store_n(&enabled, false, __ATOMIC_RELAXED);
    326 
    327                         // Signal the compiler that a fence is needed but only for signal handlers
    328                         __atomic_signal_fence(__ATOMIC_ACQUIRE);
    329 
    330                         __attribute__((unused)) unsigned short new_val = disable_count + 1;
    331                         disable_count = new_val;
    332                         verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
    333                 }
    334 
    335                 // create a assembler label after
    336                 // marked as clobber all to avoid movement
    337                 __cfaasm_label(dsable, after);
    338 
     271        void disable_interrupts() libcfa_nopreempt libcfa_public with( __cfaabi_tls.preemption_state ) {
     272                #if GCC_VERSION > 50000
     273                static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
     274                #endif
     275
     276                // Set enabled flag to false
     277                // should be atomic to avoid preemption in the middle of the operation.
      278                // use memory order RELAXED since there are no inter-thread ordering requirements on this variable
     279                __atomic_store_n(&enabled, false, __ATOMIC_RELAXED);
     280
     281                // Signal the compiler that a fence is needed but only for signal handlers
     282                __atomic_signal_fence(__ATOMIC_ACQUIRE);
     283
     284                __attribute__((unused)) unsigned short new_val = disable_count + 1;
     285                disable_count = new_val;
      286                verify( new_val < 65_000u );              // If this triggers, someone is disabling interrupts without enabling them
    339287        }
    340288
     
    379327        // i.e. on a real processor and not in the kernel
    380328        // (can return true even if no preemption was pending)
    381         bool poll_interrupts() libcfa_public {
     329        bool poll_interrupts() libcfa_nopreempt libcfa_public {
    382330                // Cache the processor now since interrupts can start happening after the atomic store
    383                 processor   * proc = publicTLS_get( this_processor );
     331                processor   * proc =  __cfaabi_tls.this_processor;
    384332                if ( ! proc ) return false;
    385                 if ( ! __preemption_enabled() ) return false;
    386 
    387                 with( __cfaabi_tls.preemption_state ){
    388                         // Signal the compiler that a fence is needed but only for signal handlers
    389                         __atomic_signal_fence(__ATOMIC_RELEASE);
    390                         if( proc->pending_preemption ) {
    391                                 proc->pending_preemption = false;
    392                                 force_yield( __POLL_PREEMPTION );
    393                         }
     333                if ( ! __cfaabi_tls.preemption_state.enabled ) return false;
     334
     335                // Signal the compiler that a fence is needed but only for signal handlers
     336                __atomic_signal_fence(__ATOMIC_RELEASE);
     337                if( unlikely( proc->pending_preemption ) ) {
     338                        proc->pending_preemption = false;
     339                        force_yield( __POLL_PREEMPTION );
    394340                }
    395341
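
The deleted __cfaasm_label machinery bracketed individual TLS accesses with assembler labels so the signal handler could test whether the interrupted instruction pointer fell inside such a region. After this change, whole functions are marked libcfa_nopreempt instead. A hedged sketch of an IP-range test of that kind, assuming the build collects the marked functions into one contiguous region bounded by linker-provided symbols (the symbol names are hypothetical):

    extern const char __nopreempt_start[], __nopreempt_end[];   // hypothetical linker symbols

    static _Bool ip_in_nopreempt( const void * ip ) {
        return (const char *)ip >= __nopreempt_start
            && (const char *)ip <  __nopreempt_end;
    }
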
  • libcfa/src/concurrency/ready_queue.cfa

    r9cd5bd2 rdf6cc9d  
    6262//-----------------------------------------------------------------------
    6363__attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->sched) {
    64         processor * const proc = kernelTLS().this_processor;
     64        struct processor * const proc = kernelTLS().this_processor;
    6565        const bool external = (!proc) || (cltr != proc->cltr);
    6666        const bool remote   = hint == UNPARK_REMOTE;
     
    116116        /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes_count );
    117117
    118         processor * const proc = kernelTLS().this_processor;
     118        struct processor * const proc = kernelTLS().this_processor;
    119119        unsigned this = proc->rdq.id;
    120120        /* paranoid */ verify( this < lanes_count );
     
    139139                /* paranoid */ verify( readyQ.tscs[target].t.tv != ULLONG_MAX );
    140140                if(target < lanes_count) {
    141                         const unsigned long long cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq);
    142                         const unsigned long long age = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma);
     141                        const __readyQ_avg_t cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq, true);
     142                        const __readyQ_avg_t age = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma, false);
    143143                        __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'lf vs cutoff %'lf, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no");
    144144                        if(age > cutoff) {
     
    214214        __STATS( stats.success++; )
    215215
    216         touch_tsc(readyQ.tscs, w, ts_prev, ts_next);
     216        touch_tsc(readyQ.tscs, w, ts_prev, ts_next, true);
    217217
    218218        thrd->preferred = w / __shard_factor.readyq;