Timestamp:
Oct 19, 2022, 4:43:26 PM
Author:
Thierry Delisle <tdelisle@…>
Branches:
ADT, ast-experimental, master
Children:
1a45263
Parents:
9cd5bd2 (diff), 135143ba (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' into pthread-emulation

Location:
libcfa/src/concurrency
Files:
12 edited

  • libcfa/src/concurrency/alarm.cfa

    r9cd5bd2 rdf6cc9d  
    5555        this.period  = period;
    5656        this.thrd = thrd;
    57         this.timeval = __kernel_get_time() + alarm;
     57        this.deadline = __kernel_get_time() + alarm;
    5858        set = false;
    5959        type = User;
     
    6464        this.period  = period;
    6565        this.proc = proc;
    66         this.timeval = __kernel_get_time() + alarm;
     66        this.deadline = __kernel_get_time() + alarm;
    6767        set = false;
    6868        type = Kernel;
     
    7272        this.initial = alarm;
    7373        this.period  = period;
    74         this.timeval = __kernel_get_time() + alarm;
     74        this.deadline = __kernel_get_time() + alarm;
    7575        set = false;
    7676        type = Callback;
     
    8585void insert( alarm_list_t * this, alarm_node_t * n ) {
    8686        alarm_node_t * it = & (*this)`first;
    87         while( it && (n->timeval > it->timeval) ) {
     87        while( it && (n->deadline > it->deadline) ) {
    8888                it = & (*it)`next;
    8989        }
     
    116116
    117117                Time curr = __kernel_get_time();
    118                 __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu -> %lu).\n", this, curr.tn, this->timeval.tn );
     118                __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu -> %lu).\n", this, curr.tn, this->deadline.tn );
    119119                insert( &alarms, this );
    120                 __kernel_set_timer( this->timeval - curr);
     120                __kernel_set_timer( this->deadline - curr);
    121121                this->set = true;
    122122        }
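
The rename from timeval to deadline makes explicit that the field stores the absolute time at which the alarm fires, and insert() above keeps the list ordered by that deadline so the head is always the next alarm due. A minimal plain-C sketch of the same ordered insert (names here are illustrative, not the libcfa definitions):

    struct alarm_sketch {
        unsigned long long deadline;                 // absolute time at which the alarm fires
        struct alarm_sketch * next;
    };

    // walk past nodes with earlier deadlines, then splice in front of the first later one
    static void insert_sorted( struct alarm_sketch ** head, struct alarm_sketch * n ) {
        struct alarm_sketch ** it = head;
        while( *it && n->deadline > (*it)->deadline )
            it = &(*it)->next;
        n->next = *it;
        *it = n;
    }
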
  • libcfa/src/concurrency/alarm.hfa

    r9cd5bd2 rdf6cc9d  
    5757        };
    5858
    59         Time timeval;           // actual time at which the alarm goes off
     59        Time deadline;          // actual time at which the alarm goes off
    6060        enum alarm_type type;   // kind of alarm: User, Kernel, or Callback
    6161        bool set                :1;     // whether or not the alarm has been registered
  • libcfa/src/concurrency/io.cfa

    r9cd5bd2 rdf6cc9d  
    201201                __atomic_unlock(&ctx->cq.lock);
    202202
    203                 touch_tsc( cltr->sched.io.tscs, ctx->cq.id, ts_prev, ts_next );
     203                touch_tsc( cltr->sched.io.tscs, ctx->cq.id, ts_prev, ts_next, false );
    204204
    205205                return true;
    206206        }
    207207
    208         bool __cfa_io_drain( processor * proc ) {
     208        bool __cfa_io_drain( struct processor * proc ) {
    209209                bool local = false;
    210210                bool remote = false;
     
    243243                                /* paranoid */ verify( io.tscs[target].t.tv != ULLONG_MAX );
    244244                                HELP: if(target < ctxs_count) {
    245                                         const unsigned long long cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io);
    246                                         const unsigned long long age = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma);
     245                                        const __readyQ_avg_t cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io, false);
     246                                        const __readyQ_avg_t age = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma, false);
    247247                                        __cfadbg_print_safe(io, "Kernel I/O: Help attempt on %u from %u, age %'lf vs cutoff %'lf, %s\n", target, ctx->cq.id, age, cutoff, age > cutoff ? "yes" : "no");
    248248                                        if(age <= cutoff) break HELP;
     
    273273        }
    274274
    275         bool __cfa_io_flush( processor * proc ) {
     275        bool __cfa_io_flush( struct processor * proc ) {
    276276                /* paranoid */ verify( ! __preemption_enabled() );
    277277                /* paranoid */ verify( proc );
     
    353353
    354354                disable_interrupts();
    355                 processor * proc = __cfaabi_tls.this_processor;
     355                struct processor * proc = __cfaabi_tls.this_processor;
    356356                io_context$ * ctx = proc->io.ctx;
    357357                /* paranoid */ verify( __cfaabi_tls.this_processor );
     
    433433                disable_interrupts();
    434434                __STATS__( true, if(!lazy) io.submit.eagr += 1; )
    435                 processor * proc = __cfaabi_tls.this_processor;
     435                struct processor * proc = __cfaabi_tls.this_processor;
    436436                io_context$ * ctx = proc->io.ctx;
    437437                /* paranoid */ verify( __cfaabi_tls.this_processor );
     
    551551                enqueue(this.pending, (__outstanding_io&)pa);
    552552
    553                 wait( pa.sem );
     553                wait( pa.waitctx );
    554554
    555555                return pa.ctx;
     
    578578                                pa.ctx = ctx;
    579579
    580                                 post( pa.sem );
     580                                post( pa.waitctx );
    581581                        }
    582582
     
    613613                }
    614614
    615                 wait( ei.sem );
     615                wait( ei.waitctx );
    616616
    617617                __cfadbg_print_safe(io, "Kernel I/O : %u submitted from arbiter\n", have);
     
    631631                                        __submit_only(&ctx, ei.idxs, ei.have);
    632632
    633                                         post( ei.sem );
     633                                        post( ei.waitctx );
    634634                                }
    635635
     
    641641
    642642        #if defined(CFA_WITH_IO_URING_IDLE)
    643                 bool __kernel_read(processor * proc, io_future_t & future, iovec & iov, int fd) {
     643                bool __kernel_read(struct processor * proc, io_future_t & future, iovec & iov, int fd) {
    644644                        io_context$ * ctx = proc->io.ctx;
    645645                        /* paranoid */ verify( ! __preemption_enabled() );
     
    692692                }
    693693
    694                 void __cfa_io_idle( processor * proc ) {
     694                void __cfa_io_idle( struct processor * proc ) {
    695695                        iovec iov;
    696696                        __atomic_acquire( &proc->io.ctx->cq.lock );
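
Several hunks above only add the struct keyword in front of processor. A plausible motivation, hedged since the commit message does not say: with only a forward declaration of the aggregate in scope, the tag form is the one spelling that always names the type. A minimal C illustration:

    struct processor;                            // forward declaration: incomplete type
    static void use( struct processor * p );     // OK: the tag form always works
    // static void use( processor * p );         // error in C unless a 'processor' alias is visible
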
  • libcfa/src/concurrency/io/types.hfa

    r9cd5bd2 rdf6cc9d  
    107107        struct __outstanding_io {
    108108                inline Colable;
    109                 single_sem sem;
     109                oneshot waitctx;
    110110        };
    111111        static inline __outstanding_io *& Next( __outstanding_io * n ) { return (__outstanding_io *)Next( (Colable *)n ); }
     
    127127        struct __attribute__((aligned(64))) io_context$ {
    128128                io_arbiter$ * arbiter;
    129                 processor * proc;
     129                struct processor * proc;
    130130
    131131                __outstanding_io_queue ext_sq;
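
The __outstanding_io wait field changes from a single_sem to a oneshot: a single-use handoff in which exactly one wait() pairs with exactly one post(), which is all this queue needs. A minimal sketch of those semantics using C11 atomics (libcfa parks the waiting thread instead of spinning; this is illustrative only):

    #include <stdatomic.h>

    struct oneshot_sketch { atomic_int posted; };    // 0 = empty, 1 = posted

    static void post_sketch( struct oneshot_sketch * this ) {
        atomic_store_explicit( &this->posted, 1, memory_order_release );
    }

    static void wait_sketch( struct oneshot_sketch * this ) {
        while( atomic_load_explicit( &this->posted, memory_order_acquire ) == 0 )
            ;                                        // the real oneshot parks the thread here
    }
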
  • libcfa/src/concurrency/kernel.hfa

    r9cd5bd2 rdf6cc9d  
    136136
    137137        // Link lists fields
    138         inline dlink(processor);
     138        dlink(processor) link;
    139139
    140140        // special init fields
     
    158158#endif
    159159};
    160 P9_EMBEDDED( processor, dlink(processor) )
     160// P9_EMBEDDED( processor, dlink(processor) )
     161static inline tytagref( dlink(processor), dlink(processor) ) ?`inner( processor & this ) {
     162    dlink(processor) & b = this.link;
     163    tytagref( dlink(processor), dlink(processor) ) result = { b };
     164    return result;
     165}
    161166
    162167void  ?{}(processor & this, const char name[], struct cluster & cltr);
     
    176181
    177182// Aligned timestamps which are used by the ready queue and io subsystem
    178 union __attribute__((aligned(64))) __timestamp_t {
    179         struct {
    180                 volatile unsigned long long tv;
    181                 volatile unsigned long long ma;
    182         } t;
    183         char __padding[192];
    184 };
    185 
    186 static inline void  ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; }
    187 static inline void ^?{}(__timestamp_t &) {}
     183union __attribute__((aligned(64))) __timestamp_t;
     184
     185void  ?{}(__timestamp_t & this);
     186void ^?{}(__timestamp_t &);
    188187
    189188
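
Here the anonymous inline dlink(processor) member becomes a named link field, and the P9_EMBEDDED macro is replaced by a hand-written ?`inner conversion over that field, leaving the dlist(processor) interface unchanged. In plain-C terms the pattern looks roughly like this (illustrative names, not the libcfa definitions):

    struct link_sketch { struct proc_sketch * next, * prev; };

    struct proc_sketch {
        /* ... other processor state ... */
        struct link_sketch link;                 // explicitly named embedded list node
    };

    // what ?`inner provides: hand the list code a reference to the embedded node
    static struct link_sketch * inner_sketch( struct proc_sketch * p ) { return &p->link; }
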
  • libcfa/src/concurrency/kernel/cluster.cfa

    r9cd5bd2 rdf6cc9d  
    221221static const unsigned __readyq_single_shard = 2;
    222222
     223void  ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; }
     224void ^?{}(__timestamp_t &) {}
     225
    223226//-----------------------------------------------------------------------
    224227// Check that all the intrusive queues in the data structure are still consistent
     
    254257}
    255258
    256 static void assign_list(unsigned & valrq, unsigned & valio, dlist(processor) & list, unsigned count) {
    257         processor * it = &list`first;
     259static void assign_list(unsigned & valrq, unsigned & valio, dlist(struct processor) & list, unsigned count) {
     260        struct processor * it = &list`first;
    258261        for(unsigned i = 0; i < count; i++) {
    259262                /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
     
    278281
    279282#if defined(CFA_HAVE_LINUX_IO_URING_H)
    280         static void assign_io(io_context$ ** data, size_t count, dlist(processor) & list) {
    281                 processor * it = &list`first;
     283        static void assign_io(io_context$ ** data, size_t count, dlist(struct processor) & list) {
     284                struct processor * it = &list`first;
    282285                while(it) {
    283286                        /* paranoid */ verifyf( it, "Unexpected null iterator\n");
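
Together with the kernel.hfa hunk that reduces __timestamp_t to a forward declaration, moving the constructor and destructor bodies here keeps the union's layout (which now depends on __readyQ_avg_t) out of the public header. The same split in plain C, as a sketch:

    /* public header: layout stays hidden */
    union timestamp_sketch;
    void timestamp_init( union timestamp_sketch * );
    void timestamp_fini( union timestamp_sketch * );

    /* private implementation */
    union timestamp_sketch {
        struct { unsigned long long tv, ma; } t;
        char __padding[192];                     // cache-line padding, as in the real union
    };
    void timestamp_init( union timestamp_sketch * this ) { this->t.tv = 0; this->t.ma = 0; }
    void timestamp_fini( union timestamp_sketch * this ) { (void)this; }
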
  • libcfa/src/concurrency/kernel/cluster.hfa

    r9cd5bd2 rdf6cc9d  
    1818#include "device/cpu.hfa"
    1919#include "kernel/private.hfa"
     20#include "math.hfa"
    2021
    2122#include <limits.h>
     23#include <inttypes.h>
     24
     25#include "clock.hfa"
     26
     27#if   defined(READYQ_USE_LINEAR_AVG)
     28
     29// no conversion needed in this case
     30static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return intsc; }
     31
     32// warning helpers: in the linear scheme all values are plain integers
     33#define warn_large_before warnf( !strict || old_avg < 33_000_000_000, "Suspiciously large previous average: %'llu (%llx), %'" PRId64 "ms \n", old_avg, old_avg, program()`ms )
     34#define warn_large_after warnf( !strict || ret < 33_000_000_000, "Suspiciously large new average after %'" PRId64 "ms cputime: %'llu (%llx) from %'llu-%'llu (%'llu, %'llu) and %'llu\n", program()`ms, ret, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg )
     35
     36// 8X linear factor is just 8 * x
     37#define AVG_FACTOR( x ) (8 * (x))
     38
     39#elif defined(READYQ_USE_LOGDBL_AVG)
     40
     41// convert to log2 scale but using double
     42static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { if(unlikely(0 == intsc)) return 0.0; else return log2(intsc); }
     43
     44#define warn_large_before warnf( !strict || old_avg < 35.0, "Suspiciously large previous average: %'lf, %'" PRId64 "ms \n", old_avg, program()`ms )
     45#define warn_large_after warnf( !strict || ret < 35.3, "Suspiciously large new average after %'" PRId64 "ms cputime: %'lf from %'llu-%'llu (%'llu, %'llu) and %'lf\n", program()`ms, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ); \
     46verify(ret >= 0)
     47
     48// 8X factor in logscale is log2(8X) = log2(8) + log2(X) = 3 + log2(X)
     49#define AVG_FACTOR( x ) (3.0 + (x))
     50
     51// we need to overload the __atomic_load_n because they don't support double
     52static inline double __atomic_load_n(volatile double * ptr, int mem) {
     53        volatile uint64_t * uptr = (volatile uint64_t *)ptr;
     54        _Static_assert(sizeof(*uptr) == sizeof(*ptr));
      55        uint64_t bits = 0;
      56        bits = __atomic_load_n(uptr, mem);
      57        uint64_t * bp = &bits;
      58        double ret = *(volatile double *)bp;
     59        /* paranoid */ verify( ret == 0 || ret > 3e-100 );
     60        return ret;
     61}
     62
     63// we need to overload the __atomic_store_n because they don't support double
     64static inline void __atomic_store_n(volatile double * ptr, double val, int mem) {
     65        /* paranoid */ verify( val == 0 || val > 3e-100 );
     66        volatile uint64_t * uptr = (volatile uint64_t *)ptr;
     67        _Static_assert(sizeof(*uptr) == sizeof(*ptr));
     68        uint64_t * valp = (uint64_t *)&val;
     69        __atomic_store_n(uptr, *valp, mem);
     70}
     71
      72#elif defined(READYQ_USE_LOGINT_AVG)
      73
      74// convert to log2 scale but with fixed-point u32.32 values
      75static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return ulog2_32_32(intsc); }
     76
     77// 8X factor, +3 in logscale (see above) is + 0x3.00000000
     78#define AVG_FACTOR( x ) (0x3_00000000ull + (x))
     79
     80#else
     81#error must pick a scheme for averaging
     82#endif
    2283
    2384//-----------------------------------------------------------------------
    2485// Calc moving average based on existing average, before and current time.
    25 static inline unsigned long long moving_average(unsigned long long currtsc, unsigned long long instsc, unsigned long long old_avg) {
    26         /* paranoid */ verifyf( old_avg < 15000000000000, "Suspiciously large previous average: %'llu (%llx)\n", old_avg, old_avg );
     86static inline __readyQ_avg_t moving_average(unsigned long long currtsc, unsigned long long intsc, __readyQ_avg_t old_avg, bool strict) {
      87        (void)strict; // suppress the unused-parameter warning in release builds, where the checks compile away
     88        /* paranoid */ warn_large_before;
    2789
    28         const unsigned long long new_val = currtsc > instsc ? currtsc - instsc : 0;
    29         const unsigned long long total_weight = 16;
    30         const unsigned long long new_weight   = 4;
    31         const unsigned long long old_weight = total_weight - new_weight;
    32         const unsigned long long ret = ((new_weight * new_val) + (old_weight * old_avg)) / total_weight;
     90        const unsigned long long new_val = currtsc > intsc ? currtsc - intsc : 0;
     91        const __readyQ_avg_t total_weight = 16;
     92        const __readyQ_avg_t new_weight   = 12;
     93        const __readyQ_avg_t old_weight = total_weight - new_weight;
     94        const __readyQ_avg_t ret = ((new_weight * __to_readyQ_avg(new_val)) + (old_weight * old_avg)) / total_weight;
     95
     96        /* paranoid */ warn_large_after;
    3397        return ret;
    3498}
    3599
    36 static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next) {
     100static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next, bool strict) {
    37101        if (ts_next == ULLONG_MAX) return;
    38102        unsigned long long now = rdtscl();
    39         unsigned long long pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);
     103        __readyQ_avg_t pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);
    40104        __atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED);
    41         __atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma), __ATOMIC_RELAXED);
     105        __atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma, strict), __ATOMIC_RELAXED);
    42106}
    43107
     
    45109// Calc age a timestamp should be before needing help.
    46110forall(Data_t * | { unsigned long long ts(Data_t & this); })
    47 static inline unsigned long long calc_cutoff(
     111static inline __readyQ_avg_t calc_cutoff(
    48112        const unsigned long long ctsc,
    49113        unsigned procid,
     
    51115        Data_t * data,
    52116        __timestamp_t * tscs,
    53         const unsigned shard_factor
     117        const unsigned shard_factor,
     118        bool strict
    54119) {
    55120        unsigned start = procid;
    56         unsigned long long max = 0;
     121        __readyQ_avg_t max = 0;
    57122        for(i; shard_factor) {
    58123                unsigned long long ptsc = ts(data[start + i]);
    59124                if(ptsc != ULLONG_MAX) {
    60125                        /* paranoid */ verify( start + i < count );
    61                         unsigned long long tsc = moving_average(ctsc, ptsc, tscs[start + i].t.ma);
    62                         if(tsc > max) max = tsc;
     126                        __readyQ_avg_t avg = moving_average(ctsc, ptsc, tscs[start + i].t.ma, strict);
     127                        if(avg > max) max = avg;
    63128                }
    64129        }
    65         return 8 * max;
     130        return AVG_FACTOR( max );
    66131}
    67132
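
Besides switching to __readyQ_avg_t and optionally to log2 scale, this hunk changes the exponential moving average's weights: a new sample now contributes 12/16 instead of 4/16, so the average tracks recent latencies more aggressively. A standalone worked example of the linear-scheme arithmetic, with illustrative values:

    #include <stdio.h>

    int main(void) {
        unsigned long long old_avg = 1000;       // previous average, in tsc ticks
        unsigned long long new_val = 2000;       // latest observed delay
        // new average = (12 * new + (16 - 12) * old) / 16
        unsigned long long avg = (12ull * new_val + 4ull * old_avg) / 16ull;
        printf( "%llu\n", avg );                 // prints 1750
        return 0;
    }

In the log2 schemes the 8x helping cutoff becomes the additive constant encoded by AVG_FACTOR, since log2(8 * x) = 3 + log2(x).
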
  • libcfa/src/concurrency/kernel/fwd.hfa

    r9cd5bd2 rdf6cc9d  
    276276                        // intended to be used by wait, wait_any, waitfor, etc. rather than directly
    277277                        bool retract( future_t & this, oneshot & wait_ctx ) {
    278                                 struct oneshot * expected = this.ptr;
     278                                struct oneshot * expected = &wait_ctx;
    279279
    280280                                // attempt to remove the context so it doesn't get consumed.
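
This one-line change is a bug fix rather than a rename: retract must only succeed if the waiter's own context is still registered in the future, so the compare-and-swap has to expect &wait_ctx; loading this.ptr as the expected value could observe a concurrently fulfilled future and wrongly retract it. A minimal sketch of the corrected logic with C11 atomics (the real future_t also encodes fulfilled/used states in ptr):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct oneshot;
    struct future_sketch { _Atomic(struct oneshot *) ptr; };

    // succeeds only if wait_ctx is still the registered waiter
    static bool retract_sketch( struct future_sketch * this, struct oneshot * wait_ctx ) {
        struct oneshot * expected = wait_ctx;    // expect OUR context, not whatever ptr holds
        return atomic_compare_exchange_strong( &this->ptr, &expected, NULL );
    }
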
  • libcfa/src/concurrency/kernel/private.hfa

    r9cd5bd2 rdf6cc9d  
    5050        #endif
    5151#endif
     52// #define READYQ_USE_LINEAR_AVG
     53#define READYQ_USE_LOGDBL_AVG
     54// #define READYQ_USE_LOGINT_AVG
     55
     56#if   defined(READYQ_USE_LINEAR_AVG)
     57typedef unsigned long long __readyQ_avg_t;
     58#elif defined(READYQ_USE_LOGDBL_AVG)
     59typedef double __readyQ_avg_t;
      60#elif defined(READYQ_USE_LOGINT_AVG)
     61typedef unsigned long long __readyQ_avg_t;
     62#else
     63#error must pick a scheme for averaging
     64#endif
    5265
    5366extern "C" {
     
    6578//-----------------------------------------------------------------------------
    6679// Scheduler
     80union __attribute__((aligned(64))) __timestamp_t {
     81        struct {
     82                volatile unsigned long long tv;
     83                volatile __readyQ_avg_t ma;
     84        } t;
     85        char __padding[192];
     86};
     87
    6788extern "C" {
    6889        void disable_interrupts() OPTIONAL_THREAD;
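
The typedef indirection lets scheduler code compute in __readyQ_avg_t without knowing which representation was compiled in. A condensed sketch of the selection pattern:

    #if   defined(READYQ_USE_LINEAR_AVG)
    typedef unsigned long long avg_sketch_t;     // raw tsc ticks
    #elif defined(READYQ_USE_LOGDBL_AVG)
    typedef double avg_sketch_t;                 // log2 of ticks, stored as double
    #else
    typedef unsigned long long avg_sketch_t;     // log2 of ticks, as u32.32 fixed point
    #endif

    static avg_sketch_t max_avg( avg_sketch_t a, avg_sketch_t b ) {
        return a > b ? a : b;                    // identical source for either representation
    }
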
  • libcfa/src/concurrency/kernel/startup.cfa

    r9cd5bd2 rdf6cc9d  
    184184
    185185
     186extern void heapManagerCtor();
     187extern void heapManagerDtor();
     188
    186189//=============================================================================================
    187190// Kernel Setup logic
     
    374377        proc->local_data = &__cfaabi_tls;
    375378
     379        heapManagerCtor();                                                                      // initialize heap
     380
    376381        __cfa_io_start( proc );
    377382        register_tls( proc );
     
    425430        unregister_tls( proc );
    426431        __cfa_io_stop( proc );
     432
     433        heapManagerDtor();                                                                      // de-initialize heap
    427434
    428435        return 0p;
  • libcfa/src/concurrency/preemption.cfa

    r9cd5bd2 rdf6cc9d  
    104104static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) {
    105105        if( ! & (*alarms)`first ) return 0p;                                            // If no alarms return null
    106         if( (*alarms)`first.timeval >= currtime ) return 0p;    // If alarms head not expired return null
     106        if( (*alarms)`first.deadline >= currtime ) return 0p;   // If alarms head not expired return null
    107107        return pop(alarms);                                                                     // Otherwise just pop head
    108108}
     
    140140                if( period > 0 ) {
    141141                        __cfadbg_print_buffer_local( preemption, " KERNEL: alarm period is %lu.\n", period`ns );
    142                         node->timeval = currtime + period;  // Alarm is periodic, add currtime to it (used cached current time)
     142                        node->deadline = currtime + period;  // Alarm is periodic, add period to currtime (uses cached current time)
    143143                        insert( alarms, node );             // Reinsert the node for the next time it triggers
    144144                }
     
    147147        // If there are still alarms pending, reset the timer
    148148        if( & (*alarms)`first ) {
    149                 Duration delta = (*alarms)`first.timeval - currtime;
     149                Duration delta = (*alarms)`first.deadline - currtime;
    150150                __kernel_set_timer( delta );
    151151        }
     
    232232// available.
    233233
    234 //-----------------------------------------------------------------------------
    235 // Some assembly required
    236 #define __cfaasm_label(label, when) when: asm volatile goto(".global __cfaasm_" #label "_" #when "\n" "__cfaasm_" #label "_" #when ":":::"memory":when)
    237 
    238234//----------
    239235// special case for preemption since used often
    240 __attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() libcfa_nopreempt libcfa_public {
    241         // create a assembler label before
    242         // marked as clobber all to avoid movement
    243         __cfaasm_label(check, before);
    244 
     236bool __preemption_enabled() libcfa_nopreempt libcfa_public {
    245237        // access tls as normal
    246         bool enabled = __cfaabi_tls.preemption_state.enabled;
    247 
    248         // Check if there is a pending preemption
    249         processor   * proc = __cfaabi_tls.this_processor;
    250         bool pending = proc ? proc->pending_preemption : false;
    251         if( enabled && pending ) proc->pending_preemption = false;
    252 
    253         // create a assembler label after
    254         // marked as clobber all to avoid movement
    255         __cfaasm_label(check, after);
    256 
    257         // If we can preempt and there is a pending one
    258         // this is a good time to yield
    259         if( enabled && pending ) {
    260                 force_yield( __POLL_PREEMPTION );
    261         }
    262         return enabled;
    263 }
    264 
    265 struct asm_region {
    266         void * before;
    267         void * after;
    268 };
    269 
    270 static inline bool __cfaasm_in( void * ip, struct asm_region & region ) {
    271         return ip >= region.before && ip <= region.after;
     238        return __cfaabi_tls.preemption_state.enabled;
    272239}
    273240
     
    293260uintptr_t __cfatls_get( unsigned long int offset ) libcfa_nopreempt libcfa_public; //no inline to avoid problems
    294261uintptr_t __cfatls_get( unsigned long int offset ) {
    295         // create a assembler label before
    296         // marked as clobber all to avoid movement
    297         __cfaasm_label(get, before);
    298 
    299262        // access tls as normal (except for pointer arithmetic)
    300263        uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
    301264
    302         // create a assembler label after
    303         // marked as clobber all to avoid movement
    304         __cfaasm_label(get, after);
    305 
    306265        // This is used everywhere, to avoid cost, we DO NOT poll pending preemption
    307266        return val;
     
    310269extern "C" {
    311270        // Disable interrupts by incrementing the counter
    312         void disable_interrupts() libcfa_nopreempt libcfa_public {
    313                 // create a assembler label before
    314                 // marked as clobber all to avoid movement
    315                 __cfaasm_label(dsable, before);
    316 
    317                 with( __cfaabi_tls.preemption_state ) {
    318                         #if GCC_VERSION > 50000
    319                         static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
    320                         #endif
    321 
    322                         // Set enabled flag to false
    323                         // should be atomic to avoid preemption in the middle of the operation.
    324                         // use memory order RELAXED since there is no inter-thread on this variable requirements
    325                         __atomic_store_n(&enabled, false, __ATOMIC_RELAXED);
    326 
    327                         // Signal the compiler that a fence is needed but only for signal handlers
    328                         __atomic_signal_fence(__ATOMIC_ACQUIRE);
    329 
    330                         __attribute__((unused)) unsigned short new_val = disable_count + 1;
    331                         disable_count = new_val;
    332                         verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
    333                 }
    334 
    335                 // create a assembler label after
    336                 // marked as clobber all to avoid movement
    337                 __cfaasm_label(dsable, after);
    338 
     271        void disable_interrupts() libcfa_nopreempt libcfa_public with( __cfaabi_tls.preemption_state ) {
     272                #if GCC_VERSION > 50000
     273                static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
     274                #endif
     275
     276                // Set enabled flag to false
     277                // should be atomic to avoid preemption in the middle of the operation.
      278                // use memory order RELAXED since there are no inter-thread ordering requirements on this variable
     279                __atomic_store_n(&enabled, false, __ATOMIC_RELAXED);
     280
     281                // Signal the compiler that a fence is needed but only for signal handlers
     282                __atomic_signal_fence(__ATOMIC_ACQUIRE);
     283
     284                __attribute__((unused)) unsigned short new_val = disable_count + 1;
     285                disable_count = new_val;
      286                verify( new_val < 65_000u );              // If this triggers, someone is disabling interrupts without enabling them
    339287        }
    340288
     
    379327        // i.e. on a real processor and not in the kernel
    380328        // (can return true even if no preemption was pending)
    381         bool poll_interrupts() libcfa_public {
     329        bool poll_interrupts() libcfa_nopreempt libcfa_public {
    382330                // Cache the processor now since interrupts can start happening after the atomic store
    383                 processor   * proc = publicTLS_get( this_processor );
     331                processor   * proc =  __cfaabi_tls.this_processor;
    384332                if ( ! proc ) return false;
    385                 if ( ! __preemption_enabled() ) return false;
    386 
    387                 with( __cfaabi_tls.preemption_state ){
    388                         // Signal the compiler that a fence is needed but only for signal handlers
    389                         __atomic_signal_fence(__ATOMIC_RELEASE);
    390                         if( proc->pending_preemption ) {
    391                                 proc->pending_preemption = false;
    392                                 force_yield( __POLL_PREEMPTION );
    393                         }
     333                if ( ! __cfaabi_tls.preemption_state.enabled ) return false;
     334
     335                // Signal the compiler that a fence is needed but only for signal handlers
     336                __atomic_signal_fence(__ATOMIC_RELEASE);
     337                if( unlikely( proc->pending_preemption ) ) {
     338                        proc->pending_preemption = false;
     339                        force_yield( __POLL_PREEMPTION );
    394340                }
    395341
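
The deleted __cfaasm_label machinery bracketed individual TLS accesses with assembler labels so the signal handler could test whether the interrupted instruction pointer fell inside such a region. After this change, whole functions are marked libcfa_nopreempt instead. A hedged sketch of an IP-range test of that kind, assuming the build collects the marked functions into one contiguous region bounded by linker-provided symbols (the symbol names are hypothetical):

    extern const char __nopreempt_start[], __nopreempt_end[];   // hypothetical linker symbols

    static _Bool ip_in_nopreempt( const void * ip ) {
        return (const char *)ip >= __nopreempt_start
            && (const char *)ip <  __nopreempt_end;
    }
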
  • libcfa/src/concurrency/ready_queue.cfa

    r9cd5bd2 rdf6cc9d  
    6262//-----------------------------------------------------------------------
    6363__attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->sched) {
    64         processor * const proc = kernelTLS().this_processor;
     64        struct processor * const proc = kernelTLS().this_processor;
    6565        const bool external = (!proc) || (cltr != proc->cltr);
    6666        const bool remote   = hint == UNPARK_REMOTE;
     
    116116        /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes_count );
    117117
    118         processor * const proc = kernelTLS().this_processor;
     118        struct processor * const proc = kernelTLS().this_processor;
    119119        unsigned this = proc->rdq.id;
    120120        /* paranoid */ verify( this < lanes_count );
     
    139139                /* paranoid */ verify( readyQ.tscs[target].t.tv != ULLONG_MAX );
    140140                if(target < lanes_count) {
    141                         const unsigned long long cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq);
    142                         const unsigned long long age = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma);
     141                        const __readyQ_avg_t cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq, true);
     142                        const __readyQ_avg_t age = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma, false);
    143143                        __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'lf vs cutoff %'lf, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no");
    144144                        if(age > cutoff) {
     
    214214        __STATS( stats.success++; )
    215215
    216         touch_tsc(readyQ.tscs, w, ts_prev, ts_next);
     216        touch_tsc(readyQ.tscs, w, ts_prev, ts_next, true);
    217217
    218218        thrd->preferred = w / __shard_factor.readyq;