Changeset 7f6a7c9 for libcfa/src/concurrency/kernel
- Timestamp: Sep 21, 2022, 11:02:15 AM
- Branches: ADT, ast-experimental, master, pthread-emulation
- Children: 95dab9e
- Parents: 428adbc (diff), 0bd46fd (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
- Location: libcfa/src/concurrency/kernel
- Files: 5 edited
Legend: unchanged context lines carry no prefix; lines removed by the changeset are prefixed with '-', lines added with '+'. Hunks within a file are separated by '…'.
libcfa/src/concurrency/kernel/cluster.cfa (r428adbc → r7f6a7c9)

  //=======================================================================
  void ?{}(__scheduler_RWLock_t & this) {
-     this.max = __max_processors();
-     this.alloc = 0;
-     this.ready = 0;
-     this.data = alloc(this.max);
-     this.write_lock = false;
-
-     /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.alloc), &this.alloc));
-     /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.ready), &this.ready));
+     this.lock.max = __max_processors();
+     this.lock.alloc = 0;
+     this.lock.ready = 0;
+     this.lock.data = alloc(this.lock.max);
+     this.lock.write_lock = false;
+
+     /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.lock.alloc), &this.lock.alloc));
+     /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.lock.ready), &this.lock.ready));

  }
  void ^?{}(__scheduler_RWLock_t & this) {
-     free(this.data);
+     free(this.lock.data);
  }
…
  //=======================================================================
  // Lock-Free registering/unregistering of threads
- unsigned register_proc_id( void ) with(*__scheduler_lock) {
+ unsigned register_proc_id( void ) with(__scheduler_lock.lock) {
      __kernel_rseq_register();

…
  }

- if(max <= alloc) abort("Trying to create more than %ud processors", __scheduler_lock->max);
+ if(max <= alloc) abort("Trying to create more than %ud processors", __scheduler_lock.lock.max);

  // Step - 2 : F&A to get a new spot in the array.
  uint_fast32_t n = __atomic_fetch_add(&alloc, 1, __ATOMIC_SEQ_CST);
- if(max <= n) abort("Trying to create more than %ud processors", __scheduler_lock->max);
+ if(max <= n) abort("Trying to create more than %ud processors", __scheduler_lock.lock.max);

  // Step - 3 : Mark space as used and then publish it.
…
  }

- void unregister_proc_id( unsigned id ) with(*__scheduler_lock) {
+ void unregister_proc_id( unsigned id ) with(__scheduler_lock.lock) {
      /* paranoid */ verify(id < ready);
      /* paranoid */ verify(id == kernelTLS().sched_id);
…
  // Writer side : acquire when changing the ready queue, e.g. adding more
  // queues or removing them.
- uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {
+ uint_fast32_t ready_mutate_lock( void ) with(__scheduler_lock.lock) {
      /* paranoid */ verify( ! __preemption_enabled() );
…
  }

- void ready_mutate_unlock( uint_fast32_t last_s ) with(*__scheduler_lock) {
+ void ready_mutate_unlock( uint_fast32_t last_s ) with(__scheduler_lock.lock) {
      /* paranoid */ verify( ! __preemption_enabled() );
…
  #if defined(CFA_HAVE_LINUX_IO_URING_H)
- static void assign_io($io_context ** data, size_t count, dlist(processor) & list) {
+ static void assign_io(io_context$ ** data, size_t count, dlist(processor) & list) {
      processor * it = &list`first;
      while(it) {
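Most of these hunks are mechanical: every with(*__scheduler_lock) clause becomes with(__scheduler_lock.lock), opening the nested lock member of the new union instead of dereferencing a global pointer, so the function bodies stay untouched. For readers new to Cforall, here is a minimal plain-C sketch of what the with-clause saves; the names are hypothetical stand-ins, not from the changeset:

#include <stdio.h>

// Hypothetical stand-ins for __scheduler_RWLock_t and __scheduler_lock.
struct sched_fields { unsigned max; unsigned alloc; };
union sched_lock_t { struct sched_fields lock; char pad[192]; };
union sched_lock_t sched_lock = { .lock = { .max = 4, .alloc = 0 } };

// In plain C every access spells the full path; Cforall's
// with(sched_lock.lock) brings `max` and `alloc` directly into scope,
// which is why only the with-clauses changed in cluster.cfa.
unsigned register_id(void) {
    return sched_lock.lock.alloc++;
}

int main(void) {
    unsigned id = register_id();
    printf("first id: %u of %u\n", id, sched_lock.lock.max);
    return 0;
}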
libcfa/src/concurrency/kernel/cluster.hfa (r428adbc → r7f6a7c9)

  // Calc moving average based on existing average, before and current time.
  static inline unsigned long long moving_average(unsigned long long currtsc, unsigned long long instsc, unsigned long long old_avg) {
-     /* paranoid */ verifyf( currtsc < 45000000000000000, "Suspiciously large current time: %'llu (%llx)\n", currtsc, currtsc );
-     /* paranoid */ verifyf( instsc < 45000000000000000, "Suspiciously large insert time: %'llu (%llx)\n", instsc, instsc );
      /* paranoid */ verifyf( old_avg < 15000000000000, "Suspiciously large previous average: %'llu (%llx)\n", old_avg, old_avg );
…
      }
  }
- return (max + 2 * max) / 2;
+ return 8 * max;
  }
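One behavioural change hides at the bottom of this file: the number of ready queues allocated per cluster jumps from 1.5x to 8x the processor count, since the old expression (max + 2 * max) / 2 is just an obfuscated 1.5 multiplier. A quick sanity check of the arithmetic, in illustrative plain C:

#include <assert.h>

unsigned old_count(unsigned max) { return (max + 2 * max) / 2; } // 1.5x max
unsigned new_count(unsigned max) { return 8 * max; }             // 8x max

int main(void) {
    assert(old_count(4) == 6);   // 4 processors -> 6 ready queues before
    assert(new_count(4) == 32);  // 4 processors -> 32 ready queues after
    return 0;
}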
libcfa/src/concurrency/kernel/fwd.hfa (r428adbc → r7f6a7c9)

  extern "C" {
  extern "Cforall" {
- extern __attribute__((aligned(64))) thread_local struct KernelThreadData {
+ extern __attribute__((aligned(64))) __thread struct KernelThreadData {
      struct thread$ * volatile this_thread;
      struct processor * volatile this_processor;
…
  // Similar to a binary semaphore with a 'one shot' semantic
  // is expected to be discarded after each party call their side
+ enum(struct thread$ *) { oneshot_ARMED = 0p, oneshot_FULFILLED = 1p };
  struct oneshot {
      // Internal state :
-     // 0p : is initial state (wait will block)
-     // 1p : fulfilled (wait won't block)
+     // armed : initial state, wait will block
+     // fulfilled : wait won't block
      // any thread : a thread is currently waiting
      struct thread$ * volatile ptr;
…
  static inline {
      void ?{}(oneshot & this) {
-         this.ptr = 0p;
+         this.ptr = oneshot_ARMED;
      }
…
      for() {
          struct thread$ * expected = this.ptr;
-         if(expected == 1p) return false;
+         if(expected == oneshot_FULFILLED) return false;
          if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
              park();
-             /* paranoid */ verify( this.ptr == 1p );
+             /* paranoid */ verify( this.ptr == oneshot_FULFILLED );
              return true;
          }
…
      // return true if a thread was unparked
      thread$ * post(oneshot & this, bool do_unpark = true) {
-         struct thread$ * got = __atomic_exchange_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
-         if( got == 0p || got == 1p ) return 0p;
+         struct thread$ * got = __atomic_exchange_n( &this.ptr, oneshot_FULFILLED, __ATOMIC_SEQ_CST);
+         if( got == oneshot_ARMED || got == oneshot_FULFILLED ) return 0p;
          if(do_unpark) unpark( got );
          return got;
…
  // thread on "any of" [a given set of] futures.
  // does not support multiple threads waiting on the same future
+ enum(struct oneshot *) { future_ARMED = 0p, future_FULFILLED = 1p, future_PROGRESS = 2p, future_ABANDONED = 3p };
  struct future_t {
      // Internal state :
-     // 0p : is initial state (wait will block)
-     // 1p : fulfilled (wait won't block)
-     // 2p : in progress ()
-     // 3p : abandoned, server should delete
+     // armed : initial state, wait will block
+     // fulfilled : result is ready, wait won't block
+     // progress : someone else is in the process of fulfilling this
+     // abandoned : client no longer cares, server should delete
      // any oneshot : a context has been setup to wait, a thread could wait on it
      struct oneshot * volatile ptr;
…
  static inline {
      void ?{}(future_t & this) {
-         this.ptr = 0p;
+         this.ptr = future_ARMED;
      }
…
      void reset(future_t & this) {
          // needs to be in 0p or 1p
-         __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST);
+         __atomic_exchange_n( &this.ptr, future_ARMED, __ATOMIC_SEQ_CST);
      }

      // check if the future is available
      bool available( future_t & this ) {
-         while( this.ptr == 2p ) Pause();
-         return this.ptr == 1p;
+         while( this.ptr == future_PROGRESS ) Pause();
+         return this.ptr == future_FULFILLED;
      }
…
      // intented to be use by wait, wait_any, waitfor, etc. rather than used directly
      bool setup( future_t & this, oneshot & wait_ctx ) {
-         /* paranoid */ verify( wait_ctx.ptr == 0p || wait_ctx.ptr == 1p );
+         /* paranoid */ verify( wait_ctx.ptr == oneshot_ARMED || wait_ctx.ptr == oneshot_FULFILLED );
          // The future needs to set the wait context
          for() {
              struct oneshot * expected = this.ptr;
              // Is the future already fulfilled?
-             if(expected == 1p) return false; // Yes, just return false (didn't block)
+             if(expected == future_FULFILLED) return false; // Yes, just return false (didn't block)

              // The future is not fulfilled, try to setup the wait context
…
          // attempt to remove the context so it doesn't get consumed.
-         if(__atomic_compare_exchange_n( &this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
+         if(__atomic_compare_exchange_n( &this.ptr, &expected, future_ARMED, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
              // we still have the original context, then no one else saw it
              return false;
          }

-         // expected == 0p: future was never actually setup, just return
-         if( expected == 0p ) return false;
-
-         // expected == 1p: the future is ready and the context was fully consumed
+         // expected == ARMED: future was never actually setup, just return
+         if( expected == future_ARMED ) return false;
+
+         // expected == FULFILLED: the future is ready and the context was fully consumed
          // the server won't use the pointer again
          // It is safe to delete (which could happen after the return)
-         if( expected == 1p ) return true;
-
-         // expected == 2p: the future is ready but the context hasn't fully been consumed
+         if( expected == future_FULFILLED ) return true;
+
+         // expected == PROGRESS: the future is ready but the context hasn't fully been consumed
          // spin until it is safe to move on
-         if( expected == 2p ) {
-             while( this.ptr != 1p ) Pause();
-             /* paranoid */ verify( this.ptr == 1p );
+         if( expected == future_PROGRESS ) {
+             while( this.ptr != future_FULFILLED ) Pause();
+             /* paranoid */ verify( this.ptr == future_FULFILLED );
              return true;
          }
…
      // Mark the future as abandoned, meaning it will be deleted by the server
      bool abandon( future_t & this ) {
-         /* paranoid */ verify( this.ptr != 3p );
+         /* paranoid */ verify( this.ptr != future_ABANDONED );

          // Mark the future as abandonned
-         struct oneshot * got = __atomic_exchange_n( &this.ptr, 3p, __ATOMIC_SEQ_CST);
+         struct oneshot * got = __atomic_exchange_n( &this.ptr, future_ABANDONED, __ATOMIC_SEQ_CST);

          // If the future isn't already fulfilled, let the server delete it
-         if( got == 0p ) return false;
-
-         // got == 2p: the future is ready but the context hasn't fully been consumed
+         if( got == future_ARMED ) return false;
+
+         // got == PROGRESS: the future is ready but the context hasn't fully been consumed
          // spin until it is safe to move on
-         if( got == 2p ) {
-             while( this.ptr != 1p ) Pause();
-             got = 1p;
+         if( got == future_PROGRESS ) {
+             while( this.ptr != future_FULFILLED ) Pause();
+             got = future_FULFILLED;
          }

          // The future is completed delete it now
-         /* paranoid */ verify( this.ptr != 1p );
+         /* paranoid */ verify( this.ptr != future_FULFILLED );
          free( &this );
          return true;
…
  #pragma GCC diagnostic ignored "-Wfree-nonheap-object"
  #endif
-         if( expected == 3p ) { free( &this ); return 0p; }
+         if( expected == future_ABANDONED ) { free( &this ); return 0p; }
  #if defined(__GNUC__) && __GNUC__ >= 7
  #pragma GCC diagnostic pop
  #endif

-         /* paranoid */ verify( expected != 1p ); // Future is already fulfilled, should not happen
-         /* paranoid */ verify( expected != 2p ); // Future is bein fulfilled by someone else, this is even less supported then the previous case.
+         /* paranoid */ verify( expected != future_FULFILLED ); // Future is already fulfilled, should not happen
+         /* paranoid */ verify( expected != future_PROGRESS ); // Future is bein fulfilled by someone else, this is even less supported then the previous case.

          // If there is a wait context, we need to consume it and mark it as consumed after
          // If there is no context then we can skip the in progress phase
-         struct oneshot * want = expected == 0p ? 1p : 2p;
+         struct oneshot * want = expected == future_ARMED ? future_FULFILLED : future_PROGRESS;
          if(__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-             if( expected == 0p ) { return 0p; }
+             if( expected == future_ARMED ) { return 0p; }
              thread$ * ret = post( *expected, do_unpark );
-             __atomic_store_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
+             __atomic_store_n( &this.ptr, future_FULFILLED, __ATOMIC_SEQ_CST);
              return ret;
          }
…
          // Wait for the future to tru
-         while( this.ptr == 2p ) Pause();
+         while( this.ptr == future_PROGRESS ) Pause();
          // Make sure the state makes sense
          // Should be fulfilled, could be in progress but it's out of date if so
          // and the oneshot should not be needed any more
          __attribute__((unused)) struct oneshot * was = this.ptr;
-         /* paranoid */ verifyf( was == 1p, "Expected this.ptr to be 1p, was %p\n", was );
+         /* paranoid */ verifyf( was == future_FULFILLED, "Expected this.ptr to be 1p, was %p\n", was );

          // Mark the future as fulfilled, to be consistent
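These hunks replace the bare sentinel pointers 0p/1p/2p/3p with named constants of Cforall's pointer-typed enums; the lock-free protocol itself is unchanged. As a rough illustration of that protocol — not the library's actual API — here is the oneshot state machine re-sketched in portable C11 atomics, where two reserved addresses that can never collide with a real waiter stand in for the armed and fulfilled states:

#include <stdatomic.h>

// Sketch of the sentinel-pointer state machine behind `oneshot`, in plain C11.
// The real Cforall code stores thread$ pointers and parks/unparks threads;
// here a waiter is just an opaque pointer. Names are illustrative only.
#define ONESHOT_ARMED     ((void *)0)   // initial state: wait will block
#define ONESHOT_FULFILLED ((void *)1)   // posted: wait won't block

struct oneshot { _Atomic(void *) ptr; };

// Returns 1 if the caller published itself as the waiter and should block.
static int oneshot_wait(struct oneshot *o, void *self) {
    void *expected = atomic_load(&o->ptr);
    for (;;) {
        if (expected == ONESHOT_FULFILLED) return 0;  // already posted
        // On failure, `expected` is refreshed and the loop retries.
        if (atomic_compare_exchange_weak(&o->ptr, &expected, self))
            return 1;                                 // park() here in the real code
    }
}

// Returns the waiter to unpark, or NULL if no thread was blocked.
static void *oneshot_post(struct oneshot *o) {
    void *got = atomic_exchange(&o->ptr, ONESHOT_FULFILLED);
    return (got == ONESHOT_ARMED || got == ONESHOT_FULFILLED) ? 0 : got;
}

The future_t states extend the same idea with two more sentinels: PROGRESS marks a fulfilment in flight so a concurrent waiter spins instead of freeing the context early, and ABANDONED hands ownership of the allocation to the server side.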
libcfa/src/concurrency/kernel/private.hfa (r428adbc → r7f6a7c9)

  #elif defined(CFA_HAVE_LINUX_RSEQ_H)
  extern "Cforall" {
- extern __attribute__((aligned(64))) thread_local volatile struct rseq __cfaabi_rseq;
+ extern __attribute__((aligned(64))) __thread volatile struct rseq __cfaabi_rseq;
  }
  #else
…
  //-----------------------------------------------------------------------------
  // I/O
- $io_arbiter * create(void);
- void destroy($io_arbiter *);
+ io_arbiter$ * create(void);
+ void destroy(io_arbiter$ *);

  //=======================================================================
…
  // Blocking acquire
  static inline void __atomic_acquire(volatile bool * ll) {
+     /* paranoid */ verify( ! __preemption_enabled() );
+     /* paranoid */ verify(ll);
+
      while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
          while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
…
      }
      /* paranoid */ verify(*ll);
+     /* paranoid */ verify( ! __preemption_enabled() );
  }

  // Non-Blocking acquire
  static inline bool __atomic_try_acquire(volatile bool * ll) {
+     /* paranoid */ verify( ! __preemption_enabled() );
+     /* paranoid */ verify(ll);
+
      return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
  }
…
  // Release
  static inline void __atomic_unlock(volatile bool * ll) {
+     /* paranoid */ verify( ! __preemption_enabled() );
+     /* paranoid */ verify(ll);
      /* paranoid */ verify(*ll);
      __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
…
  // have been hard-coded to for the ready-queue for
  // simplicity and performance
- struct __scheduler_RWLock_t {
-     // total cachelines allocated
-     unsigned int max;
-
-     // cachelines currently in use
-     volatile unsigned int alloc;
-
-     // cachelines ready to itereate over
-     // (!= to alloc when thread is in second half of doregister)
-     volatile unsigned int ready;
-
-     // writer lock
-     volatile bool write_lock;
-
-     // data pointer
-     volatile bool * volatile * data;
+ union __attribute__((aligned(64))) __scheduler_RWLock_t {
+     struct {
+         __attribute__((aligned(64))) char padding;
+
+         // total cachelines allocated
+         __attribute__((aligned(64))) unsigned int max;
+
+         // cachelines currently in use
+         volatile unsigned int alloc;
+
+         // cachelines ready to itereate over
+         // (!= to alloc when thread is in second half of doregister)
+         volatile unsigned int ready;
+
+         // writer lock
+         volatile bool write_lock;
+
+         // data pointer
+         volatile bool * volatile * data;
+     } lock;
+     char pad[192];
  };

  void ?{}(__scheduler_RWLock_t & this);
  void ^?{}(__scheduler_RWLock_t & this);

- extern __scheduler_RWLock_t * __scheduler_lock;
+ extern __scheduler_RWLock_t __scheduler_lock;

  //-----------------------------------------------------------------------
  // Reader side : acquire when using the ready queue to schedule but not
  // creating/destroying queues
- static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
+ static inline void ready_schedule_lock(void) with(__scheduler_lock.lock) {
      /* paranoid */ verify( ! __preemption_enabled() );
      /* paranoid */ verify( ! kernelTLS().in_sched_lock );
…
  }

- static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
+ static inline void ready_schedule_unlock(void) with(__scheduler_lock.lock) {
      /* paranoid */ verify( ! __preemption_enabled() );
      /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
…
  static inline bool ready_mutate_islocked() {
-     return __scheduler_lock->write_lock;
+     return __scheduler_lock.lock.write_lock;
  }
  #endif
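The union rewrite is the heart of the changeset: overlaying the live fields with a fixed char array pins the lock's footprint at exactly three 64-byte cache lines, so the hot reader/writer fields never share a line with adjacent globals (false sharing). A minimal C sketch of the sizing trick, with the field set abbreviated and the interior per-field alignment omitted:

// Overlaying the fields with pad[192] fixes sizeof() at three cache lines.
union __attribute__((aligned(64))) padded_rwlock {
    struct {
        unsigned int max;               // total cachelines allocated
        volatile unsigned int alloc;    // cachelines currently in use
        volatile unsigned int ready;    // cachelines ready to iterate over
        volatile _Bool write_lock;      // writer lock
    } lock;
    char pad[192];                      // 3 * 64 bytes
};

_Static_assert(sizeof(union padded_rwlock) == 192, "exactly three cache lines");
_Static_assert(_Alignof(union padded_rwlock) == 64, "cache-line aligned");

In the actual hunk the aligned(64) attributes on the leading padding byte and on max additionally push the counters onto their own line within the union, keeping them away from whatever precedes the object in memory.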
libcfa/src/concurrency/kernel/startup.cfa (r428adbc → r7f6a7c9)

  KERNEL_STORAGE(thread$, mainThread);
  KERNEL_STORAGE(__stack_t, mainThreadCtx);
- KERNEL_STORAGE(__scheduler_RWLock_t, __scheduler_lock);
+ // KERNEL_STORAGE(__scheduler_RWLock_t, __scheduler_lock);
  KERNEL_STORAGE(eventfd_t, mainIdleEventFd);
  KERNEL_STORAGE(io_future_t, mainIdleFuture);
…
  processor * mainProcessor;
  thread$ * mainThread;
- __scheduler_RWLock_t * __scheduler_lock;

  extern "C" {
…
  //-----------------------------------------------------------------------------
  // Global state
- thread_local struct KernelThreadData __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" ))) @= {
+ __thread struct KernelThreadData __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" ))) @= {
      NULL, // cannot use 0p
      NULL,
…
  };

+ __scheduler_RWLock_t __scheduler_lock @= { 0 };
+
  #if defined(CFA_HAVE_LINUX_LIBRSEQ)
  // No data needed
  #elif defined(CFA_HAVE_LINUX_RSEQ_H)
  extern "Cforall" {
-     __attribute__((aligned(64))) thread_local volatile struct rseq __cfaabi_rseq @= {
+     __attribute__((aligned(64))) __thread volatile struct rseq __cfaabi_rseq @= {
          .cpu_id : RSEQ_CPU_ID_UNINITIALIZED,
      };
…
      // Initialize the global scheduler lock
-     __scheduler_lock = (__scheduler_RWLock_t*)&storage___scheduler_lock;
-     (*__scheduler_lock){};
+     // __scheduler_lock = (__scheduler_RWLock_t*)&storage___scheduler_lock;
+     (__scheduler_lock){};

      // Initialize the main cluster
…
      ^(*mainCluster){};

-     ^(*__scheduler_lock){};
+     ^(__scheduler_lock){};

      ^(__cfa_dbg_global_clusters.list){};
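The startup hunks finish the job: the KERNEL_STORAGE backing buffer and the global pointer are retired in favour of a statically allocated, zero-initialized object (CFA's @= { 0 } suppresses the implicit constructor), so every lock access saves one pointer indirection. A hypothetical before/after shape in plain C, with lock_t standing in for __scheduler_RWLock_t:

// Hypothetical stand-in for __scheduler_RWLock_t.
typedef struct { volatile int write_lock; } lock_t;

// Old shape: raw storage plus a global pointer bound during startup.
// Every ready_schedule_lock()/unlock() paid a pointer load to find the lock.
static char storage_lock[sizeof(lock_t)] __attribute__((aligned(64)));
lock_t * lock_ptr;

// New shape: one zero-initialized global object. Accesses compile to direct
// loads from a link-time constant address; no startup binding step needed.
lock_t lock_obj;

void startup_old(void) { lock_ptr = (lock_t *)storage_lock; lock_ptr->write_lock = 0; }
void startup_new(void) { lock_obj.write_lock = 0; }  // or rely on static zero-init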