Changeset 58fe85a for libcfa/src/concurrency/kernel_private.hfa
- Timestamp: Jan 7, 2021, 3:27:00 PM
- Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children: 2b4daf2, 64aeca0
- Parents: 3c64c668 (diff), eef8dfb (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
- File: 1 edited (libcfa/src/concurrency/kernel_private.hfa)
Legend:
- Unmodified lines are shown with a leading space
- Added lines are prefixed with +
- Removed lines are prefixed with -
- Elided unchanged regions are marked with …
libcfa/src/concurrency/kernel_private.hfa
r3c64c668 → r58fe85a

  // Created On : Mon Feb 13 12:27:26 2017
  // Last Modified By : Peter A. Buhr
- // Last Modified On : Sat Nov 30 19:25:02 2019
- // Update Count : 8
+ // Last Modified On : Wed Aug 12 08:21:33 2020
+ // Update Count : 9
  //

…

  #include "alarm.hfa"
  #include "stats.hfa"

  //-----------------------------------------------------------------------------
  // Scheduler
+
+ struct __attribute__((aligned(128))) __scheduler_lock_id_t;

  extern "C" {
…
  }

- void __schedule_thread( $thread * ) __attribute__((nonnull (1)));
-
- //Block current thread and release/wake-up the following resources
- void __leave_thread() __attribute__((noreturn));
+ void __schedule_thread( $thread * )
+ #if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
+     __attribute__((nonnull (1)))
+ #endif
+ ;
+
+ extern bool __preemption_enabled();
+
+ //release/wake-up the following resources
+ void __thread_finish( $thread * thrd );

  //-----------------------------------------------------------------------------
…

  void * __create_pthread( pthread_t *, void * (*)(void *), void * );
-
- static inline void wake_fast(processor * this) {
-     __cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
-     post( this->idleLock );
- }
-
- static inline void wake(processor * this) {
-     disable_interrupts();
-     wake_fast(this);
-     enable_interrupts( __cfaabi_dbg_ctx );
- }
-
- struct event_kernel_t {
-     alarm_list_t alarms;
-     __spinlock_t lock;
- };
-
- extern event_kernel_t * event_kernel;
-
- struct __cfa_kernel_preemption_state_t {
-     bool enabled;
-     bool in_progress;
-     unsigned short disable_count;
- };
-
- extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
+ void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );
+
+
+
+ extern cluster * mainCluster;

  //-----------------------------------------------------------------------------
…
  )

+ #define TICKET_BLOCKED (-1) // thread is blocked
+ #define TICKET_RUNNING ( 0) // thread is running
+ #define TICKET_UNBLOCK ( 1) // thread should ignore next block
+
  //-----------------------------------------------------------------------------
  // Utils
- #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
-
- static inline uint32_t __tls_rand() {
-     kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
-     kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
-     kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
-     return kernelTLS.rand_seed;
- }
-
-
- void doregister( struct cluster & cltr );
- void unregister( struct cluster & cltr );
-
  void doregister( struct cluster * cltr, struct $thread & thrd );
  void unregister( struct cluster * cltr, struct $thread & thrd );

- void doregister( struct cluster * cltr, struct processor * proc );
- void unregister( struct cluster * cltr, struct processor * proc );
+ //-----------------------------------------------------------------------------
+ // I/O
+ void ^?{}(io_context & this, bool );
+
+ //=======================================================================
+ // Cluster lock API
+ //=======================================================================
+ // Cells use by the reader writer lock
+ // while not generic it only relies on a opaque pointer
+ struct __attribute__((aligned(128))) __scheduler_lock_id_t {
+     // Spin lock used as the underlying lock
+     volatile bool lock;
+
+     // Handle pointing to the proc owning this cell
+     // Used for allocating cells and debugging
+     __processor_id_t * volatile handle;
+
+     #ifdef __CFA_WITH_VERIFY__
+         // Debug, check if this is owned for reading
+         bool owned;
+     #endif
+ };
+
+ static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
+
+ // Lock-Free registering/unregistering of threads
+ // Register a processor to a given cluster and get its unique id in return
+ unsigned doregister( struct __processor_id_t * proc );
+
+ // Unregister a processor from a given cluster using its id, getting back the original pointer
+ void unregister( struct __processor_id_t * proc );
+
+ //-----------------------------------------------------------------------
+ // Cluster idle lock/unlock
+ static inline void lock(__cluster_idles & this) {
+     for() {
+         uint64_t l = this.lock;
+         if(
+             (0 == (l % 2))
+             && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+         ) return;
+         Pause();
+     }
+ }
+
+ static inline void unlock(__cluster_idles & this) {
+     /* paranoid */ verify( 1 == (this.lock % 2) );
+     __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
+ }
+
+ //=======================================================================
+ // Reader-writer lock implementation
+ // Concurrent with doregister/unregister,
+ //    i.e., threads can be added at any point during or between the entry/exit
+
+ //-----------------------------------------------------------------------
+ // simple spinlock underlying the RWLock
+ // Blocking acquire
+ static inline void __atomic_acquire(volatile bool * ll) {
+     while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
+         while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
+             Pause();
+     }
+     /* paranoid */ verify(*ll);
+ }
+
+ // Non-Blocking acquire
+ static inline bool __atomic_try_acquire(volatile bool * ll) {
+     return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
+ }
+
+ // Release
+ static inline void __atomic_unlock(volatile bool * ll) {
+     /* paranoid */ verify(*ll);
+     __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
+ }
+
+ //-----------------------------------------------------------------------
+ // Reader-Writer lock protecting the ready-queues
+ // while this lock is mostly generic some aspects
+ // have been hard-coded to for the ready-queue for
+ // simplicity and performance
+ struct __scheduler_RWLock_t {
+     // total cachelines allocated
+     unsigned int max;
+
+     // cachelines currently in use
+     volatile unsigned int alloc;
+
+     // cachelines ready to itereate over
+     // (!= to alloc when thread is in second half of doregister)
+     volatile unsigned int ready;
+
+     // writer lock
+     volatile bool lock;
+
+     // data pointer
+     __scheduler_lock_id_t * data;
+ };
+
+ void ?{}(__scheduler_RWLock_t & this);
+ void ^?{}(__scheduler_RWLock_t & this);
+
+ extern __scheduler_RWLock_t * __scheduler_lock;
+
+ //-----------------------------------------------------------------------
+ // Reader side : acquire when using the ready queue to schedule but not
+ //               creating/destroying queues
+ static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
+     /* paranoid */ verify( ! __preemption_enabled() );
+     /* paranoid */ verify( kernelTLS().this_proc_id );
+
+     unsigned iproc = kernelTLS().this_proc_id->id;
+     /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
+     /*paranoid*/ verify(iproc < ready);
+
+     // Step 1 : make sure no writer are in the middle of the critical section
+     while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
+         Pause();
+
+     // Fence needed because we don't want to start trying to acquire the lock
+     // before we read a false.
+     // Not needed on x86
+     // std::atomic_thread_fence(std::memory_order_seq_cst);
+
+     // Step 2 : acquire our local lock
+     __atomic_acquire( &data[iproc].lock );
+     /*paranoid*/ verify(data[iproc].lock);
+
+     #ifdef __CFA_WITH_VERIFY__
+         // Debug, check if this is owned for reading
+         data[iproc].owned = true;
+     #endif
+ }
+
+ static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
+     /* paranoid */ verify( ! __preemption_enabled() );
+     /* paranoid */ verify( kernelTLS().this_proc_id );
+
+     unsigned iproc = kernelTLS().this_proc_id->id;
+     /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
+     /*paranoid*/ verify(iproc < ready);
+     /*paranoid*/ verify(data[iproc].lock);
+     /*paranoid*/ verify(data[iproc].owned);
+     #ifdef __CFA_WITH_VERIFY__
+         // Debug, check if this is owned for reading
+         data[iproc].owned = false;
+     #endif
+     __atomic_unlock(&data[iproc].lock);
+ }
+
+ #ifdef __CFA_WITH_VERIFY__
+     static inline bool ready_schedule_islocked(void) {
+         /* paranoid */ verify( ! __preemption_enabled() );
+         /*paranoid*/ verify( kernelTLS().this_proc_id );
+         __processor_id_t * proc = kernelTLS().this_proc_id;
+         return __scheduler_lock->data[proc->id].owned;
+     }
+
+     static inline bool ready_mutate_islocked() {
+         return __scheduler_lock->lock;
+     }
+ #endif
+
+ //-----------------------------------------------------------------------
+ // Writer side : acquire when changing the ready queue, e.g. adding more
+ //               queues or removing them.
+ uint_fast32_t ready_mutate_lock( void );
+
+ void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
+
+ //=======================================================================
+ // Ready-Queue API
+ //-----------------------------------------------------------------------
+ // pop thread from the ready queue of a cluster
+ // returns 0p if empty
+ __attribute__((hot)) bool query(struct cluster * cltr);
+
+ //-----------------------------------------------------------------------
+ // push thread onto a ready queue for a cluster
+ // returns true if the list was previously empty, false otherwise
+ __attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
+
+ //-----------------------------------------------------------------------
+ // pop thread from the ready queue of a cluster
+ // returns 0p if empty
+ // May return 0p spuriously
+ __attribute__((hot)) struct $thread * pop(struct cluster * cltr);
+
+ //-----------------------------------------------------------------------
+ // pop thread from the ready queue of a cluster
+ // returns 0p if empty
+ // guaranteed to find any threads added before this call
+ __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
+
+ //-----------------------------------------------------------------------
+ // remove thread from the ready queue of a cluster
+ // returns bool if it wasn't found
+ bool remove_head(struct cluster * cltr, struct $thread * thrd);
+
+ //-----------------------------------------------------------------------
+ // Increase the width of the ready queue (number of lanes) by 4
+ void ready_queue_grow (struct cluster * cltr, int target);
+
+ //-----------------------------------------------------------------------
+ // Decrease the width of the ready queue (number of lanes) by 4
+ void ready_queue_shrink(struct cluster * cltr, int target);
+

  // Local Variables: //
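For orientation only (not part of the changeset): the new header splits protection of the ready queues into a reader side, taken by a processor while it schedules, and a writer side, taken only when the set of ready-queue lanes changes. Below is a minimal usage sketch built solely from the declarations added above, assuming the caller is a processor already registered via doregister() and running with preemption disabled (as the paranoid verifies in ready_schedule_lock require); the helper names example_schedule and example_resize are hypothetical.

#include "kernel_private.hfa"

// Hypothetical helper: enqueue a thread under the reader side of the
// scheduler RW-lock, so the push cannot race with lane creation/destruction.
static void example_schedule( struct cluster * cltr, struct $thread * thrd ) {
    ready_schedule_lock();       // reader side: many processors may hold it concurrently
    push( cltr, thrd );          // ready-queue operation protected from resizing
    ready_schedule_unlock();
}

// Hypothetical helper: change the number of lanes under the writer side,
// which excludes all readers for the duration of the change.
static void example_resize( struct cluster * cltr, int target ) {
    uint_fast32_t last_size = ready_mutate_lock();
    ready_queue_grow( cltr, target );
    ready_mutate_unlock( last_size );
}

The split keeps the hot scheduling path down to acquiring the processor's own 128-byte-aligned cell (data[iproc].lock), while the comparatively rare resize path takes the writer lock and therefore excludes every reader.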
Note: See TracChangeset for help on using the changeset viewer.