//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel_private.hfa --
//
// Author           : Thierry Delisle
// Created On       : Mon Feb 13 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Wed Aug 12 08:21:33 2020
// Update Count     : 9
//

#pragma once

#include "kernel.hfa"
#include "thread.hfa"

#include "alarm.hfa"
#include "stats.hfa"

//-----------------------------------------------------------------------------
// Scheduler
struct __attribute__((aligned(128))) __scheduler_lock_id_t;

extern "C" {
	void disable_interrupts() OPTIONAL_THREAD;
	void enable_interrupts_noPoll();
	void enable_interrupts( __cfaabi_dbg_ctx_param );
}

void __schedule_thread( struct __processor_id_t *, $thread * )
#if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
	__attribute__((nonnull (2)))
#endif
;

// Block the current thread and release/wake-up the following resources
void __leave_thread() __attribute__((noreturn));

//-----------------------------------------------------------------------------
// Processor
void main(processorCtx_t *);

void * __create_pthread( pthread_t *, void * (*)(void *), void * );

extern cluster * mainCluster;

//-----------------------------------------------------------------------------
// Threads
extern "C" {
	void __cfactx_invoke_thread(void (*main)(void *), void * this);
}

__cfaabi_dbg_debug_do(
	extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
	extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
)

// KERNEL ONLY unpark without disabling interrupts
void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );

static inline bool __post(single_sem & this, struct __processor_id_t * id) {
	for() {
		struct $thread * expected = this.ptr;
		if(expected == 1p) return false;
		if(expected == 0p) {
			if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
				return false;
			}
		}
		else {
			if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
				__unpark( id, expected __cfaabi_dbg_ctx2 );
				return true;
			}
		}
	}
}
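// Illustrative sketch, not part of the original header: the state encoding
// assumed by __post above is 0p (no post, no waiter), 1p (posted, no waiter),
// and any other value (the single blocked waiter). A matching waiter-side
// routine could look roughly as follows; the name __sketch_wait and the calls
// to active_thread()/park() are assumptions for illustration only.
//
//	static inline bool __sketch_wait(single_sem & this) {
//		for() {
//			struct $thread * expected = this.ptr;
//			// a pending post is consumed without blocking
//			if(expected == 1p) {
//				if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) return false;
//			}
//			// otherwise publish this thread as the single waiter and block
//			// (a second concurrent waiter is not supported by a single_sem)
//			else if(expected == 0p) {
//				if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
//					park( __cfaabi_dbg_ctx );
//					return true;
//				}
//			}
//		}
//	}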
//-----------------------------------------------------------------------------
// Utils
void doregister( struct cluster * cltr, struct $thread & thrd );
void unregister( struct cluster * cltr, struct $thread & thrd );

//-----------------------------------------------------------------------------
// I/O
void ^?{}(io_context & this, bool );

//=======================================================================
// Cluster lock API
//=======================================================================
// Cells used by the reader-writer lock
// while not generic, it only relies on an opaque pointer
struct __attribute__((aligned(128))) __scheduler_lock_id_t {
	// Spin lock used as the underlying lock
	volatile bool lock;

	// Handle pointing to the proc owning this cell
	// Used for allocating cells and debugging
	__processor_id_t * volatile handle;

	#ifdef __CFA_WITH_VERIFY__
		// Debug, check if this is owned for reading
		bool owned;
	#endif
};

static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));

// Lock-Free registering/unregistering of processors
// Register a processor to a given cluster and get its unique id in return
unsigned doregister( struct __processor_id_t * proc );

// Unregister a processor from a given cluster using its id, getting back the original pointer
void unregister( struct __processor_id_t * proc );

//-----------------------------------------------------------------------
// Cluster idle lock/unlock
static inline void lock(__cluster_idles & this) {
	for() {
		uint64_t l = this.lock;
		if( (0 == (l % 2)) && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) )
			return;
		Pause();
	}
}

static inline void unlock(__cluster_idles & this) {
	/* paranoid */ verify( 1 == (this.lock % 2) );
	__atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
}

//=======================================================================
// Reader-writer lock implementation
// Concurrent with doregister/unregister,
//    i.e., threads can be added at any point during or between the entry/exit

//-----------------------------------------------------------------------
// simple spinlock underlying the RWLock
// Blocking acquire
static inline void __atomic_acquire(volatile bool * ll) {
	while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
		while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
			Pause();
	}
	/* paranoid */ verify(*ll);
}

// Non-Blocking acquire
static inline bool __atomic_try_acquire(volatile bool * ll) {
	return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
}

// Release
static inline void __atomic_unlock(volatile bool * ll) {
	/* paranoid */ verify(*ll);
	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}
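// Illustrative sketch, not part of the original header: intended pairing of
// the three spinlock primitives above on a 'volatile bool' flag. The variable
// name 'll' is hypothetical.
//
//	static volatile bool ll = false;
//
//	__atomic_acquire( &ll );              // blocking acquire, spins until the flag is owned
//	/* ... critical section ... */
//	__atomic_unlock( &ll );               // release
//
//	if( __atomic_try_acquire( &ll ) ) {   // non-blocking attempt, returns true on success
//		/* ... critical section ... */
//		__atomic_unlock( &ll );
//	}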
//-----------------------------------------------------------------------
// Reader-Writer lock protecting the ready-queues
// while this lock is mostly generic, some aspects
// have been hard-coded for the ready-queue for
// simplicity and performance
struct __scheduler_RWLock_t {
	// total cachelines allocated
	unsigned int max;

	// cachelines currently in use
	volatile unsigned int alloc;

	// cachelines ready to iterate over
	// (!= to alloc when thread is in second half of doregister)
	volatile unsigned int ready;

	// writer lock
	volatile bool lock;

	// data pointer
	__scheduler_lock_id_t * data;
};

void  ?{}(__scheduler_RWLock_t & this);
void ^?{}(__scheduler_RWLock_t & this);

extern __scheduler_RWLock_t * __scheduler_lock;

//-----------------------------------------------------------------------
// Reader side : acquire when using the ready queue to schedule but not
//  creating/destroying queues
static inline void ready_schedule_lock( struct __processor_id_t * proc) with(*__scheduler_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);

	// Step 1 : make sure no writer is in the middle of the critical section
	while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
		Pause();

	// Fence needed because we don't want to start trying to acquire the lock
	// before we read a false.
	// Not needed on x86
	// std::atomic_thread_fence(std::memory_order_seq_cst);

	// Step 2 : acquire our local lock
	__atomic_acquire( &data[iproc].lock );
	/*paranoid*/ verify(data[iproc].lock);

	#ifdef __CFA_WITH_VERIFY__
		// Debug, check if this is owned for reading
		data[iproc].owned = true;
	#endif
}

static inline void ready_schedule_unlock( struct __processor_id_t * proc) with(*__scheduler_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);
	/*paranoid*/ verify(data[iproc].lock);
	/*paranoid*/ verify(data[iproc].owned);
	#ifdef __CFA_WITH_VERIFY__
		// Debug, check if this is owned for reading
		data[iproc].owned = false;
	#endif
	__atomic_unlock(&data[iproc].lock);
}

#ifdef __CFA_WITH_VERIFY__
	static inline bool ready_schedule_islocked( struct __processor_id_t * proc) {
		return __scheduler_lock->data[proc->id].owned;
	}

	static inline bool ready_mutate_islocked() {
		return __scheduler_lock->lock;
	}
#endif

//-----------------------------------------------------------------------
// Writer side : acquire when changing the ready queue, e.g. adding more
//  queues or removing them.
uint_fast32_t ready_mutate_lock( void );

void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );

//=======================================================================
// Ready-Queue API

//-----------------------------------------------------------------------
// query the ready queue of a cluster
// returns true if the queue is non-empty, false otherwise
__attribute__((hot)) bool query(struct cluster * cltr);

//-----------------------------------------------------------------------
// push thread onto a ready queue for a cluster
// returns true if the list was previously empty, false otherwise
__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);

//-----------------------------------------------------------------------
// pop thread from the ready queue of a cluster
// returns 0p if empty
// May return 0p spuriously
__attribute__((hot)) struct $thread * pop(struct cluster * cltr);

//-----------------------------------------------------------------------
// pop thread from the ready queue of a cluster
// returns 0p if empty
// guaranteed to find any threads added before this call
__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);

//-----------------------------------------------------------------------
// remove thread from the ready queue of a cluster
// returns false if the thread was not found
bool remove_head(struct cluster * cltr, struct $thread * thrd);

//-----------------------------------------------------------------------
// Increase the width of the ready queue (number of lanes) by 4
void ready_queue_grow  (struct cluster * cltr, int target);

//-----------------------------------------------------------------------
// Decrease the width of the ready queue (number of lanes) by 4
void ready_queue_shrink(struct cluster * cltr, int target);

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //
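// Illustrative usage sketch, not part of the original header: how the reader
// and writer sides of the scheduler lock are intended to pair up with the
// ready-queue API above. The surrounding context and the names id, thrd,
// cltr, and target are hypothetical.
//
// Reader side, e.g. when scheduling a thread onto a cluster:
//	ready_schedule_lock  ( id );                       // shared access to the ready-queues
//	push( thrd->curr_cluster, thrd );
//	ready_schedule_unlock( id );
//
// Writer side, e.g. when changing the number of lanes:
//	uint_fast32_t last_size = ready_mutate_lock();     // exclusive access
//	ready_queue_grow( cltr, target );
//	ready_mutate_unlock( last_size );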