//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel/private.hfa --
//
// Author           : Thierry Delisle
// Created On       : Mon Feb 13 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Thu Mar  2 16:04:46 2023
// Update Count     : 11
//

#pragma once

#if !defined(__cforall_thread__)
	#error kernel/private.hfa should only be included in libcfathread source
#endif

#include <signal.h>                     // sigset_t, union sigval (used by the declarations below)

#include "kernel.hfa"
#include "thread.hfa"

#include "alarm.hfa"
#include "stats.hfa"

extern "C" {
	#include <pthread.h>                // pthread_t, pthread_attr_t
}

// #define READYQ_USE_LINEAR_AVG
#define READYQ_USE_LOGDBL_AVG
// #define READYQ_USE_LOGINT_AVG

#if   defined(READYQ_USE_LINEAR_AVG)
typedef unsigned long long __readyQ_avg_t;
#elif defined(READYQ_USE_LOGDBL_AVG)
typedef double __readyQ_avg_t;
#elif defined(READYQ_USE_LOGINT_AVG)
typedef unsigned long long __readyQ_avg_t;
#else
#error must pick a scheme for averaging
#endif

extern "C" {
	__attribute__((visibility("protected"))) int __cfaabi_pthread_create(pthread_t * _thread, const pthread_attr_t * attr, void * (*start_routine)(void *), void * arg);
	__attribute__((visibility("protected"))) int __cfaabi_pthread_join(pthread_t _thread, void ** retval);
	__attribute__((visibility("protected"))) pthread_t __cfaabi_pthread_self(void);
	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_init(pthread_attr_t * attr);
	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_destroy(pthread_attr_t * attr);
	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_setstack( pthread_attr_t * attr, void * stackaddr, size_t stacksize );
	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_getstacksize( const pthread_attr_t * attr, size_t * stacksize );
	__attribute__((visibility("protected"))) int __cfaabi_pthread_sigqueue(pthread_t _thread, int sig, const union sigval value);
	__attribute__((visibility("protected"))) int __cfaabi_pthread_sigmask( int how, const sigset_t * set, sigset_t * oset);
}

//-----------------------------------------------------------------------------
// Scheduler
union __attribute__((aligned(64))) __timestamp_t {
	struct {
		volatile unsigned long long tv;
		volatile __readyQ_avg_t ma;
	} t;
	char __padding[192];
};

extern "C" {
	void disable_interrupts() OPTIONAL_THREAD;
	void enable_interrupts( bool poll = true );
}

void schedule_thread$( thread$ *, unpark_hint hint ) __attribute__((nonnull (1)));

extern bool __preemption_enabled();

enum {
	PREEMPT_NORMAL    = 0,
	PREEMPT_TERMINATE = 1,
	PREEMPT_IO        = 2,
};

static inline void __disable_interrupts_checked() {
	/* paranoid */ verify( __preemption_enabled() );
	disable_interrupts();
	/* paranoid */ verify( ! __preemption_enabled() );
}

static inline void __enable_interrupts_checked( bool poll = true ) {
	/* paranoid */ verify( ! __preemption_enabled() );
	enable_interrupts( poll );
	/* paranoid */ verify( __preemption_enabled() );
}

// release/wake-up the following resources
void __thread_finish( thread$ * thrd );

//-----------------------------------------------------------------------------
// Hardware
static inline int __kernel_getcpu() {
	/* paranoid */ verify( ! __preemption_enabled() );
	return sched_getcpu();
}
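// Illustrative sketch only (kept under '#if 0', not part of the build): one
// plausible way to bracket a short non-preemptible region with the checked
// wrappers above. The function name and body are hypothetical; only the
// calling convention (disable before, re-enable after, preemption off while
// calling __kernel_getcpu) follows from the verify()s above.
#if 0
static void example_non_preemptible_region(void) {
	__disable_interrupts_checked();     // asserts preemption was enabled, then disables it
	int cpu = __kernel_getcpu();        // legal here: preemption is off, as verify()ed
	(void)cpu;                          // ... use processor-local state ...
	__enable_interrupts_checked();      // asserts preemption is still off, then re-enables it
}
#endif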
//-----------------------------------------------------------------------------
// Processor
void main(processorCtx_t &);
static inline coroutine$ * get_coroutine(processorCtx_t & this) { return &this.self; }

void * __create_pthread( pthread_t *, void * (*)(void *), void * );
void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );

extern cluster * mainCluster;

//-----------------------------------------------------------------------------
// Threads
extern "C" {
	void __cfactx_invoke_thread(void (*main)(void *), void * this);
}

__cfaabi_dbg_debug_do(
	extern void __cfaabi_dbg_thread_register  ( thread$ * thrd );
	extern void __cfaabi_dbg_thread_unregister( thread$ * thrd );
)

#define TICKET_BLOCKED (-1)     // thread is blocked
#define TICKET_RUNNING ( 0)     // thread is running
#define TICKET_UNBLOCK ( 1)     // thread should ignore next block
#define TICKET_DEAD    (0xDEAD) // thread should never be unparked

//-----------------------------------------------------------------------------
// Utils
void doregister( struct cluster * cltr, struct thread$ & thrd );
void unregister( struct cluster * cltr, struct thread$ & thrd );

//-----------------------------------------------------------------------------
// I/O
io_arbiter$ * create(void);
void destroy(io_arbiter$ *);

//=======================================================================
// Cluster lock API
//=======================================================================
// Lock-Free registering/unregistering of threads

// Register a processor to a given cluster and get its unique id in return
unsigned register_proc_id( void );

// Unregister a processor from a given cluster using its id
void unregister_proc_id( unsigned );

//=======================================================================
// Reader-writer lock implementation
// Concurrent with doregister/unregister,
// i.e., threads can be added at any point during or between the entry/exit

//-----------------------------------------------------------------------
// simple spinlock underlying the RWLock

// Blocking acquire
static inline void __atomic_acquire(volatile bool * ll) {
	/* paranoid */ verify( ! __preemption_enabled() );
	/* paranoid */ verify(ll);

	while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
		while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
			Pause();
	}
	/* paranoid */ verify(*ll);
	/* paranoid */ verify( ! __preemption_enabled() );
}

// Non-Blocking acquire
static inline bool __atomic_try_acquire(volatile bool * ll) {
	/* paranoid */ verify( ! __preemption_enabled() );
	/* paranoid */ verify(ll);
	return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
}

// Release
static inline void __atomic_unlock(volatile bool * ll) {
	/* paranoid */ verify( ! __preemption_enabled() );
	/* paranoid */ verify(ll);
	/* paranoid */ verify(*ll);
	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}
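// Illustrative sketch only (kept under '#if 0', not part of the build): the
// intended pairing of the spinlock primitives above. 'example_flag' and the
// function are hypothetical; the disable/enable bracket reflects the
// verify( ! __preemption_enabled() ) preconditions of the primitives.
#if 0
static volatile bool example_flag = false;

static void example_spinlock_usage(void) {
	disable_interrupts();                         // the primitives assert preemption is off

	__atomic_acquire( &example_flag );            // blocking acquire
	// ... short critical section ...
	__atomic_unlock( &example_flag );

	if( __atomic_try_acquire( &example_flag ) ) { // non-blocking attempt
		// ... short critical section ...
		__atomic_unlock( &example_flag );
	}

	enable_interrupts();
}
#endif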
//-----------------------------------------------------------------------
// Reader-Writer lock protecting the ready-queues
// While this lock is mostly generic, some aspects have been hard-coded to the
// ready-queue for simplicity and performance.
union __attribute__((aligned(64))) __scheduler_RWLock_t {
	struct {
		__attribute__((aligned(64))) char padding;

		// total cachelines allocated
		__attribute__((aligned(64))) unsigned int max;

		// cachelines currently in use
		volatile unsigned int alloc;

		// cachelines ready to iterate over
		// (!= to alloc when thread is in second half of doregister)
		volatile unsigned int ready;

		// writer lock
		volatile bool write_lock;

		// data pointer
		volatile bool * volatile * data;
	} lock;
	char pad[192];
};

void  ?{}(__scheduler_RWLock_t & this);
void ^?{}(__scheduler_RWLock_t & this);

extern __scheduler_RWLock_t __scheduler_lock;

//-----------------------------------------------------------------------
// Reader side : acquire when using the ready queue to schedule but not
//  creating/destroying queues
static inline void ready_schedule_lock(void) with(__scheduler_lock.lock) {
	/* paranoid */ verify( ! __preemption_enabled() );
	/* paranoid */ verify( ! kernelTLS().in_sched_lock );
	/* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
	/* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );

	// Step 1 : make sure no writer is in the middle of the critical section
	while(__atomic_load_n(&write_lock, (int)__ATOMIC_RELAXED))
		Pause();

	// Fence needed because we don't want to start trying to acquire the lock
	// before we read a false.
	// Not needed on x86
	// std::atomic_thread_fence(std::memory_order_seq_cst);

	// Step 2 : acquire our local lock
	__atomic_acquire( &kernelTLS().sched_lock );
	/* paranoid */ verify(kernelTLS().sched_lock);

	#ifdef __CFA_WITH_VERIFY__
		// Debug, check if this is owned for reading
		kernelTLS().in_sched_lock = true;
	#endif
}

static inline void ready_schedule_unlock(void) with(__scheduler_lock.lock) {
	/* paranoid */ verify( ! __preemption_enabled() );
	/* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
	/* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );
	/* paranoid */ verify( kernelTLS().sched_lock );
	/* paranoid */ verify( kernelTLS().in_sched_lock );
	#ifdef __CFA_WITH_VERIFY__
		// Debug, check if this is owned for reading
		kernelTLS().in_sched_lock = false;
	#endif
	__atomic_unlock(&kernelTLS().sched_lock);
}

#ifdef __CFA_WITH_VERIFY__
	static inline bool ready_schedule_islocked(void) {
		/* paranoid */ verify( ! __preemption_enabled() );
		/* paranoid */ verify( (!kernelTLS().in_sched_lock) || kernelTLS().sched_lock );
		return kernelTLS().sched_lock;
	}

	static inline bool ready_mutate_islocked() {
		return __scheduler_lock.lock.write_lock;
	}
#endif
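// Illustrative sketch only (kept under '#if 0', not part of the build): the
// reader side is taken around scheduling operations that touch the
// ready-queues. The function is hypothetical; it assumes the caller runs on a
// registered processor with preemption disabled, as the verify()s above require.
#if 0
static void example_reader_side(void) {
	disable_interrupts();               // reader side requires preemption to be off
	ready_schedule_lock();
	// ... schedule using the ready-queues of the current cluster ...
	ready_schedule_unlock();
	enable_interrupts();
}
#endif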
//-----------------------------------------------------------------------
// Writer side : acquire when changing the ready queue, e.g. adding more
//  queues or removing them.
uint_fast32_t ready_mutate_lock( void );

void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );

//-----------------------------------------------------------------------
// Lock-Free registering/unregistering of threads

// Register a processor to a given cluster and get its unique id in return
// For convenience, also acquires the lock
static inline [unsigned, uint_fast32_t] ready_mutate_register() {
	unsigned id = register_proc_id();
	uint_fast32_t last = ready_mutate_lock();
	return [id, last];
}

// Unregister a processor from a given cluster using its id
// assumes the lock is acquired
static inline void ready_mutate_unregister( unsigned id, uint_fast32_t last_s ) {
	ready_mutate_unlock( last_s );
	unregister_proc_id( id );
}

//-----------------------------------------------------------------------
// Cluster idle lock/unlock
static inline void lock(__cluster_proc_list & this) {
	/* paranoid */ verify( ! __preemption_enabled() );

	// Start by locking the global RWlock so that we know no-one is
	// adding/removing processors while we mess with the idle lock
	ready_schedule_lock();

	lock( this.lock __cfaabi_dbg_ctx2 );

	/* paranoid */ verify( ! __preemption_enabled() );
}

static inline bool try_lock(__cluster_proc_list & this) {
	/* paranoid */ verify( ! __preemption_enabled() );

	// Start by locking the global RWlock so that we know no-one is
	// adding/removing processors while we mess with the idle lock
	ready_schedule_lock();

	if(try_lock( this.lock __cfaabi_dbg_ctx2 )) {
		// success
		/* paranoid */ verify( ! __preemption_enabled() );
		return true;
	}

	// failed to lock
	ready_schedule_unlock();

	/* paranoid */ verify( ! __preemption_enabled() );
	return false;
}

static inline void unlock(__cluster_proc_list & this) {
	/* paranoid */ verify( ! __preemption_enabled() );

	unlock(this.lock);

	// Release the global lock, which we acquired when locking
	ready_schedule_unlock();

	/* paranoid */ verify( ! __preemption_enabled() );
}
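// Illustrative sketch only (kept under '#if 0', not part of the build): a
// writer registers itself and takes the write lock via the convenience helper
// above, mutates the ready-queue structure, then releases in the reverse
// order. The function name and the elided mutation are hypothetical.
#if 0
static void example_writer_side(void) {
	unsigned id;
	uint_fast32_t last;
	[id, last] = ready_mutate_register();   // register this processor and take the write lock
	// ... structural changes to the ready-queues (e.g. adding/removing lanes) ...
	ready_mutate_unregister( id, last );    // release the write lock, then unregister
}
#endif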
//=======================================================================
// Ready-Queue API

//-----------------------------------------------------------------------
// push thread onto a ready queue for a cluster
__attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint);

//-----------------------------------------------------------------------
// pop thread from the local queues of a cluster
// returns 0p if empty
// May return 0p spuriously
__attribute__((hot)) struct thread$ * pop_fast(struct cluster * cltr);

//-----------------------------------------------------------------------
// pop thread from any ready queue of a cluster
// returns 0p if empty
// May return 0p spuriously
__attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr);

//-----------------------------------------------------------------------
// search all ready queues of a cluster for any thread
// returns 0p if empty
// guaranteed to find any threads added before this call
__attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr);

//-----------------------------------------------------------------------
// get preferred ready queue for a new thread
unsigned ready_queue_new_preferred();

//-----------------------------------------------------------------------
// Increase the width of the ready queue (number of lanes) by 4
void ready_queue_grow  (struct cluster * cltr);

//-----------------------------------------------------------------------
// Decrease the width of the ready queue (number of lanes) by 4
void ready_queue_shrink(struct cluster * cltr);

//-----------------------------------------------------------------------
// Close the ready queue of a cluster
void ready_queue_close(struct cluster * cltr);

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //