//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel_private.hfa --
//
// Author           : Thierry Delisle
// Created On       : Mon Feb 13 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Thu Mar 29 14:06:40 2018
// Update Count     : 3
//

#pragma once

#include "kernel.hfa"
#include "thread.hfa"

#include "alarm.hfa"

//-----------------------------------------------------------------------------
// Scheduler
extern "C" {
	void disable_interrupts() OPTIONAL_THREAD;
	void enable_interrupts_noPoll();
	void enable_interrupts( __cfaabi_dbg_ctx_param );
}

void ScheduleThread( thread_desc * );
static inline void WakeThread( thread_desc * thrd ) {
	if( !thrd ) return;

	verify(thrd->state == Inactive);

	disable_interrupts();
	ScheduleThread( thrd );
	enable_interrupts( __cfaabi_dbg_ctx );
}
thread_desc * nextThread(cluster * this);

// Block the current thread and release/wake up the given resources
void BlockInternal(void);
void BlockInternal(__spinlock_t * lock);
void BlockInternal(thread_desc * thrd);
void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
void BlockInternal(__spinlock_t * locks [], unsigned short count);
void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
void BlockInternal(__finish_callback_fptr_t callback);
void LeaveThread(__spinlock_t * lock, thread_desc * thrd);

//-----------------------------------------------------------------------------
// Processor
void main(processorCtx_t *);

static inline void wake_fast(processor * this) {
	__cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
	post( this->idleLock );
}

static inline void wake(processor * this) {
	disable_interrupts();
	wake_fast(this);
	enable_interrupts( __cfaabi_dbg_ctx );
}

struct event_kernel_t {
	alarm_list_t alarms;
	__spinlock_t lock;
};

extern event_kernel_t * event_kernel;

struct __cfa_kernel_preemption_state_t {
	bool enabled;
	bool in_progress;
	unsigned short disable_count;
};

extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));

//-----------------------------------------------------------------------------
// Threads
extern "C" {
	forall(dtype T | is_thread(T))
	void CtxInvokeThread(T * this);
}

extern void ThreadCtxSwitch(coroutine_desc * src, coroutine_desc * dst);

__cfaabi_dbg_debug_do(
	extern void __cfaabi_dbg_thread_register  ( thread_desc * thrd );
	extern void __cfaabi_dbg_thread_unregister( thread_desc * thrd );
)

//-----------------------------------------------------------------------------
// Utils
#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]

// Cheap xorshift-style pseudo-random number generator on the per-kernel-thread seed
static inline uint32_t tls_rand() {
	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
	kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
	return kernelTLS.rand_seed;
}

void doregister( struct cluster & cltr );
void unregister( struct cluster & cltr );

void doregister( struct cluster * cltr, struct thread_desc & thrd );
void unregister( struct cluster * cltr, struct thread_desc & thrd );

//=======================================================================
// Cluster lock API
//=======================================================================
struct __attribute__((aligned(64))) __processor_id {
	processor * volatile handle;
	volatile bool lock;
};
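// Each processor registered on the cluster owns exactly one of these cells in the
// cluster's ready_lock : the reader side below spins only on its own cell, so
// concurrent scheduling operations do not contend on a shared lock, while the
// writer side (ready_mutate_lock below) excludes all readers before the set of
// ready queues is changed.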
// Lock-free registering/unregistering of processors
// Register a processor to a given cluster and get its unique id in return
unsigned doregister( struct cluster * cltr, struct processor * proc );

// Unregister a processor from a given cluster using its id
void unregister( struct cluster * cltr, struct processor * proc );

//=======================================================================
// Reader-writer lock implementation
// Concurrent with doregister/unregister,
//    i.e., processors can be added at any point during or between the entry/exit

static inline void __atomic_acquire(volatile bool * ll) {
	while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
		while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
			asm volatile("pause");
	}
	/* paranoid */ verify(*ll);
}

static inline bool __atomic_try_acquire(volatile bool * ll) {
	return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
}

static inline void __atomic_unlock(volatile bool * ll) {
	/* paranoid */ verify(*ll);
	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}

//-----------------------------------------------------------------------
// Reader side : acquire when using the ready queue to schedule but not
//               creating/destroying queues
//               (an illustrative usage sketch appears at the end of this file)
static inline void ready_schedule_lock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);

	// Step 1 : make sure no writer is in the middle of the critical section
	while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
		asm volatile("pause");

	// Fence needed because we don't want to start trying to acquire the lock
	// before we read a false.
	// Not needed on x86
	// std::atomic_thread_fence(std::memory_order_seq_cst);

	// Step 2 : acquire our local lock
	__atomic_acquire( &data[iproc].lock );
	/*paranoid*/ verify(data[iproc].lock);
}

static inline void ready_schedule_unlock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);
	/*paranoid*/ verify(data[iproc].lock);
	__atomic_store_n(&data[iproc].lock, false, __ATOMIC_RELEASE);
}

//-----------------------------------------------------------------------
// Writer side : acquire when changing the ready queue, e.g. adding more
//               queues or removing them.
uint_fast32_t ready_mutate_lock( struct cluster & cltr );

void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t );

//=======================================================================
// Ready-Queue API

__attribute__((hot)) bool push(struct cluster * cltr, struct thread_desc * thrd);
__attribute__((hot)) thread_desc * pop(struct cluster * cltr);

void ready_queue_grow  (struct cluster * cltr);
void ready_queue_shrink(struct cluster * cltr);

#if !defined(__CFA_NO_STATISTICS__)
	void stats_tls_tally(struct cluster * cltr);
#else
	static inline void stats_tls_tally(struct cluster * cltr) {}
#endif

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //
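// Illustrative sketch only, not used by the kernel : how a processor already
// registered on 'cltr' via doregister above brackets a scheduling decision with
// the reader side of the cluster lock. The helper name is hypothetical; pop may
// return a null pointer when the ready queue is empty.
static inline thread_desc * __example_schedule_pop( struct cluster * cltr, struct processor * proc ) {
	ready_schedule_lock  ( cltr, proc );    // reader side : take only this processor's cell
	thread_desc * thrd = pop( cltr );       // pull the next ready thread, if any
	ready_schedule_unlock( cltr, proc );    // release the cell so a writer can proceed
	return thrd;
}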