//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel.c --
//
// Author           : Thierry Delisle
// Created On       : Tue Jan 17 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Mon Apr  9 16:11:46 2018
// Update Count     : 24
//

//C Includes
// NOTE(review): the header names on the #include lines below were missing from the
// reviewed copy of this file. They are reconstructed from the names this file uses
// (NULL, errno, snprintf, getrlimit, sigsuspend, sysconf, _exit); <string.h> and
// <fenv.h> are the least certain -- confirm against the repository.
#include <stddef.h>
#include <errno.h>
#include <string.h>
extern "C" {
#include <stdio.h>
#include <fenv.h>
#include <sys/resource.h>
#include <signal.h>
#include <unistd.h>
}

//CFA Includes
#include "time.hfa"
#include "kernel_private.hfa"
#include "preemption.hfa"
#include "startup.hfa"

//Private includes
#define __CFA_INVOKE_PRIVATE__
#include "invoke.h"

//Start and stop routine for the kernel, declared first to make sure they run first
static void kernel_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
static void kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));

//-----------------------------------------------------------------------------
// Kernel storage
// Raw storage for the objects that kernel_startup constructs by hand.
KERNEL_STORAGE(cluster,           mainCluster);
KERNEL_STORAGE(processor,         mainProcessor);
KERNEL_STORAGE(thread_desc,       mainThread);
KERNEL_STORAGE(__stack_t,         mainThreadCtx);

cluster     * mainCluster;
processor   * mainProcessor;
thread_desc * mainThread;

extern "C" {
// List of all clusters and the lock protecting it (see doregister/unregister below).
struct { __dllist_t(cluster) list; __spinlock_t lock; } __cfa_dbg_global_clusters;
}

// Set from sysconf( _SC_PAGESIZE ) in kernel_startup.
size_t __page_size = 0;

//-----------------------------------------------------------------------------
// Global state
thread_local struct KernelThreadData kernelTLS __attribute__ ((tls_model ( "initial-exec" ))) = {
	NULL,
	NULL,
	{ 1, false, false }
};

//-----------------------------------------------------------------------------
// Struct to steal stack
struct current_stack_info_t {
	__stack_t * storage;		// pointer to stack object
	void *base;					// base of stack
	void *limit;				// stack grows towards stack limit
	void *context;				// address of cfa_context_t
};

// Describe the stack the caller is currently running on: the base is the current
// frame pointer and the limit is the base minus the RLIMIT_STACK soft limit.
// `storage` is NOT set here; every caller in this file assigns it afterwards.
void ?{}( current_stack_info_t & this ) {
	__stack_context_t ctx;
	CtxGet( ctx );
	this.base = ctx.FP;

	rlimit r;
	getrlimit( RLIMIT_STACK, &r);
	size_t size = r.rlim_cur;

	this.limit = (void *)(((intptr_t)this.base) - size);
	this.context = &storage_mainThreadCtx;
}

//-----------------------------------------------------------------------------
// Main thread construction

// Build a coroutine descriptor on top of an already existing stack described by info.
void ?{}( coroutine_desc & this, current_stack_info_t * info) with( this ) {
	stack.storage = info->storage;
	with(*stack.storage) {
		limit     = info->limit;
		base      = info->base;
	}
	// Tag the low bit of the storage pointer.
	// NOTE(review): presumably marks the stack as not owned by this coroutine -- confirm.
	*((intptr_t*)&stack.storage) |= 0x1;
	name = "Main Thread";
	state = Start;
	starter = NULL;
	last = NULL;
	cancellation = NULL;
}

// Build a thread descriptor on top of an already existing stack described by info.
// The thread starts as owner of its own monitor and is registered with mainCluster.
void ?{}( thread_desc & this, current_stack_info_t * info) with( this ) {
	state = Start;
	self_cor{ info };
	curr_cor = &self_cor;
	curr_cluster = mainCluster;
	self_mon.owner = &this;
	self_mon.recursion = 1;
	self_mon_p = &self_mon;
	next = NULL;

	node.next = NULL;
	node.prev = NULL;
	doregister(curr_cluster, this);

	monitors{ &self_mon_p, 1, (fptr_t)0 };
}

//-----------------------------------------------------------------------------
// Processor coroutine
// Default constructor: intentionally does nothing.
void ?{}(processorCtx_t & this) {

}

// Construct the processor context of non-main processors
static void ?{}(processorCtx_t & this, processor * proc, current_stack_info_t * info) {
	(this.__cor){ info };
	this.proc = proc;
}

static void start(processor * this);

// Construct a processor on cluster cltr and immediately start its kernel thread.
void ?{}(processor & this, const char * name, cluster & cltr) with( this ) {
	this.name = name;
	this.cltr = &cltr;
	terminated{ 0 };
	do_terminate = false;
	preemption_alarm = NULL;
	pending_preemption = false;
	runner.proc = &this;

	idleLock{};

	start( &this );
}

// Destroy a processor: if termination has not been requested yet, request it, wake the
// processor and wait for it to signal `terminated`; then join the kernel thread.
void ^?{}(processor & this) with( this ){
	if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) {
		__cfaabi_dbg_print_safe("Kernel : core %p signaling termination\n", &this);

		__atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED);
		wake( &this );

		P( terminated );
		verify( kernelTLS.this_processor != &this);
	}

	pthread_join( kernel_thread, NULL );
}

// Construct a cluster with empty queues/lists and add it to the global cluster list.
void ?{}(cluster & this, const char * name, Duration preemption_rate) with( this ) {
	this.name = name;
	this.preemption_rate = preemption_rate;
	ready_queue{};
	ready_queue_lock{};

	procs{ __get };
	idles{ __get };
	threads{ __get };

	doregister(this);
}

// Remove the cluster from the global cluster list.
void ^?{}(cluster & this) {
	unregister(this);
}

//=============================================================================================
// Kernel Scheduling logic
//=============================================================================================
static void runThread(processor * this, thread_desc * dst);
static void finishRunning(processor * this);
static void halt(processor * this);

//Main of the processor contexts
// Scheduling loop: until do_terminate is set, take the next ready thread of the cluster,
// run it and perform its post-run action; when no thread is ready, halt the processor.
void main(processorCtx_t & runner) {
	processor * this = runner.proc;
	verify(this);

	__cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);

	doregister(this->cltr, this);

	{
		// Setup preemption data
		preemption_scope scope = { this };

		__cfaabi_dbg_print_safe("Kernel : core %p started\n", this);

		thread_desc * readyThread = NULL;
		for( unsigned int spin_count = 0; ! __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST); spin_count++ )
		{
			readyThread = nextThread( this->cltr );

			if(readyThread)
			{
				verify( ! kernelTLS.preemption_state.enabled );

				runThread(this, readyThread);

				verify( ! kernelTLS.preemption_state.enabled );

				//Some actions need to be taken from the kernel
				finishRunning(this);

				spin_count = 0;
			}
			else
			{
				// spin(this, &spin_count);
				halt(this);
			}
		}

		__cfaabi_dbg_print_safe("Kernel : core %p stopping\n", this);
	}

	unregister(this->cltr, this);

	// Signal the destructor (or anyone waiting) that the processor is done.
	V( this->terminated );

	__cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
}

// Return the address of errno through a non-inlined call with an empty asm barrier.
// NOTE(review): presumably so the address is re-evaluated after a context switch that
// may resume the thread on a different kernel thread -- confirm.
static int * __volatile_errno() __attribute__((noinline));
static int * __volatile_errno() { asm(""); return &errno; }

// KERNEL ONLY
// runThread runs a thread by context switching
// from the processor coroutine to the target thread
static void runThread(processor * this, thread_desc * thrd_dst) {
	coroutine_desc * proc_cor = get_coroutine(this->runner);

	// Reset the terminating actions here
	this->finish.action_code = No_Action;

	// Update global state
	kernelTLS.this_thread = thrd_dst;

	// set state of processor coroutine to inactive and the thread to active
	proc_cor->state = proc_cor->state == Halted ? Halted : Inactive;
	thrd_dst->state = Active;

	// set context switch to the thread that the processor is executing
	verify( thrd_dst->context.SP );
	CtxSwitch( &proc_cor->context, &thrd_dst->context );
	// when CtxSwitch returns we are back in the processor coroutine

	// set state of processor coroutine to active and the thread to inactive
	thrd_dst->state = thrd_dst->state == Halted ? Halted : Inactive;
	proc_cor->state = Active;
}

// KERNEL_ONLY
// Context switch from the current thread back to its processor coroutine.
// errno is saved before the switch and restored after the thread is resumed.
static void returnToKernel() {
	coroutine_desc * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
	thread_desc * thrd_src = kernelTLS.this_thread;

	// set state of current coroutine to inactive
	thrd_src->state = thrd_src->state == Halted ? Halted : Inactive;
	proc_cor->state = Active;
	int local_errno = *__volatile_errno();

	// set new coroutine that the processor is executing
	// and context switch to it
	verify( proc_cor->context.SP );
	CtxSwitch( &thrd_src->context, &proc_cor->context );

	// set state of new coroutine to active
	proc_cor->state = proc_cor->state == Halted ? Halted : Inactive;
	thrd_src->state = Active;

	*__volatile_errno() = local_errno;
}

// KERNEL_ONLY
// Once a thread has finished running, some of
// its final actions must be executed from the kernel
// NOTE(review): this relies on Cforall's `choose` statement NOT falling through from one
// case to the next (otherwise every action would end in the abort) -- confirm against
// the Cforall manual before converting to a plain switch.
static void finishRunning(processor * this) with( this->finish ) {
	verify( ! kernelTLS.preemption_state.enabled );
	choose( action_code ) {
	case No_Action:
		break;
	case Release:
		unlock( *lock );
	case Schedule:
		ScheduleThread( thrd );
	case Release_Schedule:
		unlock( *lock );
		ScheduleThread( thrd );
	case Release_Multi:
		for(int i = 0; i < lock_count; i++) {
			unlock( *locks[i] );
		}
	case Release_Multi_Schedule:
		for(int i = 0; i < lock_count; i++) {
			unlock( *locks[i] );
		}
		for(int i = 0; i < thrd_count; i++) {
			ScheduleThread( thrds[i] );
		}
	case Callback:
		callback();
	default:
		abort("KERNEL ERROR: Unexpected action to run after thread");
	}
}

// KERNEL_ONLY
// Context invoker for processors
// This is the entry point for processors (kernel threads)
// It effectively constructs a coroutine by stealing the pthread stack
static void * CtxInvokeProcessor(void * arg) {
	processor * proc = (processor *) arg;
	kernelTLS.this_processor = proc;
	kernelTLS.this_thread    = NULL;
	kernelTLS.preemption_state.[enabled, disable_count] = [false, 1];
	// SKULLDUGGERY: We want to create a context for the processor coroutine
	// which is needed for the 2-step context switch. However, there is no reason
	// to waste the perfectly valid stack created by pthread.
	current_stack_info_t info;
	__stack_t ctx;
	info.storage = &ctx;
	(proc->runner){ proc, &info };

	__cfaabi_dbg_print_safe("Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.storage);

	//Set global state
	kernelTLS.this_thread    = NULL;

	//We now have a proper context from which to schedule threads
	__cfaabi_dbg_print_safe("Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);

	// SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
	// resume it to start it like it normally would, it will just context switch
	// back to here. Instead directly call the main since we already are on the
	// appropriate stack.
	get_coroutine(proc->runner)->state = Active;
	main( proc->runner );
	get_coroutine(proc->runner)->state = Halted;

	// Main routine of the core returned, the core is now fully terminated
	__cfaabi_dbg_print_safe("Kernel : core %p main ended (%p)\n", proc, &proc->runner);

	return NULL;
}

// Create the kernel thread that runs CtxInvokeProcessor for this processor.
// NOTE(review): the result of pthread_create is not checked.
static void start(processor * this) {
	__cfaabi_dbg_print_safe("Kernel : Starting core %p\n", this);

	pthread_create( &this->kernel_thread, NULL, CtxInvokeProcessor, (void*)this );

	__cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
}

// KERNEL_ONLY
// First context switch from the main thread into the coroutine of processor `this`:
// prepares the coroutine's stack, starts its context and switches to it.
void kernel_first_resume( processor * this ) {
	thread_desc * src = mainThread;
	coroutine_desc * dst = get_coroutine(this->runner);

	verify( ! kernelTLS.preemption_state.enabled );

	__stack_prepare( &dst->stack, 65000 );
	CtxStart(&this->runner, CtxInvokeCoroutine);

	verify( ! kernelTLS.preemption_state.enabled );

	dst->last = &src->self_cor;
	dst->starter = dst->starter ? dst->starter : &src->self_cor;

	// set state of current coroutine to inactive
	src->state = src->state == Halted ? Halted : Inactive;

	// context switch to specified coroutine
	verify( dst->context.SP );
	CtxSwitch( &src->context, &dst->context );
	// when CtxSwitch returns we are back in the src coroutine

	// set state of new coroutine to active
	src->state = Active;

	verify( ! kernelTLS.preemption_state.enabled );
}

// KERNEL_ONLY
// Last context switch from the main thread into the coroutine of processor `this`,
// used by kernel_shutdown after do_terminate has been set.
void kernel_last_resume( processor * this ) {
	coroutine_desc * src = &mainThread->self_cor;
	coroutine_desc * dst = get_coroutine(this->runner);

	verify( ! kernelTLS.preemption_state.enabled );
	verify( dst->starter == src );
	verify( dst->context.SP );

	// context switch to the processor
	CtxSwitch( &src->context, &dst->context );
}

//-----------------------------------------------------------------------------
// Scheduler routines

// KERNEL ONLY
// Append thrd to the ready queue of its current cluster and wake an idle processor
// of that cluster if there is one. Must be called with preemption disabled.
void ScheduleThread( thread_desc * thrd ) {
	verify( thrd );
	verify( thrd->state != Halted );

	verify( ! kernelTLS.preemption_state.enabled );

	verifyf( thrd->next == NULL, "Expected null got %p", thrd->next );

	with( *thrd->curr_cluster ) {
		lock  ( ready_queue_lock __cfaabi_dbg_ctx2 );
		bool was_empty = !(ready_queue != 0);
		append( ready_queue, thrd );
		unlock( ready_queue_lock );

		if(was_empty) {
			// queue was empty: check the idle list under the processor-list lock
			lock      (proc_list_lock __cfaabi_dbg_ctx2);
			if(idles) {
				wake_fast(idles.head);
			}
			unlock    (proc_list_lock);
		}
		else if( struct processor * idle = idles.head ) {
			// queue was not empty: the idle list is read without taking the lock
			wake_fast(idle);
		}

	}

	verify( ! kernelTLS.preemption_state.enabled );
}

// KERNEL ONLY
// Pop and return the head of the cluster's ready queue (the result of pop_head,
// which main() tests for null). Must be called with preemption disabled.
thread_desc * nextThread(cluster * this) with( *this ) {
	verify( ! kernelTLS.preemption_state.enabled );
	lock( ready_queue_lock __cfaabi_dbg_ctx2 );
	thread_desc * head = pop_head( ready_queue );
	unlock( ready_queue_lock );
	verify( ! kernelTLS.preemption_state.enabled );
	return head;
}

// Block the current thread: disable interrupts, return to the kernel, and re-enable
// interrupts once the thread is resumed. No post-run action is requested.
void BlockInternal() {
	disable_interrupts();
	verify( ! kernelTLS.preemption_state.enabled );
	returnToKernel();
	verify( ! kernelTLS.preemption_state.enabled );
	enable_interrupts( __cfaabi_dbg_ctx );
}

// Block the current thread and have the kernel release `lock` afterwards.
void BlockInternal( __spinlock_t * lock ) {
	disable_interrupts();
	with( *kernelTLS.this_processor ) {
		finish.action_code = Release;
		finish.lock        = lock;
	}

	verify( ! kernelTLS.preemption_state.enabled );
	returnToKernel();
	verify( ! kernelTLS.preemption_state.enabled );

	enable_interrupts( __cfaabi_dbg_ctx );
}

// Block the current thread and have the kernel schedule `thrd` afterwards.
void BlockInternal( thread_desc * thrd ) {
	disable_interrupts();
	with( * kernelTLS.this_processor ) {
		finish.action_code = Schedule;
		finish.thrd        = thrd;
	}

	verify( ! kernelTLS.preemption_state.enabled );
	returnToKernel();
	verify( ! kernelTLS.preemption_state.enabled );

	enable_interrupts( __cfaabi_dbg_ctx );
}

// Block the current thread and have the kernel release `lock` then schedule `thrd`.
void BlockInternal( __spinlock_t * lock, thread_desc * thrd ) {
	assert(thrd);
	disable_interrupts();
	with( * kernelTLS.this_processor ) {
		finish.action_code = Release_Schedule;
		finish.lock        = lock;
		finish.thrd        = thrd;
	}

	verify( ! kernelTLS.preemption_state.enabled );
	returnToKernel();
	verify( ! kernelTLS.preemption_state.enabled );

	enable_interrupts( __cfaabi_dbg_ctx );
}

// Block the current thread and have the kernel release the `count` locks in `locks`.
void BlockInternal(__spinlock_t * locks [], unsigned short count) {
	disable_interrupts();
	with( * kernelTLS.this_processor ) {
		finish.action_code = Release_Multi;
		finish.locks       = locks;
		finish.lock_count  = count;
	}

	verify( ! kernelTLS.preemption_state.enabled );
	returnToKernel();
	verify( ! kernelTLS.preemption_state.enabled );

	enable_interrupts( __cfaabi_dbg_ctx );
}

// Block the current thread and have the kernel release every lock in `locks`,
// then schedule every thread in `thrds`.
void BlockInternal(__spinlock_t * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
	disable_interrupts();
	with( *kernelTLS.this_processor ) {
		finish.action_code = Release_Multi_Schedule;
		finish.locks       = locks;
		finish.lock_count  = lock_count;
		finish.thrds       = thrds;
		finish.thrd_count  = thrd_count;
	}

	verify( ! kernelTLS.preemption_state.enabled );
	returnToKernel();
	verify( ! kernelTLS.preemption_state.enabled );

	enable_interrupts( __cfaabi_dbg_ctx );
}

// Block the current thread and have the kernel invoke `callback` afterwards.
void BlockInternal(__finish_callback_fptr_t callback) {
	disable_interrupts();
	with( *kernelTLS.this_processor ) {
		finish.action_code = Callback;
		finish.callback    = callback;
	}

	verify( ! kernelTLS.preemption_state.enabled );
	returnToKernel();
	verify( ! kernelTLS.preemption_state.enabled );

	enable_interrupts( __cfaabi_dbg_ctx );
}

// KERNEL ONLY
// Return to the kernel with preemption already disabled; the kernel releases `lock`
// and, when thrd is non-null, also schedules it. Interrupts are not re-enabled here.
void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
	verify( ! kernelTLS.preemption_state.enabled );
	with( * kernelTLS.this_processor ) {
		finish.action_code = thrd ? Release_Schedule : Release;
		finish.lock        = lock;
		finish.thrd        = thrd;
	}

	returnToKernel();
}

//=============================================================================================
// Kernel Setup logic
//=============================================================================================
//-----------------------------------------------------------------------------
// Kernel boot procedures
// Runs as a constructor: builds the main cluster, main thread and main processor in
// their static storage, starts preemption and performs the first context switch.
static void kernel_startup(void) {
	verify( ! kernelTLS.preemption_state.enabled );
	__cfaabi_dbg_print_safe("Kernel : Starting\n");

	__page_size = sysconf( _SC_PAGESIZE );

	__cfa_dbg_global_clusters.list{ __get };
	__cfa_dbg_global_clusters.lock{};

	// Initialize the main cluster
	mainCluster = (cluster *)&storage_mainCluster;
	(*mainCluster){"Main Cluster"};

	__cfaabi_dbg_print_safe("Kernel : Main cluster ready\n");

	// Start by initializing the main thread
	// SKULLDUGGERY: the mainThread steals the process main thread
	// which will then be scheduled by the mainProcessor normally
	mainThread = (thread_desc *)&storage_mainThread;
	current_stack_info_t info;
	info.storage = (__stack_t*)&storage_mainThreadCtx;
	(*mainThread){ &info };

	__cfaabi_dbg_print_safe("Kernel : Main thread ready\n");

	// Construct the processor context of the main processor
	void ?{}(processorCtx_t & this, processor * proc) {
		(this.__cor){ "Processor" };
		this.__cor.starter = NULL;
		this.proc = proc;
	}

	// Construct the main processor: unlike other processors it does not start a new
	// kernel thread, it records the current one (pthread_self).
	void ?{}(processor & this) with( this ) {
		name = "Main Processor";
		cltr = mainCluster;
		terminated{ 0 };
		do_terminate = false;
		preemption_alarm = NULL;
		pending_preemption = false;
		kernel_thread = pthread_self();

		runner{ &this };
		__cfaabi_dbg_print_safe("Kernel : constructed main processor context %p\n", &runner);
	}

	// Initialize the main processor and the main processor ctx
	// (the coroutine that contains the processing control flow)
	mainProcessor = (processor *)&storage_mainProcessor;
	(*mainProcessor){};

	//initialize the global state variables
	kernelTLS.this_processor = mainProcessor;
	kernelTLS.this_thread    = mainThread;

	// Enable preemption
	kernel_start_preemption();

	// Add the main thread to the ready queue
	// once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
	ScheduleThread(mainThread);

	// SKULLDUGGERY: Force a context switch to the main processor to set the main thread's context to the current UNIX
	// context. Hence, the main thread does not begin through CtxInvokeThread, like all other threads. The trick here is that
	// mainThread is on the ready queue when this call is made.
	kernel_first_resume( kernelTLS.this_processor );

	// THE SYSTEM IS NOW COMPLETELY RUNNING
	__cfaabi_dbg_print_safe("Kernel : Started\n--------------------------------------------------\n\n");

	verify( ! kernelTLS.preemption_state.enabled );
	enable_interrupts( __cfaabi_dbg_ctx );
	verify( TL_GET( preemption_state.enabled ) );
}

// Runs as a destructor: stops the main processor, stops preemption and destroys the
// manually constructed kernel objects.
static void kernel_shutdown(void) {
	__cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n");

	verify( TL_GET( preemption_state.enabled ) );
	disable_interrupts();
	verify( ! kernelTLS.preemption_state.enabled );

	// SKULLDUGGERY: Notify the mainProcessor it needs to terminate.
	// When its coroutine terminates, it returns control to the mainThread
	// which is currently here
	__atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
	kernel_last_resume( kernelTLS.this_processor );
	mainThread->self_cor.state = Halted;

	// THE SYSTEM IS NOW COMPLETELY STOPPED

	// Disable preemption
	kernel_stop_preemption();

	// Destroy the main processor and its context in reverse order of construction
	// These were manually constructed so we need manually destroy them
	^(mainProcessor->runner){};
	^(mainProcessor){};

	// Final step, destroy the main thread since it is no longer needed
	// Since we provided a stack to this task it will not destroy anything
	^(mainThread){};

	^(__cfa_dbg_global_clusters.list){};
	^(__cfa_dbg_global_clusters.lock){};

	__cfaabi_dbg_print_safe("Kernel : Shutdown complete\n");
}

//=============================================================================================
// Kernel Quiescing
//=============================================================================================
// Put the processor to sleep: move it from the cluster's `procs` list to its `idles`
// list, wait on idleLock, then move it back once woken.
static void halt(processor * this) with( *this ) {
	// verify( ! __atomic_load_n(&do_terminate, __ATOMIC_SEQ_CST) );

	with( *cltr ) {
		lock      (proc_list_lock __cfaabi_dbg_ctx2);
		remove    (procs, *this);
		push_front(idles, *this);
		unlock    (proc_list_lock);
	}

	__cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);

	wait( idleLock );

	__cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);

	with( *cltr ) {
		lock      (proc_list_lock __cfaabi_dbg_ctx2);
		remove    (idles, *this);
		push_front(procs, *this);
		unlock    (proc_list_lock);
	}
}

//=============================================================================================
// Unexpected Terminating logic
//=============================================================================================
static __spinlock_t kernel_abort_lock;
static bool kernel_abort_called = false;

// Called when aborting. The first caller marks the abort as started and gets back the
// current thread; any later caller is suspended and, should it ever resume, exits.
void * kernel_abort(void) __attribute__ ((__nothrow__)) {
	// abort cannot be recursively entered by the same or different processors because all signal handlers return when
	// the globalAbort flag is true.
	lock( kernel_abort_lock __cfaabi_dbg_ctx2 );

	// first task to abort ?
	if ( kernel_abort_called ) {			// not first task to abort ?
		unlock( kernel_abort_lock );

		sigset_t mask;
		sigemptyset( &mask );
		sigaddset( &mask, SIGALRM );		// block SIGALRM signals
		sigsuspend( &mask );				// block the processor to prevent further damage during abort
		_exit( EXIT_FAILURE );				// if processor unblocks before it is killed, terminate it
	}
	else {
		kernel_abort_called = true;
		unlock( kernel_abort_lock );
	}

	return kernelTLS.this_thread;
}

// Convert an snprintf result into the number of bytes actually stored in a buffer of
// `size` bytes. snprintf returns the length the full output WOULD have had (or a
// negative value on error), so the raw result may exceed what the buffer holds.
static int abort_msg_len( int len, int size ) {
	if ( len < 0 || size <= 0 ) return 0;		// output error or no buffer: nothing to write
	return len < size ? len : size - 1;		// truncated: only size - 1 characters were stored
}

// Write a message naming the thread (and coroutine, if it is not the thread's own)
// that was executing when the abort happened.
//   kernel_data     : value returned by kernel_abort (the current thread, possibly NULL)
//   abort_text      : scratch buffer used to format the message
//   abort_text_size : size of abort_text in bytes
void kernel_abort_msg( void * kernel_data, char * abort_text, int abort_text_size ) {
	thread_desc * thrd = kernel_data;

	if(thrd) {
		int len = snprintf( abort_text, abort_text_size, "Error occurred while executing thread %.256s (%p)", thrd->self_cor.name, thrd );
		// never write more bytes than snprintf stored in abort_text
		__cfaabi_dbg_bits_write( abort_text, abort_msg_len( len, abort_text_size ) );

		if ( &thrd->self_cor != thrd->curr_cor ) {
			len = snprintf( abort_text, abort_text_size, " in coroutine %.256s (%p).\n", thrd->curr_cor->name, thrd->curr_cor );
			__cfaabi_dbg_bits_write( abort_text, abort_msg_len( len, abort_text_size ) );
		}
		else {
			__cfaabi_dbg_bits_write( ".\n", 2 );
		}
	}
	else {
		int len = snprintf( abort_text, abort_text_size, "Error occurred outside of any thread.\n" );
		__cfaabi_dbg_bits_write( abort_text, abort_msg_len( len, abort_text_size ) );
	}
}

// Returns 4 when the current thread runs on the main thread's coroutine, 2 otherwise.
// NOTE(review): presumably the number of trailing stack frames to hide in the abort
// backtrace -- confirm against the caller.
int kernel_abort_lastframe( void ) __attribute__ ((__nothrow__)) {
	return get_coroutine(kernelTLS.this_thread) == get_coroutine(mainThread) ? 4 : 2;
}

static __spinlock_t kernel_debug_lock;

extern "C" {
	// Acquire the lock serializing debug output.
	void __cfaabi_dbg_bits_acquire() {
		lock( kernel_debug_lock __cfaabi_dbg_ctx2 );
	}

	// Release the lock serializing debug output.
	void __cfaabi_dbg_bits_release() {
		unlock( kernel_debug_lock );
	}
}

//=============================================================================================
// Kernel Utilities
//=============================================================================================
//-----------------------------------------------------------------------------
// Locks
// Construct a semaphore with the given initial count and an empty waiting list.
void  ?{}( semaphore & this, int count = 1 ) {
	(this.lock){};
	this.count = count;
	(this.waiting){};
}

// Nothing to release.
void ^?{}(semaphore & this) {}

// Decrement the count; if it becomes negative, queue the current thread and block,
// letting the kernel release the semaphore's spin lock after the switch.
void P(semaphore & this) with( this ){
	lock( lock __cfaabi_dbg_ctx2 );
	count -= 1;
	if ( count < 0 ) {
		// queue current task
		append( waiting, kernelTLS.this_thread );

		// atomically release spin lock and block
		BlockInternal( &lock );
	}
	else {
		unlock( lock );
	}
}

// Increment the count; if threads were waiting, pop the first one and wake it.
// WakeThread is called with NULL when no thread was waiting.
void V(semaphore & this) with( this ) {
	thread_desc * thrd = NULL;
	lock( lock __cfaabi_dbg_ctx2 );
	count += 1;
	if ( count <= 0 ) {
		// remove task at head of waiting list
		thrd = pop_head( waiting );
	}

	unlock( lock );

	// make new owner
	WakeThread( thrd );
}

//-----------------------------------------------------------------------------
// Global Queues
// Add a cluster to the global cluster list.
void doregister( cluster     & cltr ) {
	lock      ( __cfa_dbg_global_clusters.lock __cfaabi_dbg_ctx2);
	push_front( __cfa_dbg_global_clusters.list, cltr );
	unlock    ( __cfa_dbg_global_clusters.lock );
}

// Remove a cluster from the global cluster list.
void unregister( cluster     & cltr ) {
	lock  ( __cfa_dbg_global_clusters.lock __cfaabi_dbg_ctx2);
	remove( __cfa_dbg_global_clusters.list, cltr );
	unlock( __cfa_dbg_global_clusters.lock );
}

// Add a thread to the cluster's thread list.
void doregister( cluster * cltr, thread_desc & thrd ) {
	lock      (cltr->thread_list_lock __cfaabi_dbg_ctx2);
	push_front(cltr->threads, thrd);
	unlock    (cltr->thread_list_lock);
}

// Remove a thread from the cluster's thread list.
void unregister( cluster * cltr, thread_desc & thrd ) {
	lock  (cltr->thread_list_lock __cfaabi_dbg_ctx2);
	remove(cltr->threads, thrd );
	unlock(cltr->thread_list_lock);
}

// Add a processor to the cluster's list of (non-idle) processors.
void doregister( cluster * cltr, processor * proc ) {
	lock      (cltr->proc_list_lock __cfaabi_dbg_ctx2);
	push_front(cltr->procs, *proc);
	unlock    (cltr->proc_list_lock);
}

// Remove a processor from the cluster's list of (non-idle) processors.
void unregister( cluster * cltr, processor * proc ) {
	lock  (cltr->proc_list_lock __cfaabi_dbg_ctx2);
	remove(cltr->procs, *proc );
	unlock(cltr->proc_list_lock);
}

//-----------------------------------------------------------------------------
// Debug
// Record, in the spin lock itself, the name passed by the caller and the current thread.
__cfaabi_dbg_debug_do(
	extern "C" {
		void __cfaabi_dbg_record(__spinlock_t & this, const char * prev_name) {
			this.prev_name = prev_name;
			this.prev_thrd = kernelTLS.this_thread;
		}
	}
)

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //