//                              -*- Mode: CFA -*-
//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel.c --
//
// Author           : Thierry Delisle
// Created On       : Tue Jan 17 12:27:26 2016
// Last Modified By : Thierry Delisle
// Last Modified On : --
// Update Count     : 0
//

//Start and stop routine for the kernel, declared first to make sure they run first
void kernel_startup(void)  __attribute__((constructor(101)));
void kernel_shutdown(void) __attribute__((destructor(101)));

//Header
#include "kernel"

//C Includes
// NOTE(review): the three system header names below were missing from the
// reviewed text; they are restored from the symbols this file uses (NULL,
// fegetenv, getrlimit) -- confirm against the repository.
#include <stddef.h>
extern "C" {
#include <fenv.h>
#include <sys/resource.h>
}

//CFA Includes
#include "libhdr.h"
#include "threads"

//Private includes
#define __CFA_INVOKE_PRIVATE__
#include "invoke.h"

// Single global spin lock word (0 = free, 1 = held). The scheduler routines
// and lock( simple_lock * ) in this file take it around their list updates.
static volatile int lock;

// Busy-waits until the lock word is acquired.
// The plain read of *lock is checked first so that the atomic exchange is only
// attempted when the lock looks free; the exchange returning 0 means this
// caller is the one that changed it from 0 to 1.
void spin_lock( volatile int *lock ) {
	for ( ;; ) {
		if ( *lock == 0 && __sync_lock_test_and_set_4( lock, 1 ) == 0 ) break;
	}
}

// Releases a lock word previously acquired with spin_lock.
void spin_unlock( volatile int *lock ) {
	__sync_lock_release_4( lock );
}

//-----------------------------------------------------------------------------
// Kernel storage

// Coroutine context of a processor: the coroutine itself plus a back pointer
// to the processor it runs for.
struct processorCtx_t {
	processor * proc;
	coroutine c;
};

DECL_COROUTINE(processorCtx_t);

// Reserves raw static storage large enough for one T, named X_storage.
// The objects are constructed in place later (see kernel_startup), so the
// buffer is explicitly aligned: a bare char array carries no alignment
// guarantee for the types it is cast to.
#define KERNEL_STORAGE(T,X) static char X##_storage[sizeof(T)] __attribute__((aligned))

KERNEL_STORAGE(processorCtx_t, systemProcessorCtx);
KERNEL_STORAGE(cluster, systemCluster);
KERNEL_STORAGE(processor, systemProcessor);
KERNEL_STORAGE(thread, mainThread);
KERNEL_STORAGE(machine_context_t, mainThread_context);

cluster * systemCluster;
processor * systemProcessor;
thread * mainThread;

//-----------------------------------------------------------------------------
// Global state

// Processor associated with the calling kernel thread.
thread_local processor * this_processor;

// Returns the processor recorded for the calling kernel thread.
processor * get_this_processor() {
	return this_processor;
}

// Returns the coroutine the current processor has recorded as running.
coroutine * this_coroutine(void) {
	return this_processor->current_coroutine;
}

// Returns the thread the current processor has recorded as running.
thread * this_thread(void) {
	return this_processor->current_thread;
}
//-----------------------------------------------------------------------------
// Main thread construction

// Snapshot of the stack the caller is currently running on, used to build a
// coroutine/thread around an already existing stack.
struct current_stack_info_t {
	machine_context_t ctx;
	unsigned int size;		// size of stack
	void *base;				// base of stack
	void *storage;			// pointer to stack
	void *limit;			// stack grows towards stack limit
	void *context;			// address of cfa_context_t
	void *top;				// address of top of storage
};

// Captures the current machine context and derives the stack description from
// it: base from the frame pointer, storage from the stack pointer, size from
// the RLIMIT_STACK soft limit.
void ?{}( current_stack_info_t * this ) {
	CtxGet( &this->ctx );
	this->base = this->ctx.FP;
	this->storage = this->ctx.SP;

	// NOTE(review): the getrlimit result is not checked and rlim_cur is
	// narrowed to unsigned int -- confirm this is acceptable when the limit
	// is unlimited or larger than UINT_MAX.
	rlimit r;
	getrlimit( RLIMIT_STACK, &r);
	this->size = r.rlim_cur;

	this->limit = (void *)(((intptr_t)this->base) - this->size);
	this->context = &mainThread_context_storage;
	this->top = this->base;
}

// Builds a coroutine stack descriptor from an existing stack; the stack is
// flagged as user provided.
void ?{}( coStack_t * this, current_stack_info_t * info) {
	this->size = info->size;
	this->storage = info->storage;
	this->limit = info->limit;
	this->base = info->base;
	this->context = info->context;
	this->top = info->top;
	this->userStack = true;
}

// Builds a coroutine on top of an existing stack.
void ?{}( coroutine * this, current_stack_info_t * info) {
	(&this->stack){ info };
	this->name = "Main Thread";
	this->errno_ = 0;
	this->state = Inactive;
	this->notHalted = true;
}

// Builds a thread whose coroutine runs on an existing stack.
void ?{}( thread * this, current_stack_info_t * info) {
	(&this->c){ info };
}

//-----------------------------------------------------------------------------
// Processor coroutine

// Default-constructs the processor coroutine and links it with its processor.
void ?{}(processorCtx_t * this, processor * proc) {
	(&this->c){};
	this->proc = proc;
	proc->runner = this;
}

// Same as above, but the coroutine is built on the stack described by info.
void ?{}(processorCtx_t * this, processor * proc, current_stack_info_t * info) {
	(&this->c){ info };
	this->proc = proc;
	proc->runner = this;
}

void start(processor * this);

// Constructs a processor on the system cluster.
void ?{}(processor * this) {
	this{ systemCluster };
}

// Constructs a processor on the given cluster and starts it immediately.
void ?{}(processor * this, cluster * cltr) {
	this->cltr = cltr;
	this->current_coroutine = NULL;
	this->current_thread = NULL;
	(&this->lock){};
	this->terminated = false;

	start( this );
}

// Constructs a processor around caller-provided runner storage.
// Unlike the two-argument form, this does not call start().
void ?{}(processor * this, cluster * cltr, processorCtx_t * runner) {
	this->cltr = cltr;
	this->current_coroutine = NULL;
	this->current_thread = NULL;
	(&this->lock){};
	this->terminated = false;

	this->runner = runner;
	LIB_DEBUG_PRINTF("Kernel : constructing processor context %p\n", runner);
	runner{ this };
}

// If the processor has not been flagged as terminated yet, flags it and then
// blocks the calling thread on the processor's lock; the processor main
// unlocks that lock once its loop has exited.
void ^?{}(processor * this) {
	if( ! this->terminated ) {
		LIB_DEBUG_PRINTF("Kernel : core %p signaling termination\n", this);
		this->terminated = true;
		lock( &this->lock );
	}
}

// Constructs the ready queue and resets the global spin lock word.
void ?{}(cluster * this) {
	( &this->ready_queue ){};
	lock = 0;
}

void ^?{}(cluster * this) {

}

//-----------------------------------------------------------------------------
// Processor running routines
void main(processorCtx_t *);
thread * nextThread(cluster * this);
void scheduleInternal(processor * this, thread * dst);
void spin(processor * this, unsigned int * spin_count);
void thread_schedule( thread * thrd );

//Main of the processor contexts
// Repeatedly takes the next ready thread from the processor's cluster and runs
// it, until the processor is flagged as terminated; then unlocks the
// processor's lock.
void main(processorCtx_t * runner) {
	processor * this = runner->proc;
	LIB_DEBUG_PRINTF("Kernel : core %p starting\n", this);

	// NOTE(review): __mxcsr is an implementation-specific fenv_t member --
	// confirm the targets this debug output is expected to build on.
	fenv_t envp;
	fegetenv( &envp );
	LIB_DEBUG_PRINTF("Kernel : mxcsr %x\n", envp.__mxcsr);

	thread * readyThread = NULL;
	for( unsigned int spin_count = 0; ! this->terminated; spin_count++ ) {
		readyThread = nextThread( this->cltr );

		if(readyThread) {
			scheduleInternal(this, readyThread);
			spin_count = 0;
		} else {
			spin(this, &spin_count);
		}
	}

	LIB_DEBUG_PRINTF("Kernel : core %p unlocking thread\n", this);
	unlock( &this->lock );
	LIB_DEBUG_PRINTF("Kernel : core %p terminated\n", this);
}

//Declarations for scheduleInternal
extern void ThreadCtxSwitch(coroutine * src, coroutine * dst);

// scheduleInternal runs a thread by context switching
// from the processor coroutine to the target thread
void scheduleInternal(processor * this, thread * dst) {
	this->thread_action = NoAction;

	coroutine * proc_ctx = get_coroutine(this->runner);
	coroutine * thrd_ctx = get_coroutine(dst);
	thrd_ctx->last = proc_ctx;

	// context switch to specified coroutine
	// Which is now the current_coroutine
	// LIB_DEBUG_PRINTF("Kernel : switching to ctx %p (from %p, current %p)\n", thrd_ctx, proc_ctx, this->current_coroutine);
	this->current_thread = dst;
	this->current_coroutine = thrd_ctx;
	CtxSwitch( proc_ctx->stack.context, thrd_ctx->stack.context );
	this->current_coroutine = proc_ctx;
	// LIB_DEBUG_PRINTF("Kernel : returned from ctx %p (to %p, current %p)\n", thrd_ctx, proc_ctx, this->current_coroutine);

	// when CtxSwitch returns we are back in the processor coroutine
	// Put the thread back on the ready queue only if it asked for it.
	if(this->thread_action == Reschedule) {
		thread_schedule( dst );
	}
}

// Handles spinning logic
// TODO : find some strategy to put cores to sleep after some time
// NOTE(review): the loop in main() also increments spin_count on every
// iteration, so an idle iteration advances the counter twice -- confirm intent.
void spin(processor * this, unsigned int * spin_count) {
	(*spin_count)++;
}

// Context invoker for processors
// This is the entry point for processors (kernel threads)
// It effectively constructs a coroutine by stealing the pthread stack
void * CtxInvokeProcessor(void * arg) {
	// arg is the processor handed to pthread_create by start()
	processor * proc = (processor *) arg;
	this_processor = proc;
	// SKULLDUGGERY: We want to create a context for the processor coroutine
	// which is needed for the 2-step context switch. However, there is no reason
	// to waste the perfectly valid stack created by pthread.
	current_stack_info_t info;
	machine_context_t ctx;
	info.context = &ctx;
	processorCtx_t proc_cor_storage = { proc, &info };

	LIB_DEBUG_PRINTF("Coroutine : created stack %p\n", proc_cor_storage.c.stack.base);

	//Set global state
	proc->current_coroutine = &proc->runner->c;
	proc->current_thread = NULL;

	//We now have a proper context from which to schedule threads
	LIB_DEBUG_PRINTF("Kernel : core %p created (%p, %p)\n", proc, proc->runner, &ctx);

	// SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
	// resume it to start it like it normally would, it will just context switch
	// back to here. Instead directly call the main since we already are on the
	// appropriate stack.
	proc_cor_storage.c.state = Active;
	main( &proc_cor_storage );
	proc_cor_storage.c.state = Halt;
	proc_cor_storage.c.notHalted = false;

	// Main routine of the core returned, the core is now fully terminated
	LIB_DEBUG_PRINTF("Kernel : core %p main ended (%p)\n", proc, proc->runner);

	return NULL;
}

// Creates the kernel thread that runs CtxInvokeProcessor for this processor.
void start(processor * this) {
	LIB_DEBUG_PRINTF("Kernel : Starting core %p\n", this);

	// A processor whose kernel thread was never created can never run its
	// main loop, so a creation failure is reported instead of being ignored.
	int ret = pthread_create( &this->kernel_thread, NULL, CtxInvokeProcessor, (void*)this );
	assertf( ret == 0, "pthread_create failed with error %d", ret );

	LIB_DEBUG_PRINTF("Kernel : core %p started\n", this);
}

//-----------------------------------------------------------------------------
// Scheduler routines

// Appends thrd to the ready queue of the system processor's cluster.
// thrd must not currently be linked into any list (its next pointer is NULL).
void thread_schedule( thread * thrd ) {
	assertf( thrd->next == NULL, "Expected null got %p", thrd->next );

	spin_lock( &lock );
	append( &systemProcessor->cltr->ready_queue, thrd );
	spin_unlock( &lock );
}

// Removes and returns the first thread of the cluster's ready queue,
// or NULL when the queue is empty.
thread * nextThread(cluster * this) {
	spin_lock( &lock );
	thread * head = pop_head( &this->ready_queue );
	spin_unlock( &lock );
	return head;
}

//-----------------------------------------------------------------------------
// Kernel boot procedures
void kernel_startup(void) {
	LIB_DEBUG_PRINTF("Kernel : Starting\n");

	// Start by initializing the main thread
	// SKULLDUGGERY: the mainThread steals the process main thread
	// which will then be scheduled by the systemProcessor normally
	mainThread = (thread *)&mainThread_storage;
	current_stack_info_t info;
	mainThread{ &info };

	// Initialize the system cluster
	systemCluster = (cluster *)&systemCluster_storage;
	systemCluster{};

	// Initialize the system processor and the system processor ctx
	// (the coroutine that contains the processing control flow)
	systemProcessor = (processor *)&systemProcessor_storage;
	systemProcessor{ systemCluster, (processorCtx_t *)&systemProcessorCtx_storage };

	// Add the main thread to the ready queue
	// once resume is called on systemProcessor->runner the mainThread needs to be scheduled like any normal thread
	thread_schedule(mainThread);

	//initialize the global state variables
	this_processor = systemProcessor;
	this_processor->current_thread = mainThread;
	this_processor->current_coroutine = &mainThread->c;

	// SKULLDUGGERY: Force a context switch to the system processor to set the main thread's context to the current UNIX
	// context. Hence, the main thread does not begin through CtxInvokeThread, like all other threads. The trick here is that
	// mainThread is on the ready queue when this call is made.
	resume(systemProcessor->runner);

	// THE SYSTEM IS NOW COMPLETELY RUNNING
	LIB_DEBUG_PRINTF("Kernel : Started\n--------------------------------------------------\n\n");
}

void kernel_shutdown(void) {
	LIB_DEBUG_PRINTF("\n--------------------------------------------------\nKernel : Shutting down\n");

	// SKULLDUGGERY: Notify the systemProcessor it needs to terminate.
	// When its coroutine terminates, it returns control to the mainThread
	// which is currently here
	systemProcessor->terminated = true;
	suspend();

	// THE SYSTEM IS NOW COMPLETELY STOPPED

	// Destroy the system processor and its context in reverse order of construction
	// These were manually constructed so we need to manually destroy them
	^(systemProcessor->runner){};
	^(systemProcessor){};

	// Final step, destroy the main thread since it is no longer needed
	// Since we provided a stack to this task it will not destroy anything
	^(mainThread){};

	LIB_DEBUG_PRINTF("Kernel : Shutdown complete\n");
}

//-----------------------------------------------------------------------------
// Locks

// Constructs a simple_lock with an empty list of blocked threads.
void ?{}( simple_lock * this ) {
	( &this->blocked ){};
}

void ^?{}( simple_lock * this ) {

}

// Adds the calling thread to the lock's blocked list and suspends it.
void lock( simple_lock * this ) {
	{
		spin_lock( &lock );
		append( &this->blocked, this_thread() );
		spin_unlock( &lock );
	}
	suspend();
}

// Hands every thread on the blocked list back to the scheduler.
void unlock( simple_lock * this ) {
	for ( ;; ) {
		// Pop under the same spin lock that lock() holds while appending, and
		// release it before thread_schedule(), which acquires it itself.
		spin_lock( &lock );
		thread * it = pop_head( &this->blocked );
		spin_unlock( &lock );

		if ( ! it ) break;
		thread_schedule( it );
	}
}

//-----------------------------------------------------------------------------
// Queues

// Constructs an empty list: no head, and tail refers to the head slot so that
// the first append fills it.
void ?{}( simple_thread_list * this ) {
	this->head = NULL;
	this->tail = &this->head;
}

// Appends t at the end of the list. t must not already be linked
// (its next pointer is NULL).
void append( simple_thread_list * this, thread * t ) {
	assert( t->next == NULL );
	*this->tail = t;
	this->tail = &t->next;
}

// Unlinks and returns the first thread of the list, or NULL when it is empty.
thread * pop_head( simple_thread_list * this ) {
	thread * head = this->head;
	if( head ) {
		this->head = head->next;
		if( !head->next ) {
			// The list became empty: point tail back at the head slot
			this->tail = &this->head;
		}
		head->next = NULL;
	}

	return head;
}

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //