//                              -*- Mode: CFA -*-
//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel.c --
//
// Author           : Thierry Delisle
// Created On       : Tue Jan 17 12:27:26 2016
// Last Modified By : Thierry Delisle
// Last Modified On : --
// Update Count     : 0
//

// Start and stop routines of the kernel. They are declared before any include so that
// the constructor/destructor attributes (priority 101) are attached up front.
// NOTE(review): the intent is that startup runs before, and shutdown after, every other
// constructor/destructor — confirm the priority ordering against the toolchain docs.
void kernel_startup(void)  __attribute__((constructor(101)));
void kernel_shutdown(void) __attribute__((destructor(101)));

//Header
#include "kernel"

//C Includes
#include <stddef.h>
extern "C" {
#include <fenv.h>
#include <sys/resource.h>
}

//CFA Includes
#include "libhdr.h"
#include "threads"

//Private includes
#define __CFA_INVOKE_PRIVATE__
#include "invoke.h"

// Global spin-lock word used with spin_lock/spin_unlock: 0 when free, 1 when held.
// It is taken around the ready-queue accesses in thread_schedule/nextThread and around
// the append to a simple_lock's blocked list in lock().
static volatile int lock;

// Busy-waits until the lock word pointed to by `lock` has been acquired.
// The word is first read plainly and the atomic test-and-set is only attempted when it
// reads 0; the lock is owned once the test-and-set returns the previous value 0.
void spin_lock( volatile int *lock ) {
	for ( ;; ) {
		// still held by someone else: keep reading without attempting the atomic write
		if ( *lock != 0 ) continue;

		// looked free: try to grab it, success if the previous value was 0
		if ( __sync_lock_test_and_set_4( lock, 1 ) == 0 ) return;
	}
}

// Releases a lock word previously acquired with spin_lock by atomically
// resetting it to 0.
void spin_unlock( volatile int *lock )
{
	__sync_lock_release_4( lock );
}

//-----------------------------------------------------------------------------
// Kernel storage
// Coroutine context of a processor: main(processorCtx_t *) runs the scheduling loop
// of `proc` on it.
struct processorCtx_t {
	processor * proc;	// processor this context belongs to (set by the constructors)
	coroutine c;		// coroutine state of the context
};

// NOTE(review): presumably provides the coroutine accessors (e.g. get_coroutine) for
// processorCtx_t — confirm against the macro definition in the headers.
DECL_COROUTINE(processorCtx_t);

// Reserves raw static storage, named X_storage, large enough for one object of type T.
// The object is constructed in place later, so the buffer must be suitably aligned
// for T: a plain char array only guarantees an alignment of 1.
#define KERNEL_STORAGE(T,X) __attribute__((aligned(__alignof__(T)))) static char X##_storage[sizeof(T)]

// Raw storage for the kernel objects. The first four are constructed in place by
// kernel_startup; mainThread_context_storage is used as the context address by the
// default constructor of current_stack_info_t.
KERNEL_STORAGE(processorCtx_t, systemProcessorCtx);
KERNEL_STORAGE(cluster, systemCluster);
KERNEL_STORAGE(processor, systemProcessor);
KERNEL_STORAGE(thread, mainThread);
KERNEL_STORAGE(machine_context_t, mainThread_context);

// Typed handles to the objects above; kernel_startup points them at their storage.
cluster * systemCluster;
processor * systemProcessor;
thread * mainThread;

//-----------------------------------------------------------------------------
// Global state

// Per-kernel-thread pointer to the processor in use; assigned in kernel_startup
// (system processor) and at the top of CtxInvokeProcessor (every other processor).
thread_local processor * this_processor;

// Returns the value of the thread-local this_processor for the calling kernel thread.
processor * get_this_processor() {
	processor * proc = this_processor;
	return proc;
}

// Returns the current_coroutine recorded on the calling kernel thread's processor.
// this_processor must already be set for the calling kernel thread.
coroutine * this_coroutine(void) {
	processor * proc = this_processor;
	return proc->current_coroutine;
}

// Returns the current_thread recorded on the calling kernel thread's processor.
// this_processor must already be set for the calling kernel thread.
thread * this_thread(void) {
	processor * proc = this_processor;
	return proc->current_thread;
}

//-----------------------------------------------------------------------------
// Main thread construction
// Description of the stack the caller is currently executing on. The default
// constructor fills it in, and the coStack_t/coroutine/thread constructors taking a
// current_stack_info_t copy it so an existing stack can be adopted.
struct current_stack_info_t {
	machine_context_t ctx;	// machine context captured with CtxGet
	unsigned int size;		// size of stack
	void *base;				// base of stack
	void *storage;			// pointer to stack
	void *limit;			// stack grows towards stack limit
	void *context;			// address of cfa_context_t
	void *top;				// address of top of storage
};

// Default constructor: captures a description of the stack the caller is running on.
// The frame pointer read by CtxGet becomes base and top, the stack pointer becomes
// storage, the size is the soft RLIMIT_STACK limit and limit is base - size.
// The context address defaults to the main thread's context storage; callers adopting
// another stack overwrite `context` afterwards.
void ?{}( current_stack_info_t * this ) {
	CtxGet( &this->ctx );
	this->base = this->ctx.FP;
	this->storage = this->ctx.SP;

	rlimit r;
	int ret = getrlimit( RLIMIT_STACK, &r);
	// r is only filled in when getrlimit succeeds; never derive the size from garbage
	assertf( ret == 0, "getrlimit( RLIMIT_STACK ) failed with %d", ret );
	// NOTE(review): rlim_cur is an rlim_t and can exceed unsigned int (e.g. RLIM_INFINITY),
	// in which case this assignment truncates — confirm this is acceptable.
	this->size = r.rlim_cur;

	this->limit = (void *)(((intptr_t)this->base) - this->size);
	this->context = &mainThread_context_storage;
	this->top = this->base;
}

// Builds a coroutine stack descriptor from an already existing stack described by
// `info`. Every field is copied verbatim and userStack is set to true.
void ?{}( coStack_t * this, current_stack_info_t * info) {
	// the stack was not created by this constructor
	this->userStack = true;

	// geometry of the adopted stack
	this->size    = info->size;
	this->base    = info->base;
	this->top     = info->top;
	this->limit   = info->limit;
	this->storage = info->storage;

	// where the machine context of this stack is saved
	this->context = info->context;
}

// Constructs a coroutine on top of the existing stack described by `info`.
// The coroutine is named "Main Thread", starts Inactive and not halted, with errno_ 0.
void ?{}( coroutine * this, current_stack_info_t * info) {
	coStack_t * stk = &this->stack;
	stk{ info };

	this->name = "Main Thread";
	this->state = Inactive;
	this->notHalted = true;
	this->errno_ = 0;
}

// Constructs a thread whose coroutine adopts the existing stack described by `info`.
void ?{}( thread * this, current_stack_info_t * info) {
	coroutine * cor = &this->c;
	cor{ info };
}

//-----------------------------------------------------------------------------
// Processor coroutine
// Constructs a processor context with a default-constructed coroutine and links it
// with `proc` in both directions (this->proc and proc->runner).
void ?{}(processorCtx_t * this, processor * proc) {
	coroutine * cor = &this->c;
	cor{};

	proc->runner = this;
	this->proc = proc;
}

// Constructs a processor context whose coroutine adopts the existing stack described
// by `info`, and links it with `proc` in both directions (this->proc and proc->runner).
void ?{}(processorCtx_t * this, processor * proc, current_stack_info_t * info) {
	coroutine * cor = &this->c;
	cor{ info };

	proc->runner = this;
	this->proc = proc;
}

// Forward declaration: creates the kernel thread of a processor (defined further down),
// needed by the processor constructor below.
void start(processor * this);

// Default constructor: builds the processor on the system cluster by delegating to the
// (processor *, cluster *) constructor.
void ?{}(processor * this) {
	cluster * cltr = systemCluster;
	this{ cltr };
}

// Constructs a processor attached to cluster `cltr` and immediately starts its kernel
// thread. All fields are initialised before start() is called, since the new kernel
// thread receives this processor as its argument.
void ?{}(processor * this, cluster * cltr) {
	this->terminated = false;
	this->cltr = cltr;
	this->current_thread = NULL;
	this->current_coroutine = NULL;

	simple_lock * term_lock = &this->lock;
	term_lock{};

	// must stay last: hands the fully initialised processor to a new kernel thread
	start( this );
}

// Constructs a processor attached to cluster `cltr` that uses the caller-provided
// storage `runner` for its context. No kernel thread is started; the context is
// constructed in place with the (processorCtx_t *, processor *) constructor.
void ?{}(processor * this, cluster * cltr, processorCtx_t * runner) {
	this->terminated = false;
	this->cltr = cltr;
	this->current_thread = NULL;
	this->current_coroutine = NULL;

	simple_lock * term_lock = &this->lock;
	term_lock{};

	this->runner = runner;
	LIB_DEBUG_PRINTF("Kernel : constructing processor context %p\n", runner);
	runner{ this };
}

// Destructor: if the processor has not been flagged as terminated yet, flags it and
// blocks the calling thread on the processor's lock. The processor main calls unlock
// on that lock when its loop ends.
void ^?{}(processor * this) {
	// already terminated: nothing to signal, nothing to wait for
	if( this->terminated ) return;

	LIB_DEBUG_PRINTF("Kernel : core %p signaling termination\n", this);
	this->terminated = true;
	lock( &this->lock );
}

// Constructs a cluster with an empty ready queue. Also resets the file-level spin lock
// word to 0 (free).
// NOTE(review): the spin lock is global, so constructing any cluster resets it —
// confirm this is only ever done during single-threaded startup.
void ?{}(cluster * this) {
	lock = 0;
	( &this->ready_queue ){};
}

// Destructor: intentionally empty, a cluster releases nothing here.
void ^?{}(cluster * this) {
}

//-----------------------------------------------------------------------------
// Processor running routines
// Forward declarations of the routines defined below in this file
void main(processorCtx_t *);
thread * nextThread(cluster * this);
void scheduleInternal(processor * this, thread * dst);
void spin(processor * this, unsigned int * spin_count);
void thread_schedule( thread * thrd );

// Main of the processor contexts.
// Until the processor is flagged as terminated, repeatedly takes the next ready thread
// of the processor's cluster and runs it; when no thread is ready, spins. Once the loop
// ends, unlocks the processor's lock, which reschedules every thread blocked on it.
void main(processorCtx_t * runner) {
	processor * this = runner->proc;
	LIB_DEBUG_PRINTF("Kernel : core %p starting\n", this);

	fenv_t envp;
	fegetenv( &envp );
	LIB_DEBUG_PRINTF("Kernel : mxcsr %x\n", envp.__mxcsr);

	unsigned int spin_count = 0;
	while( ! this->terminated ) {
		thread * next = nextThread( this->cltr );

		if( ! next ) {
			// nothing to run
			spin(this, &spin_count);
		} else {
			scheduleInternal(this, next);
			spin_count = 0;
		}

		// per-iteration increment, applied after the body on every pass
		spin_count++;
	}

	LIB_DEBUG_PRINTF("Kernel : core %p unlocking thread\n", this);
	unlock( &this->lock );
	LIB_DEBUG_PRINTF("Kernel : core %p terminated\n", this);
}

// Declarations for scheduleInternal
// NOTE(review): ThreadCtxSwitch is not called by any live code in this file — confirm
// whether the declaration is still needed.
extern void ThreadCtxSwitch(coroutine * src, coroutine * dst);

// scheduleInternal runs thread `dst` on processor `this` by context switching from the
// processor coroutine to the thread's coroutine. It returns once control switches back
// to the processor coroutine; if thread_action was set to Reschedule in the meantime,
// `dst` is put back on the ready queue.
void scheduleInternal(processor * this, thread * dst) {
	this->thread_action = NoAction;

	coroutine * runner_cor = get_coroutine(this->runner);
	coroutine * target_cor = get_coroutine(dst);

	// record the processor coroutine as the `last` of the thread's coroutine
	target_cor->last = runner_cor;

	// the target becomes the current thread/coroutine before the switch is made
	this->current_thread = dst;
	this->current_coroutine = target_cor;
	CtxSwitch( runner_cor->stack.context, target_cor->stack.context );

	// when CtxSwitch returns we are back in the processor coroutine
	this->current_coroutine = runner_cor;

	if(this->thread_action != Reschedule) return;
	thread_schedule( dst );
}

// Handles the spinning logic of an idle processor: currently only bumps the counter
// pointed to by spin_count; `this` is not used yet.
// TODO : find some strategy to put cores to sleep after some time
void spin(processor * this, unsigned int * spin_count) {
	*spin_count += 1;
}

// Context invoker for processors
// This is the entry point for processors (kernel threads), passed to pthread_create by
// start(). `arg` is the processor to run; the routine always returns NULL.
// It effectively constructs a coroutine by stealing the pthread stack
void * CtxInvokeProcessor(void * arg) {
	processor * proc = (processor *) arg;
	this_processor = proc;
	// SKULLDUGGERY: We want to create a context for the processor coroutine
	// which is needed for the 2-step context switch. However, there is no reason
	// to waste the perfectly valid stack created by pthread.
	current_stack_info_t info;
	machine_context_t ctx;
	// use the local machine context instead of the one chosen by the default constructor
	info.context = &ctx;
	// constructing the context also sets proc->runner to this local object
	processorCtx_t proc_cor_storage = { proc, &info };

	LIB_DEBUG_PRINTF("Coroutine : created stack %p\n", proc_cor_storage.c.stack.base);

	//Set global state
	proc->current_coroutine = &proc->runner->c;
	proc->current_thread = NULL;

	//We now have a proper context from which to schedule threads
	LIB_DEBUG_PRINTF("Kernel : core %p created (%p, %p)\n", proc, proc->runner, &ctx);

	// SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't 
	// resume it to start it like it normally would, it will just context switch 
	// back to here. Instead directly call the main since we already are on the 
	// appropriate stack.
	proc_cor_storage.c.state = Active;
      main( &proc_cor_storage );
      proc_cor_storage.c.state = Halt;
      proc_cor_storage.c.notHalted = false;

	// Main routine of the core returned, the core is now fully terminated
	LIB_DEBUG_PRINTF("Kernel : core %p main ended (%p)\n", proc, proc->runner);	

	return NULL;
}

// Starts the kernel thread backing processor `this`. The new thread is created with
// default attributes and enters CtxInvokeProcessor with the processor as its argument;
// its handle is stored in this->kernel_thread.
void start(processor * this) {
	LIB_DEBUG_PRINTF("Kernel : Starting core %p\n", this);

	// pthread_create reports failure through its return value (an error number).
	// A processor without a kernel thread can never run, so do not carry on silently.
	int ret = pthread_create( &this->kernel_thread, NULL, CtxInvokeProcessor, (void*)this );
	assertf( ret == 0, "pthread_create failed with error %d", ret );

	LIB_DEBUG_PRINTF("Kernel : core %p started\n", this);
}

//-----------------------------------------------------------------------------
// Scheduler routines
// Makes `thrd` ready to run: appends it, under the global spin lock, to the ready queue
// of the system processor's cluster. The thread must not be linked in any list
// (thrd->next must be NULL).
void thread_schedule( thread * thrd ) {
	assertf( thrd->next == NULL, "Expected null got %p", thrd->next );

	spin_lock( &lock );
	{
		cluster * cltr = systemProcessor->cltr;
		append( &cltr->ready_queue, thrd );
	}
	spin_unlock( &lock );
}

// Removes and returns the thread at the head of the cluster's ready queue, under the
// global spin lock. Returns NULL when the queue is empty.
thread * nextThread(cluster * this) {
	thread * ready;

	spin_lock( &lock );
	ready = pop_head( &this->ready_queue );
	spin_unlock( &lock );

	return ready;
}

//-----------------------------------------------------------------------------
// Kernel boot procedures
// Boots the kernel (declared with the constructor attribute at the top of the file).
// Constructs, in their static storage, the main thread, the system cluster and the
// system processor with its context, schedules the main thread and then resumes the
// system processor context.
void kernel_startup(void) {
	LIB_DEBUG_PRINTF("Kernel : Starting\n");	

	// Start by initializing the main thread
	// SKULLDUGGERY: the mainThread steals the process main thread 
	// which will then be scheduled by the systemProcessor normally
	mainThread = (thread *)&mainThread_storage;
	current_stack_info_t info;
	mainThread{ &info };

	// Initialize the system cluster
	systemCluster = (cluster *)&systemCluster_storage;
	systemCluster{};

	// Initialize the system processor and the system processor ctx
	// (the coroutine that contains the processing control flow)
	systemProcessor = (processor *)&systemProcessor_storage;
	systemProcessor{ systemCluster, (processorCtx_t *)&systemProcessorCtx_storage };

	// Add the main thread to the ready queue 
	// once resume is called on systemProcessor->runner the mainThread needs to be scheduled like any normal thread
	thread_schedule(mainThread);

	//initialize the global state variables
	this_processor = systemProcessor;
	this_processor->current_thread = mainThread;
	this_processor->current_coroutine = &mainThread->c;

	// SKULLDUGGERY: Force a context switch to the system processor to set the main thread's context to the current UNIX
	// context. Hence, the main thread does not begin through CtxInvokeThread, like all other threads. The trick here is that
	// mainThread is on the ready queue when this call is made. 
	resume(systemProcessor->runner);



	// THE SYSTEM IS NOW COMPLETELY RUNNING
	LIB_DEBUG_PRINTF("Kernel : Started\n--------------------------------------------------\n\n");
}

// Shuts the kernel down (declared with the destructor attribute at the top of the file).
// Flags the system processor as terminated, suspends until control comes back, then
// destroys the manually constructed kernel objects.
void kernel_shutdown(void) {
	LIB_DEBUG_PRINTF("\n--------------------------------------------------\nKernel : Shutting down\n");

	// SKULLDUGGERY: Notify the systemProcessor it needs to terminate.
	// When its coroutine terminates, it returns control to the mainThread
	// which is currently here
	systemProcessor->terminated = true;
	suspend();

	// THE SYSTEM IS NOW COMPLETELY STOPPED

	// Destroy the system processor and its context in reverse order of construction
	// These were manually constructed so we need to manually destroy them
	// (terminated is already set above, so the processor destructor does not block)
	^(systemProcessor->runner){};
	^(systemProcessor){};

	// Final step, destroy the main thread since it is no longer needed
	// Since we provided a stack to this task it will not destroy anything
	^(mainThread){};

	LIB_DEBUG_PRINTF("Kernel : Shutdown complete\n");	
}

//-----------------------------------------------------------------------------
// Locks
// Constructs a simple_lock with a default-constructed (empty) list of blocked threads.
void ?{}( simple_lock * this ) {
	simple_thread_list * waiters = &this->blocked;
	waiters{};
}

// Destructor: intentionally empty, a simple_lock releases nothing here.
void ^?{}( simple_lock * this ) {
}

// Blocks the calling thread on the lock: the thread is appended to the lock's blocked
// list under the global spin lock, then suspended.
void lock( simple_lock * this ) {
	thread * self = this_thread();

	spin_lock( &lock );
	append( &this->blocked, self );
	spin_unlock( &lock );

	suspend();
}

// Wakes every thread blocked on the lock by moving them, in order, from the blocked
// list to the ready queue.
// The blocked list is appended to under the global spin lock in lock(), so it is also
// popped under that spin lock here. The spin lock is released before thread_schedule
// is called because thread_schedule takes the same, non-reentrant, spin lock.
void unlock( simple_lock * this ) {
	for( ;; ) {
		spin_lock( &lock );
		thread * it = pop_head( &this->blocked );
		spin_unlock( &lock );

		// list drained: nothing left to wake
		if( ! it ) break;

		thread_schedule( it );
	}
}

//-----------------------------------------------------------------------------
// Queues
// Constructs an empty list: no head, and tail designates the head slot so that the
// first append writes directly into `head`.
void ?{}( simple_thread_list * this ) {
	this->tail = &this->head;
	this->head = NULL;
}

// Appends thread `t` at the end of the list. `t` must not already be linked
// (t->next must be NULL). No locking is done here.
void append( simple_thread_list * this, thread * t ) {
	assert( t->next == NULL );

	// tail designates the slot that receives the new last element
	thread ** slot = this->tail;
	*slot = t;

	// the new last element's next field becomes that slot
	this->tail = &t->next;
}

// Removes and returns the first thread of the list, or returns NULL when the list is
// empty. The returned thread is unlinked (its next is reset to NULL). No locking is
// done here.
thread * pop_head( simple_thread_list * this ) {
	thread * first = this->head;
	if( ! first ) return NULL;

	thread * rest = first->next;
	this->head = rest;
	if( ! rest ) {
		// list became empty: tail must designate the head slot again
		this->tail = &this->head;
	}

	first->next = NULL;
	return first;
}
// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //
