//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel_private.hfa --
//
// Author           : Thierry Delisle
// Created On       : Mon Feb 13 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Sat Nov 30 19:25:02 2019
// Update Count     : 8
//

#pragma once

#include "kernel.hfa"
#include "thread.hfa"

#include "alarm.hfa"
#include "stats.hfa"


//-----------------------------------------------------------------------------
// Scheduler

// Forward declaration of the per-processor lock cell (128-byte aligned).
// The full definition appears further down in this header, in the "Cluster lock API" section.
struct __attribute__((aligned(128))) __scheduler_lock_id_t;

// Interrupt control entry points, declared with C linkage.
// NOTE(review): only the prototypes are visible in this header; the exact semantics
// (nesting, polling for pending preemption, ...) must be confirmed at the definitions.
extern "C" {
	// Disable interrupts. OPTIONAL_THREAD is a macro defined elsewhere.
	void disable_interrupts() OPTIONAL_THREAD;
	// Re-enable interrupts; presumably without polling, going by the name — confirm.
	void enable_interrupts_noPoll();
	// Re-enable interrupts; the parameter list is supplied by the __cfaabi_dbg_ctx_param macro.
	void enable_interrupts( __cfaabi_dbg_ctx_param );
}

// Schedule a thread. The second argument (the $thread pointer) must not be null, as
// stated by the nonnull(2) attribute; no such constraint is declared for the first.
void __schedule_thread( struct __processor_id_t *, $thread * ) __attribute__((nonnull (2)));

// Block current thread and release/wake-up the following resources.
// Declared noreturn: control never returns to the caller.
// NOTE(review): the function takes no parameters, so "the following resources" looks
// like a leftover from an earlier signature — confirm and update.
void __leave_thread() __attribute__((noreturn));

//-----------------------------------------------------------------------------
// Processor
// main routine taking a processor context (definition elsewhere).
void main(processorCtx_t *);

// Helper for creating a pthread.
// NOTE(review): the parameters look like (thread handle, start routine, start-routine
// argument) by analogy with pthread_create; what the returned void * denotes is not
// visible from this header — confirm at the definition.
void * __create_pthread( pthread_t *, void * (*)(void *), void * );


// Event-kernel state: a list of alarms together with a spinlock.
// NOTE(review): presumably `lock` protects `alarms` — confirm against the users.
struct event_kernel_t {
	alarm_list_t alarms;
	__spinlock_t lock;
};

// Pointer to the event-kernel instance; the storage is defined elsewhere.
extern event_kernel_t * event_kernel;

// Preemption bookkeeping: two boolean flags and a 16-bit counter.
// NOTE(review): field meanings are inferred from the names only — `enabled` (preemption
// currently allowed?), `in_progress` (a preemption being handled?), `disable_count`
// (nesting depth of disable calls?) — confirm before relying on them.
struct __cfa_kernel_preemption_state_t {
	bool enabled;
	bool in_progress;
	unsigned short disable_count;
};

// One volatile instance per thread (thread_local), using the initial-exec TLS model.
extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));

// Pointer to the main cluster; the storage is defined elsewhere.
extern cluster * mainCluster;

//-----------------------------------------------------------------------------
// Threads
// C-linkage thread invocation shim: receives the thread's main routine and the
// object pointer (`this`) to hand to it.
// NOTE(review): presumably calls main(this); the definition is not visible here.
extern "C" {
      void __cfactx_invoke_thread(void (*main)(void *), void * this);
}

// Thread register/unregister hooks for debugging. They are wrapped in
// __cfaabi_dbg_debug_do, so presumably only declared in debug builds
// (the macro is defined elsewhere — confirm).
__cfaabi_dbg_debug_do(
	extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
	extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
)

// KERNEL ONLY: unpark without disabling interrupts.
// The trailing __cfaabi_dbg_ctx_param2 macro (defined elsewhere) supplies any extra
// debug-context parameter(s).
void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );

//-----------------------------------------------------------------------------
// I/O
// I/O subsystem hooks, each operating on one cluster (definitions elsewhere).
// NOTE(review): the names suggest a start-up / finish-start / prepare-stop / shutdown
// sequence; the unnamed unsigned and bool parameters are not described in this header
// and must be checked at the definitions.
void __kernel_io_startup     ( cluster &, unsigned, bool );
void __kernel_io_finish_start( cluster & );
void __kernel_io_prepare_stop( cluster & );
void __kernel_io_shutdown    ( cluster &, bool );

//-----------------------------------------------------------------------------
// Utils
// Declare a static raw byte buffer named storage_<X> that is exactly sizeof(T) bytes
// long and carries the alignment of T, i.e. untyped storage large and aligned enough
// to hold one object of type T.
#define KERNEL_STORAGE(T,X) __attribute__((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]

// Pseudo-random number generator backed by the seed stored in kernelTLS.
// Each call advances kernelTLS.rand_seed in place with three xor-shift steps
// (left 6, right 21, left 7) and returns the updated seed converted to uint32_t.
static inline uint32_t __tls_rand() {
	kernelTLS.rand_seed = kernelTLS.rand_seed ^ (kernelTLS.rand_seed << 6);
	kernelTLS.rand_seed = kernelTLS.rand_seed ^ (kernelTLS.rand_seed >> 21);
	kernelTLS.rand_seed = kernelTLS.rand_seed ^ (kernelTLS.rand_seed << 7);
	return kernelTLS.rand_seed;
}


// Registration bookkeeping, overloaded on what is being registered (definitions elsewhere).

// Register / unregister a cluster.
void doregister( struct cluster & cltr );
void unregister( struct cluster & cltr );

// Register / unregister a thread with a cluster.
void doregister( struct cluster * cltr, struct $thread & thrd );
void unregister( struct cluster * cltr, struct $thread & thrd );

// Register / unregister a processor with a cluster.
void doregister( struct cluster * cltr, struct processor * proc );
void unregister( struct cluster * cltr, struct processor * proc );

//=======================================================================
// Cluster lock API
//=======================================================================
// Cells used by the reader-writer lock.
// While not generic, it only relies on an opaque pointer.
// Every cell is aligned to 128 bytes (see the aligned attribute).
struct __attribute__((aligned(128))) __scheduler_lock_id_t {
	// opaque owner pointer; ready_schedule_lock/unlock verify it equals the caller's proc
	__processor_id_t * volatile handle;
	// this cell's spinlock flag; taken with __atomic_acquire and released with __atomic_unlock
	volatile bool lock;
};

// Lock-Free registering/unregistering of threads
// Register a processor and get its unique id in return.
// NOTE(review): the original comment said "to a given cluster", but the signature takes
// only the processor id record — confirm.
unsigned doregister( struct __processor_id_t * proc );

// Unregister a previously registered processor.
// NOTE(review): the original comment said "from a given cluster using its id, getting
// back the original pointer", but the function takes the pointer itself and returns
// void — the old wording looks stale; confirm.
void     unregister( struct __processor_id_t * proc );

//=======================================================================
// Reader-writer lock implementation
// Concurrent with doregister/unregister,
//    i.e., threads can be added at any point during or between the entry/exit

//-----------------------------------------------------------------------
// simple spinlock underlying the RWLock
// Blocking acquire
// Spins until the flag at `ll` has been atomically changed from false to true by
// this caller. On return the flag is set (checked by the paranoid verify).
//   ll : address of the boolean spinlock flag
static inline void __atomic_acquire(volatile bool * ll) {
	// Outer loop: try to take the lock with an atomic exchange (expected to succeed).
	while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
		// Inner loop: while the lock is observed held, spin on plain loads only,
		// so the exchange is retried only once the flag has been seen clear.
		while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED)) {
			// Spin-wait hint for the CPU. "pause" only exists on x86, so pick the
			// instruction per architecture rather than failing to assemble elsewhere.
			#if defined(__i386__) || defined(__x86_64__)
				asm volatile("pause");
			#elif defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH >= 7)
				asm volatile("yield");
			#else
				/* no spin-wait hint known for this architecture */
			#endif
		}
	}
	/* paranoid */ verify(*ll);
}

// Non-blocking acquire
// Makes a single attempt to take the spinlock flag at `ll`.
// Returns true when this call changed the flag from false to true (lock obtained),
// false when the flag was already set. The flag is set on return in either case.
static inline bool __atomic_try_acquire(volatile bool * ll) {
	const bool was_held = __atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
	return !was_held;
}

// Release
// Clears the spinlock flag at `ll` with a release-ordered atomic store.
// The flag must currently be set; this is checked by the paranoid verify
// before the store.
static inline void __atomic_unlock(volatile bool * ll) {
	/* paranoid */ verify(*ll);
	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}

//-----------------------------------------------------------------------
// Reader-Writer lock protecting the ready-queues.
// While this lock is mostly generic, some aspects
// have been hard-coded for the ready-queue for
// simplicity and performance.
struct __scheduler_RWLock_t {
	// total cachelines allocated
	unsigned int max;

	// cachelines currently in use
	volatile unsigned int alloc;

	// cachelines ready to iterate over
	// (!= to alloc when thread is in second half of doregister)
	volatile unsigned int ready;

	// writer lock
	volatile bool lock;

	// data pointer: array of per-processor cells, indexed by the processor id
	// (see ready_schedule_lock/unlock)
	__scheduler_lock_id_t * data;
};

// Constructor and destructor of the scheduler reader-writer lock (definitions elsewhere).
void  ?{}(__scheduler_RWLock_t & this);
void ^?{}(__scheduler_RWLock_t & this);

// Pointer to the lock instance that ready_schedule_lock/unlock operate on
// (they open it with `with(*__scheduler_lock)`); the storage is defined elsewhere.
extern __scheduler_RWLock_t * __scheduler_lock;

//-----------------------------------------------------------------------
// Reader side : acquire when using the ready queue to schedule but not
//  creating/destroying queues.
//
//   proc : the caller's processor id record; proc->id selects the caller's cell in
//          __scheduler_lock->data (the lock's fields are opened by the with clause).
//
// Waits until the writer flag is observed clear, then takes the caller's own cell
// lock. Pair with ready_schedule_unlock.
static inline void ready_schedule_lock( struct __processor_id_t * proc) with(*__scheduler_lock) {
	unsigned iproc = proc->id;
	// Check the index is in range before it is used to index data[].
	/*paranoid*/ verify(iproc < ready);
	/*paranoid*/ verify(data[iproc].handle == proc);

	// Step 1 : make sure no writer is in the middle of the critical section
	while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED)) {
		// Spin-wait hint for the CPU. "pause" only exists on x86, so pick the
		// instruction per architecture rather than failing to assemble elsewhere.
		#if defined(__i386__) || defined(__x86_64__)
			asm volatile("pause");
		#elif defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH >= 7)
			asm volatile("yield");
		#else
			/* no spin-wait hint known for this architecture */
		#endif
	}

	// Fence needed because we don't want to start trying to acquire the lock
	// before we read a false.
	// Not needed on x86
	// NOTE(review): the fence below is still disabled; re-evaluate it for non-x86 targets.
	// std::atomic_thread_fence(std::memory_order_seq_cst);

	// Step 2 : acquire our local lock
	__atomic_acquire( &data[iproc].lock );
	/*paranoid*/ verify(data[iproc].lock);
}

// Reader side : release the cell lock taken by ready_schedule_lock.
//
//   proc : the caller's processor id record; proc->id selects the caller's cell in
//          __scheduler_lock->data (the lock's fields are opened by the with clause).
//
// The cell must be owned by `proc` and currently locked (paranoid checks).
static inline void ready_schedule_unlock( struct __processor_id_t * proc) with(*__scheduler_lock) {
	unsigned iproc = proc->id;
	// Check the index is in range before it is used to index data[].
	/*paranoid*/ verify(iproc < ready);
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(data[iproc].lock);
	__atomic_unlock(&data[iproc].lock);
}

//-----------------------------------------------------------------------
// Writer side : acquire when changing the ready queue, e.g. adding more
//  queues or removing them.
// Returns a value that must be passed back to ready_mutate_unlock.
uint_fast32_t ready_mutate_lock( void );

// Release the writer side; the argument is the value returned by ready_mutate_lock.
void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );

//=======================================================================
// Ready-Queue API
//-----------------------------------------------------------------------
// Push a thread onto a ready queue for a cluster.
// Returns true if the list was previously empty, false otherwise.
__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);

//-----------------------------------------------------------------------
// Pop a thread from the ready queue of a cluster.
// Returns 0p (the null pointer) if empty.
__attribute__((hot)) struct $thread * pop(struct cluster * cltr);

//-----------------------------------------------------------------------
// Increase the width of the ready queue (number of lanes) by 4.
void ready_queue_grow  (struct cluster * cltr);

//-----------------------------------------------------------------------
// Decrease the width of the ready queue (number of lanes) by 4.
void ready_queue_shrink(struct cluster * cltr);

//-----------------------------------------------------------------------
// Statistics: called at the end of each thread to register statistics.
// Returns the statistics block recorded in kernelTLS.this_stats. The paranoid checks
// require that kernelTLS.preemption_state.enabled is false and that the pointer is
// non-null. Only available when __CFA_NO_STATISTICS__ is not defined.
#if !defined(__CFA_NO_STATISTICS__)
static inline struct __stats_t * __tls_stats() {
	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
	struct __stats_t * const stats = kernelTLS.this_stats;
	/* paranoid */ verify( stats );
	return stats;
}
#endif

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //