//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// monitor_desc.c --
//
// Author           : Thierry Delisle
// Created On       : Thu Feb 23 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Mon Jul 31 14:59:05 2017
// Update Count     : 3
//

#include "monitor"

#include <stdlib>

#include "libhdr.h"
#include "kernel_private.h"

//-----------------------------------------------------------------------------
// Forward declarations
// Ownership hand-off and accept matching
static inline void set_owner( monitor_desc * this, thread_desc * owner );
static inline thread_desc * next_thread( monitor_desc * this );
static inline int is_accepted( thread_desc * owner, monitor_desc * this, monitor_desc ** group, int group_cnt, void (*func)() );

// Bulk spinlock helpers, overloaded on an array of spinlocks or an array of monitors
static inline void lock_all( spinlock ** locks, unsigned short count );
static inline void lock_all( monitor_desc ** source, spinlock ** /*out*/ locks, unsigned short count );
static inline void unlock_all( spinlock ** locks, unsigned short count );
static inline void unlock_all( monitor_desc ** locks, unsigned short count );

// Copy the recursion counters of a group of monitors out of / back into the monitors
static inline void save_recursion   ( monitor_desc ** ctx, unsigned int * /*out*/ recursions, unsigned short count );
static inline void restore_recursion( monitor_desc ** ctx, unsigned int * /*in */ recursions, unsigned short count );

// Link a wait node with its per-monitor criteria (init_push also pushes them on the signaller stacks)
static inline void init     ( int count, monitor_desc ** monitors, __condition_node_t * waiter, __condition_criterion_t * criteria );
static inline void init_push( int count, monitor_desc ** monitors, __condition_node_t * waiter, __condition_criterion_t * criteria );

// Condition helpers
static inline thread_desc * check_condition( __condition_criterion_t * );
static inline void brand_condition( condition * );
static inline unsigned short insert_unique( thread_desc ** thrds, unsigned short end, thread_desc * val );

// Entry-queue scan used by the accept routine
static inline thread_desc * search_entry_queue( __acceptable_t * acceptables, int acc_count, monitor_desc ** monitors, int count );

//-----------------------------------------------------------------------------
// Useful defines
// Declares the locals `waiter` and `criteria` in the expanding scope and links them with init().
// Expects `count` and `monitors` to already be in scope (monitor_ctx declares both).
#define wait_ctx(thrd, user_info)                               /* Create the necessary information to use the signaller stack       */ \
	__condition_node_t waiter = { thrd, count, user_info };   /* Create the node specific to this wait operation                   */ \
	__condition_criterion_t criteria[count];                  /* Create the criteria this wait operation needs to wake up          */ \
	init( count, monitors, &waiter, criteria );               /* Link everything together                                          */ \

// Same locals as wait_ctx (`waiter`, `criteria`), but linked with init_push(), which also
// pushes every criterion on its monitor's signal_stack.
// Expects `count` and `monitors` to already be in scope (monitor_ctx declares both).
#define wait_ctx_primed(thrd, user_info)                        /* Create the necessary information to use the signaller stack       */ \
	__condition_node_t waiter = { thrd, count, user_info };   /* Create the node specific to this wait operation                   */ \
	__condition_criterion_t criteria[count];                  /* Create the criteria this wait operation needs to wake up          */ \
	init_push( count, monitors, &waiter, criteria );          /* Link everything together and push it to the AS-Stack              */ \

// Declares the locals `monitors`, `count`, `recursions` and `locks` in the expanding scope.
// `recursions` and `locks` are variable-length arrays of `count` elements.
#define monitor_ctx( mons, cnt )              /* Define that create the necessary struct for internal/external scheduling operations */ \
	monitor_desc ** monitors = mons;        /* Save the targeted monitors                                                          */ \
	unsigned short count = cnt;             /* Save the count to a local variable                                                  */ \
	unsigned int recursions[ count ];       /* Save the current recursion levels to restore them later                             */ \
	spinlock *   locks     [ count ];       /* We need to pass-in an array of locks to BlockInternal                               */ \

//-----------------------------------------------------------------------------
// Enter/Leave routines


extern "C" {
	// Enter single monitor
	// Acquires `this` on behalf of the calling thread (this_thread).
	//   this      : monitor to enter
	//   group     : array of monitors forwarded to is_accepted
	//   group_cnt : number of entries in group
	//   func      : routine forwarded to is_accepted for matching against pending acceptables
	// Returns as owner right away when the monitor is free, already owned by the caller,
	// or the call is accepted; otherwise queues the caller on the entry queue and blocks.
	static void __enter_monitor_desc( monitor_desc * this, monitor_desc ** group, int group_cnt, void (*func)() ) {
		// Lock the monitor spinlock, lock_yield to reduce contention
		lock_yield( &this->lock DEBUG_CTX2 );
		thread_desc * thrd = this_thread;

		LIB_DEBUG_PRINT_SAFE("Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);

		// Reset the accepted index; only the is_accepted branch below overwrites it
		this->accepted_index = -1;
		if( !this->owner ) {
			// No one has the monitor, just take it
			set_owner( this, thrd );

			LIB_DEBUG_PRINT_SAFE("Kernel :  mon is free \n");
		}
		else if( this->owner == thrd) {
			// We already have the monitor, just note how many times we took it
			verify( this->recursion > 0 );
			this->recursion += 1;

			LIB_DEBUG_PRINT_SAFE("Kernel :  mon already owned \n");
		}
		else if( (this->accepted_index = is_accepted( thrd, this, group, group_cnt, func)) >= 0 ) {
			// Some one was waiting for us, enter
			set_owner( this, thrd );

			LIB_DEBUG_PRINT_SAFE("Kernel :  mon accepts \n");
		}
		else {
			LIB_DEBUG_PRINT_SAFE("Kernel :  blocking \n");

			// Some one else has the monitor, wait in line for it
			append( &this->entry_queue, thrd );
			BlockInternal( &this->lock );

			// NOTE(review): ownership is not set here after waking; presumably the thread that
			// woke us already made us owner (next_thread does so when popping the entry queue)
			LIB_DEBUG_PRINT_SAFE("Kernel : %10p Entered  mon %p\n", thrd, this);

			// BlockInternal will unlock spinlock, no need to unlock ourselves
			return;
		}

		LIB_DEBUG_PRINT_SAFE("Kernel : %10p Entered  mon %p\n", thrd, this);

		// Release the lock and leave
		unlock( &this->lock );
		return;
	}

	// Leave single monitor
	// Drops one recursion level on `this`. When the last level is released, the next
	// thread is selected with next_thread and woken after the spinlock is released.
	void __leave_monitor_desc( monitor_desc * this ) {
		// Take the monitor spinlock, yielding while it is contended
		lock_yield( &this->lock DEBUG_CTX2 );

		verifyf( this_thread == this->owner, "Expected owner to be %p, got %p (r: %i)", this_thread, this->owner, this->recursion );

		// One fewer nested entry
		this->recursion -= 1;

		if( this->recursion == 0 ) {
			// Last level released: pick the successor (null when nobody is waiting)
			thread_desc * successor = next_thread( this );

			// Other threads may now get in safely
			unlock( &this->lock );

			// Wake the successor
			WakeThread( successor );
			return;
		}

		// Still inside a nested entry, nothing else to do
		unlock( &this->lock );
	}

	// Leave the thread monitor
	// Last routine called by a thread; control should never come back from it.
	void __leave_thread_monitor( thread_desc * thrd ) {
		monitor_desc * mon = &thrd->mon;

		// Grab the thread's own monitor first
		lock_yield( &mon->lock DEBUG_CTX2 );

		disable_interrupts();

		thrd->cor.state = Halted;

		verifyf( thrd == mon->owner, "Expected owner to be %p, got %p (r: %i)", thrd, mon->owner, mon->recursion );

		// Release one recursion level; any level left over is an error
		mon->recursion -= 1;
		if( mon->recursion != 0 ) { abortf("Thread internal monitor has unbalanced recursion"); }

		// Select the next thread, which can be null
		thread_desc * successor = next_thread( mon );

		// LeaveThread unlocks the spinlock; it is used instead of BlockInternal
		// because it is specialized for this case and supports a null successor
		LeaveThread( &mon->lock, successor );

		// Control flow should never reach here!
	}
}

// Enter multiple monitors, first to last
// relies on the monitor array being sorted
static inline void enter(monitor_desc ** monitors, int count, void (*func)() ) {
	int idx = 0;
	while( idx < count ) {
		__enter_monitor_desc( monitors[idx], monitors, count, func );
		idx += 1;
	}
}

// Leave multiple monitors, last to first
// relies on the monitor array being sorted
static inline void leave(monitor_desc ** monitors, int count) {
	int idx = count;
	while( idx > 0 ) {
		idx -= 1;
		__leave_monitor_desc( monitors[idx] );
	}
}

// Ctor for monitor guard
// Remembers the thread's previous monitor context, sorts the monitor array,
// installs the new context on the thread and then enters every monitor.
void ?{}( monitor_guard_t & this, monitor_desc ** m, int count, void (*func)() ) {
	// Remember what the thread was using so the dtor can put it back
	this.prev_mntrs = this_thread->current_monitors;
	this.prev_count = this_thread->current_monitor_count;
	this.prev_func  = this_thread->current_monitor_func;

	// Keep hold of the caller's array
	this.count = count;
	this.m = m;

	// Order the monitors by address -> TODO use a sort specialized for small numbers
	qsort(this.m, count);

	// Install the new context on the thread (needed for conditions)
	this_thread->current_monitor_func  = func;
	this_thread->current_monitor_count = count;
	this_thread->current_monitors      = m;

	// Acquire the monitors in sorted order
	enter( this.m, this.count, func );
}


// Dtor for monitor guard
// Releases the guarded monitors from last to first, then reinstates the
// monitor context the thread had before the guard was constructed.
void ^?{}( monitor_guard_t & this ) {
	// Release in reverse order of acquisition
	for( int idx = this.count - 1; idx >= 0; idx-- ) {
		__leave_monitor_desc( this.m[idx] );
	}

	// Put the previous thread context back
	this_thread->current_monitor_func  = this.prev_func;
	this_thread->current_monitor_count = this.prev_count;
	this_thread->current_monitors      = this.prev_mntrs;
}

//-----------------------------------------------------------------------------
// Internal scheduling types
// Builds an unlinked wait node for `waiting_thread` covering `count` monitors.
void ?{}(__condition_node_t & this, thread_desc * waiting_thread, unsigned short count, uintptr_t user_info ) {
	this.next = NULL;
	this.user_info = user_info;
	this.count = count;
	this.waiting_thread = waiting_thread;
}

// Default criterion: not ready, no target monitor, no owning node, unlinked.
void ?{}(__condition_criterion_t & this ) {
	this.next   = NULL;
	this.owner  = NULL;
	this.target = NULL;
	this.ready  = false;
}

// Criterion tied to monitor `target` and wait node `owner`; starts not ready and unlinked.
void ?{}(__condition_criterion_t & this, monitor_desc * target, __condition_node_t * owner ) {
	this.next   = NULL;
	this.owner  = owner;
	this.target = target;
	this.ready  = false;
}

//-----------------------------------------------------------------------------
// Internal scheduling
// Block the calling thread on condition `this`.
//   this      : condition to wait on; branded with the thread's current monitors on first use
//   user_info : value stored in the wait node (front() reads it from the head node)
// The wait node and its criteria are locals of this function and are linked into
// the condition's blocked queue while the thread is blocked.
void wait( condition * this, uintptr_t user_info = 0 ) {
	brand_condition( this );

	// Check that everything is as expected
	assertf( this->monitors != NULL, "Waiting with no monitors (%p)", this->monitors );
	verifyf( this->monitor_count != 0, "Waiting with 0 monitors (%i)", this->monitor_count );
	verifyf( this->monitor_count < 32u, "Excessive monitor count (%i)", this->monitor_count );

	// Create storage for monitor context
	monitor_ctx( this->monitors, this->monitor_count );

	// Create the node specific to this wait operation
	wait_ctx( this_thread, user_info );

	// Append the current wait operation to the ones already queued on the condition
	// We don't need locks for that since conditions must always be waited on inside monitor mutual exclusion
	append( &this->blocked, &waiter );

	// Lock all monitors (also records each monitor's spinlock in locks)
	lock_all( monitors, locks, count );

	// DON'T unlock, ask the kernel to do it

	// Save monitor state
	save_recursion( monitors, recursions, count );

	// Find the next thread(s) to run
	unsigned short thread_count = 0;
	thread_desc * threads[ count ];
	// Zero every slot so the array holds no garbage before it is filled
	for(int i = 0; i < count; i++) {
		threads[i] = 0;
	}

	// Collect the next thread of each monitor, skipping nulls and duplicates
	for( int i = 0; i < count; i++) {
		thread_desc * new_owner = next_thread( monitors[i] );
		thread_count = insert_unique( threads, thread_count, new_owner );
	}

	// Everything is ready to go to sleep
	BlockInternal( locks, count, threads, thread_count );


	// WE WOKE UP


	// We are back, restore the recursion levels saved before blocking
	lock_all( locks, count );
	restore_recursion( monitors, recursions, count );
	unlock_all( locks, count );
}

// Signal the thread at the head of condition `this` without blocking the caller.
// The head wait node is popped and each of its criteria is pushed on the
// signal_stack of the monitor it targets.
// Returns false when no thread is waiting on the condition, true otherwise.
bool signal( condition * this ) {
	if( is_empty( this ) ) { return false; }

	//Check that everything is as expected
	verify( this->monitors );
	verify( this->monitor_count != 0 );

	//Some more checking in debug
	LIB_DEBUG_DO(
		thread_desc * this_thrd = this_thread;
		if ( this->monitor_count != this_thrd->current_monitor_count ) {
			abortf( "Signal on condition %p made with different number of monitor(s), expected %i got %i", this, this->monitor_count, this_thrd->current_monitor_count );
		}

		for(int i = 0; i < this->monitor_count; i++) {
			if ( this->monitors[i] != this_thrd->current_monitors[i] ) {
				// Both values are monitor pointers, so both are printed with %p
				abortf( "Signal on condition %p made with different monitor, expected %p got %p", this, this->monitors[i], this_thrd->current_monitors[i] );
			}
		}
	);

	unsigned short count = this->monitor_count;

	// Lock all monitors (no spinlock array needed here, hence the NULL)
	lock_all( this->monitors, NULL, count );

	//Pop the head of the waiting queue
	__condition_node_t * node = pop_head( &this->blocked );

	//Add the thread to the proper AS stack
	for(int i = 0; i < count; i++) {
		__condition_criterion_t * crit = &node->criteria[i];
		assert( !crit->ready );
		push( &crit->target->signal_stack, crit );
	}

	//Release
	unlock_all( this->monitors, count );

	return true;
}

// Signal the thread at the head of condition `this` and block the caller.
// Every monitor of the condition is handed to the signalled thread with set_owner;
// the caller's criteria are pushed on the monitors' signal stacks by wait_ctx_primed.
// Returns false when no thread is waiting on the condition, true otherwise.
bool signal_block( condition * this ) {
	if( !this->blocked.head ) { return false; }

	//Check that everything is as expected
	verifyf( this->monitors != NULL, "Waiting with no monitors (%p)", this->monitors );
	verifyf( this->monitor_count != 0, "Waiting with 0 monitors (%i)", this->monitor_count );

	// Create storage for monitor context
	monitor_ctx( this->monitors, this->monitor_count );

	// Lock all monitors (also records each monitor's spinlock in locks)
	lock_all( monitors, locks, count );

	// Create the node specific to this wait operation
	wait_ctx_primed( this_thread, 0 )

	//save contexts
	save_recursion( monitors, recursions, count );

	//Find the thread to run
	thread_desc * signallee = pop_head( &this->blocked )->waiting_thread;
	for(int i = 0; i < count; i++) {
		set_owner( monitors[i], signallee );
	}

	//Everything is ready to go to sleep
	BlockInternal( locks, count, &signallee, 1 );


	// WE WOKE UP


	//We are back, restore the recursion levels saved before blocking
	lock_all( locks, count );
	restore_recursion( monitors, recursions, count );
	unlock_all( locks, count );

	return true;
}

// Read the user_info stored by the thread waiting at the head of the condition's queue
uintptr_t front( condition * this ) {
	verifyf( !is_empty(this),
		"Attempt to access user data on an empty condition.\n"
		"Possible cause is not checking if the condition is empty before reading stored data."
	);
	__condition_node_t * first = this->blocked.head;
	return first->user_info;
}

//-----------------------------------------------------------------------------
// External scheduling
// Block the calling thread until one of the `acc_count` entries of `acceptables` is called.
// The monitors involved are taken from the first acceptable (acceptables->monitors / ->count).
// Returns the accepted_index read from the first monitor once the thread is running again.
int __accept_internal( unsigned short acc_count, __acceptable_t * acceptables ) {
	thread_desc * thrd = this_thread;

	// Create storage for monitor context
	monitor_ctx( acceptables->monitors, acceptables->count );

	// Lock all monitors (also records each monitor's spinlock in locks)
	lock_all( monitors, locks, count );

	// Create the node specific to this wait operation
	wait_ctx_primed( thrd, 0 );

	// Look in the entry queue for a thread already waiting on an acceptable routine
	thread_desc * next = search_entry_queue( acceptables, acc_count, monitors, count );

	LIB_DEBUG_PRINT_SAFE("Owner(s) :");
	for(int i = 0; i < count; i++) {
		LIB_DEBUG_PRINT_SAFE(" %p", monitors[i]->owner );
	}
	LIB_DEBUG_PRINT_SAFE("\n");

	LIB_DEBUG_PRINT_SAFE("Passing mon to %p\n", next);

	if( !next ) {
		// Nobody suitable is queued yet: publish the acceptables on the monitors
		// so that is_accepted can match a later caller against them
		for(int i = 0; i < count; i++) {
			monitors[i]->acceptables = acceptables;
			monitors[i]->acceptable_count = acc_count;
		}
	}
	else {
		// A matching thread was found: hand every monitor over to it
		for(int i = 0; i < count; i++) {
			set_owner( monitors[i], next );
		}
	}


	// NOTE(review): this runs after set_owner above, which resets recursion to 1 when a
	// thread was found, so the saved values are the post-handoff ones in that case — confirm intended
	save_recursion( monitors, recursions, count );


	// Everything is ready to go to sleep
	BlockInternal( locks, count, &next, next ? 1 : 0 );


	//WE WOKE UP


	//We are back, restore the recursion levels saved before blocking
	lock_all( locks, count );
	restore_recursion( monitors, recursions, count );
	int acc_idx = monitors[0]->accepted_index;
	unlock_all( locks, count );

	return acc_idx;
}

//-----------------------------------------------------------------------------
// Utilities

// Make `owner` the owner of monitor `this`; a null owner leaves the monitor free.
static inline void set_owner( monitor_desc * this, thread_desc * owner ) {
	this->owner = owner;

	// A real owner starts at recursion level 1, a free monitor sits at level 0
	if( owner ) {
		this->recursion = 1;
	}
	else {
		this->recursion = 0;
	}
}

// Select the next owner of monitor `this` and return the thread to wake, if any.
// The signaller stack has priority over the entry queue.
static inline thread_desc * next_thread( monitor_desc * this ) {
	__condition_criterion_t * urgent = pop( &this->signal_stack );

	if( !urgent ) {
		// Nothing on the signaller stack: hand over to the head of the entry queue (may be null)
		thread_desc * head = pop_head( &this->entry_queue );
		set_owner( this, head );
		return head;
	}

	// The signaller stack had an entry: the monitor is marked as in use
	// whether or not that thread is ready to run yet
	set_owner( this,  urgent->owner->waiting_thread );

	return check_condition( urgent );
}

// Look for an acceptable on monitor `this` matching routine `func` called on `group`.
// Returns the index of the matching acceptable, or -1 when there is none.
static inline int is_accepted( thread_desc * owner, monitor_desc * this, monitor_desc ** group, int group_cnt, void (*func)() ) {
	__acceptable_t * candidates = this->acceptables;
	int candidate_cnt = this->acceptable_count;

	// Nothing is being accepted on this monitor
	if( !candidates ) return -1;

	// Only the first monitor of the group runs the test, the others reuse its answer
	if( this != group[0] ) return group[0]->accepted_index;

	for( int i = 0; i < candidate_cnt; i++ ) {
		__acceptable_t * acc = &candidates[i];

		// Different routine, try the next acceptable
		if( acc->func != func ) continue;

		// Same routine but a different number of monitors can't be a match
		if( acc->count != group_cnt ) return -1;

		// Every monitor has to line up with the group
		bool same_group = true;
		for( int j = 0; j < group_cnt && same_group; j++ ) {
			same_group = acc->monitors[j] == group[j];
		}

		// Complete match, accept the call
		if( same_group ) return i;
	}

	// No acceptable matched
	return -1;
}

// Construct one criterion per monitor, each owned by `waiter`, then attach the array to the node.
static inline void init( int count, monitor_desc ** monitors, __condition_node_t * waiter, __condition_criterion_t * criteria ) {
	int idx = 0;
	while( idx < count ) {
		(criteria[idx]){ monitors[idx], waiter };
		idx += 1;
	}

	waiter->criteria = criteria;
}

// Like init, but each criterion is also pushed on its monitor's signal_stack once constructed.
static inline void init_push( int count, monitor_desc ** monitors, __condition_node_t * waiter, __condition_criterion_t * criteria ) {
	int idx = 0;
	while( idx < count ) {
		(criteria[idx]){ monitors[idx], waiter };
		push( &criteria[idx].target->signal_stack, &criteria[idx] );
		idx += 1;
	}

	waiter->criteria = criteria;
}

// Acquire the first `count` spinlocks of `locks`, in array order.
static inline void lock_all( spinlock ** locks, unsigned short count ) {
	int idx = 0;
	while( idx < count ) {
		lock_yield( locks[idx] DEBUG_CTX2 );
		idx += 1;
	}
}

// Acquire the spinlock of each of the first `count` monitors of `source`, in array order.
// When `locks` is non-null, the address of each acquired spinlock is written to it.
static inline void lock_all( monitor_desc ** source, spinlock ** /*out*/ locks, unsigned short count ) {
	for( int idx = 0; idx < count; idx++ ) {
		lock_yield( &source[idx]->lock DEBUG_CTX2 );
		if( locks ) {
			locks[idx] = &source[idx]->lock;
		}
	}
}

// Release the first `count` spinlocks of `locks`, in array order.
static inline void unlock_all( spinlock ** locks, unsigned short count ) {
	int idx = 0;
	while( idx < count ) {
		unlock( locks[idx] );
		idx += 1;
	}
}

// Release the spinlock of each of the first `count` monitors of `locks`, in array order.
static inline void unlock_all( monitor_desc ** locks, unsigned short count ) {
	int idx = 0;
	while( idx < count ) {
		unlock( &locks[idx]->lock );
		idx += 1;
	}
}


// Copy the recursion level of each monitor in `ctx` into the matching slot of `recursions`.
static inline void save_recursion   ( monitor_desc ** ctx, unsigned int * /*out*/ recursions, unsigned short count ) {
	int idx = 0;
	while( idx < count ) {
		recursions[idx] = ctx[idx]->recursion;
		idx += 1;
	}
}

// Write each slot of `recursions` back into the recursion level of the matching monitor in `ctx`.
static inline void restore_recursion( monitor_desc ** ctx, unsigned int * /*in */ recursions, unsigned short count ) {
	int idx = 0;
	while( idx < count ) {
		ctx[idx]->recursion = recursions[idx];
		idx += 1;
	}
}

// Does two things in a single pass over the criteria of the node owning `target`:
// 1 - flags `target` itself as ready
// 2 - checks whether every criterion of that node is now ready
// Returns the node's waiting thread when all are ready, NULL otherwise.
static inline thread_desc * check_condition( __condition_criterion_t * target ) {
	__condition_node_t * node = target->owner;
	__condition_criterion_t * criteria = node->criteria;
	unsigned short count = node->count;

	bool all_ready = true;

	for( int idx = 0; idx < count; idx++ ) {
		__condition_criterion_t * crit = &criteria[idx];

		if( crit == target ) {
			crit->ready = true;
		}

		if( !crit->ready ) {
			all_ready = false;
		}
	}

	if( !all_ready ) return NULL;
	return node->waiting_thread;
}

// Bind condition `this` to the monitors the calling thread currently holds.
// Does nothing when the condition already has a monitor array. Otherwise it
// allocates a private array and copies the thread's current monitors into it.
static inline void brand_condition( condition * this ) {
	thread_desc * thrd = this_thread;
	if( !this->monitors ) {
		// LIB_DEBUG_PRINT_SAFE("Branding\n");
		// Report the condition being branded; the thread's monitor pointer is always null when this fires
		assertf( thrd->current_monitors != NULL, "No current monitor to brand condition %p", this );
		this->monitor_count = thrd->current_monitor_count;

		this->monitors = malloc( this->monitor_count * sizeof( *this->monitors ) );

		// Fail loudly on allocation failure instead of writing through a null pointer below.
		// A zero-sized request may legitimately return null, so only a non-empty one is checked.
		if( !this->monitors && this->monitor_count != 0 ) {
			abortf( "Failed to allocate monitor array while branding condition %p", this );
		}

		for( int i = 0; i < this->monitor_count; i++ ) {
			this->monitors[i] = thrd->current_monitors[i];
		}
	}
}

// Add `val` to the set held in the first `end` slots of `thrds` unless it is null or already there.
//   thrds : array whose slots [0, end) hold the threads collected so far
//   end   : number of populated slots
//   val   : candidate thread, may be null
// Returns the new number of populated slots (`end` or `end + 1`).
static inline unsigned short insert_unique( thread_desc ** thrds, unsigned short end, thread_desc * val ) {
	if( !val ) return end;

	// Only the populated slots are compared; slot `end` is not part of the set yet
	// and lies outside the array when the array is full
	for(int i = 0; i < end; i++) {
		if( thrds[i] == val ) return end;
	}

	thrds[end] = val;
	return end + 1;
}


// True when the routine of acceptable `acc` is the routine `thrd` is currently calling.
static inline bool match( __acceptable_t * acc, thread_desc * thrd ) {
	verify( thrd );
	verify( acc );
	return acc->func == thrd->current_monitor_func;
}

// Scan the entry queue of the first monitor for a thread matching one of the acceptables.
// The first matching thread is removed from the queue and returned; NULL when none matches.
static inline thread_desc * search_entry_queue( __acceptable_t * acceptables, int acc_count, monitor_desc ** monitors, int count ) {

	__thread_queue_t * queue = &monitors[0]->entry_queue;
	__acceptable_t * acc_end = acceptables + acc_count;

	// Walk the queue through the address of each link
	thread_desc ** link = &queue->head;
	while( *link ) {
		// Try every acceptable against the current thread
		for( __acceptable_t * acc = acceptables; acc != acc_end; acc++ ) {
			if( match( acc, *link ) ) {
				// Found one: take it out of the entry queue and hand it back
				return remove( queue, link );
			}
		}

		link = &(*link)->next;
	}

	return NULL;
}
// Empty queue: no head, and the tail designates the head slot itself.
void ?{}( __condition_blocked_queue_t & this ) {
	this.tail = &this.head;
	this.head = NULL;
}

// Link node `c` at the end of the queue and move the tail to its next slot.
void append( __condition_blocked_queue_t * this, __condition_node_t * c ) {
	verify(this->tail != NULL);
	__condition_node_t ** slot = this->tail;
	*slot = c;
	this->tail = &c->next;
}

// Detach and return the first node of the queue, or NULL when the queue is empty.
__condition_node_t * pop_head( __condition_blocked_queue_t * this ) {
	__condition_node_t * first = this->head;
	if( !first ) return first;

	__condition_node_t * rest = first->next;
	this->head = rest;

	// Queue is now empty: point the tail back at the head slot
	if( !rest ) {
		this->tail = &this->head;
	}

	first->next = NULL;
	return first;
}

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //
