// // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo // // The contents of this file are covered under the licence agreement in the // file "LICENCE" distributed with Cforall. // // bits/locks.hfa -- Fast internal locks. // // Author : Thierry Delisle // Created On : Tue Oct 31 15:14:38 2017 // Last Modified By : Peter A. Buhr // Last Modified On : Wed Aug 12 14:18:07 2020 // Update Count : 13 // #pragma once #include "bits/debug.hfa" #include "bits/defs.hfa" #include #ifdef __cforall extern "C" { #include } #endif // pause to prevent excess processor bus usage #if defined( __i386 ) || defined( __x86_64 ) #define Pause() __asm__ __volatile__ ( "pause" : : : ) #elif defined( __ARM_ARCH ) #define Pause() __asm__ __volatile__ ( "YIELD" : : : ) #else #error unsupported architecture #endif struct __spinlock_t { // Wrap in struct to prevent false sharing with debug info volatile bool lock; #ifdef __CFA_DEBUG__ // previous function to acquire the lock const char * prev_name; // previous thread to acquire the lock void* prev_thrd; #endif }; #ifdef __cforall extern "C" { extern void disable_interrupts() OPTIONAL_THREAD; extern void enable_interrupts_noPoll() OPTIONAL_THREAD; #ifdef __CFA_DEBUG__ void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]); #else #define __cfaabi_dbg_record_lock(x, y) #endif } static inline void ?{}( __spinlock_t & this ) { this.lock = 0; } // Lock the spinlock, return false if already acquired static inline bool try_lock ( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) { disable_interrupts(); bool result = (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0); if( result ) { __cfaabi_dbg_record_lock( this, caller ); } else { enable_interrupts_noPoll(); } return result; } // Lock the spinlock, spin if already acquired static inline void lock( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) { #ifndef NOEXPBACK enum { SPIN_START = 4, SPIN_END = 64 * 1024, }; unsigned int spin = SPIN_START; 
#endif disable_interrupts(); for ( unsigned int i = 1;; i += 1 ) { if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break; #ifndef NOEXPBACK // exponential spin for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause(); // slowly increase by powers of 2 if ( i % 64 == 0 ) spin += spin; // prevent overflow if ( spin > SPIN_END ) spin = SPIN_START; #else Pause(); #endif } __cfaabi_dbg_record_lock( this, caller ); } static inline void unlock( __spinlock_t & this ) { __atomic_clear( &this.lock, __ATOMIC_RELEASE ); enable_interrupts_noPoll(); } #ifdef __CFA_WITH_VERIFY__ extern bool __cfaabi_dbg_in_kernel(); #endif extern "C" { char * strerror(int); } #define CHECKED(x) { int err = x; if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); } struct __bin_sem_t { pthread_mutex_t lock; pthread_cond_t cond; int val; }; static inline void ?{}(__bin_sem_t & this) with( this ) { // Create the mutex with error checking pthread_mutexattr_t mattr; pthread_mutexattr_init( &mattr ); pthread_mutexattr_settype( &mattr, PTHREAD_MUTEX_ERRORCHECK_NP); pthread_mutex_init(&lock, &mattr); pthread_cond_init (&cond, (const pthread_condattr_t *)0p); // workaround trac#208: cast should not be required val = 0; } static inline void ^?{}(__bin_sem_t & this) with( this ) { CHECKED( pthread_mutex_destroy(&lock) ); CHECKED( pthread_cond_destroy (&cond) ); } static inline void wait(__bin_sem_t & this) with( this ) { verify(__cfaabi_dbg_in_kernel()); CHECKED( pthread_mutex_lock(&lock) ); while(val < 1) { pthread_cond_wait(&cond, &lock); } val -= 1; CHECKED( pthread_mutex_unlock(&lock) ); } static inline bool post(__bin_sem_t & this) with( this ) { bool needs_signal = false; CHECKED( pthread_mutex_lock(&lock) ); if(val < 1) { val += 1; pthread_cond_signal(&cond); needs_signal = true; } CHECKED( pthread_mutex_unlock(&lock) ); return needs_signal; } #undef CHECKED struct $thread; extern void park( 
__cfaabi_dbg_ctx_param ); extern void unpark( struct $thread * this __cfaabi_dbg_ctx_param2 ); static inline struct $thread * active_thread (); // Semaphore which only supports a single thread struct single_sem { struct $thread * volatile ptr; }; static inline { void ?{}(single_sem & this) { this.ptr = 0p; } void ^?{}(single_sem & this) {} bool wait(single_sem & this) { for() { struct $thread * expected = this.ptr; if(expected == 1p) { if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { return false; } } else { /* paranoid */ verify( expected == 0p ); if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { park( __cfaabi_dbg_ctx ); return true; } } } } bool post(single_sem & this) { for() { struct $thread * expected = this.ptr; if(expected == 1p) return false; if(expected == 0p) { if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { return false; } } else { if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { unpark( expected __cfaabi_dbg_ctx2 ); return true; } } } } } // Synchronozation primitive which only supports a single thread and one post // Similar to a binary semaphore with a 'one shot' semantic // is expected to be discarded after each party call their side struct oneshot { // Internal state : // 0p : is initial state (wait will block) // 1p : fulfilled (wait won't block) // any thread : a thread is currently waiting struct $thread * volatile ptr; }; static inline { void ?{}(oneshot & this) { this.ptr = 0p; } void ^?{}(oneshot & this) {} // Wait for the post, return immidiately if it already happened. 
	// return true if the thread was parked
	bool wait(oneshot & this) {
		for() {
			struct $thread * expected = this.ptr;
			// Already posted: nothing to wait for.
			if(expected == 1p) return false;
			/* paranoid */ verify( expected == 0p );
			// Install the current thread as the waiter, then park until post() wakes us.
			if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
				park( __cfaabi_dbg_ctx );
				/* paranoid */ verify( this.ptr == 1p );
				return true;
			}
		}
	}

	// Mark as fulfilled, wake thread if needed
	// return true if a thread was unparked
	bool post(oneshot & this) {
		// Swap in the fulfilled marker; whatever was there tells us if someone waits.
		struct $thread * got = __atomic_exchange_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
		if( got == 0p ) return false;
		unpark( got __cfaabi_dbg_ctx2 );
		return true;
	}
}

// base types for future to build upon
// It is based on the 'oneshot' type to allow multiple futures
// to block on the same instance, permitting users to block a single
// thread on "any of" [a given set of] futures.
// does not support multiple threads waiting on the same future
struct future_t {
	// Internal state :
	//          0p : is initial state (wait will block)
	//          1p : fulfilled (wait won't block)
	//          2p : in progress ()
	//          3p : abandoned, server should delete
	// any oneshot : a context has been setup to wait, a thread could wait on it
	struct oneshot * volatile ptr;
};

static inline {
	void ?{}(future_t & this) {
		this.ptr = 0p;
	}

	void ^?{}(future_t & this) {}

	// check if the future is available
	bool available( future_t & this ) {
		return this.ptr == 1p;
	}

	// Prepare the future to be waited on
	// intended to be used by wait, wait_any, waitfor, etc. rather than used directly
	// Returns true if the wait context was installed (caller should wait on it),
	// false if the future was already fulfilled.
	bool setup( future_t & this, oneshot & wait_ctx ) {
		/* paranoid */ verify( wait_ctx.ptr == 0p );
		// The future needs to set the wait context
		for() {
			struct oneshot * expected = this.ptr;
			// Is the future already fulfilled?
			if(expected == 1p) return false; // Yes, just return false (didn't block)

			// The future is not fulfilled, try to setup the wait context
			/* paranoid */ verify( expected == 0p );
			if(__atomic_compare_exchange_n(&this.ptr, &expected, &wait_ctx, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
				return true;
			}
		}
	}

	// Stop waiting on a future
	// When multiple futures are waited for together in "any of" pattern
	// futures that weren't fulfilled before the thread woke up
	// should retract the wait ctx
	// intended to be used by wait, wait_any, waitfor, etc. rather than used directly
	void retract( future_t & this, oneshot & wait_ctx ) {
		// Remove the wait context
		struct oneshot * got = __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST);

		// got == 0p: future was never actually setup, just return
		if( got == 0p ) return;

		// got == wait_ctx: since fulfil does an atomic_swap,
		// if we got back the original then no one else saw context
		// It is safe to delete (which could happen after the return)
		if( got == &wait_ctx ) return;

		// got == 1p: the future is ready and the context was fully consumed
		// the server won't use the pointer again
		// It is safe to delete (which could happen after the return)
		if( got == 1p ) return;

		// got == 2p: the future is ready but the context hasn't fully been consumed
		// spin until it is safe to move on
		if( got == 2p ) {
			while( this.ptr != 1p ) Pause();
			return;
		}

		// got == anything else, something went wrong here, abort
		abort("Future in unexpected state");
	}

	// Mark the future as abandoned, meaning it will be deleted by the server
	void abandon( future_t & this ) {
		struct oneshot * got = __atomic_exchange_n( &this.ptr, 3p, __ATOMIC_SEQ_CST);

		// got == 2p: the future is ready but the context hasn't fully been consumed
		// spin until it is safe to move on
		if( got == 2p ) {
			while( this.ptr != 1p ) Pause();
		}
		return;
	}

	// from the server side, mark the future as fulfilled
	// delete it if needed
	// Returns true iff a waiting thread was unparked via the oneshot.
	bool fulfil( future_t & this ) {
		for() {
			struct oneshot * expected = this.ptr;
			// was this abandoned?
			// NOTE(review): frees the future itself — assumes abandoned futures are
			// heap-allocated and ownership passed to the server; confirm with callers.
			if( expected == 3p ) { free( &this ); return false; }

			/* paranoid */ verify( expected != 1p ); // Future is already fulfilled, should not happen
			/* paranoid */ verify( expected != 2p ); // Future is being fulfilled by someone else, this is even less supported than the previous case.

			// If there is a wait context, we need to consume it and mark it as consumed after
			// If there is no context then we can skip the in progress phase
			struct oneshot * want = expected == 0p ? 1p : 2p;
			if(__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
				if( expected == 0p ) { /* paranoid */ verify( this.ptr == 1p); return false; }
				// Wake the waiter (if any) through its wait context, then mark fully fulfilled.
				bool ret = post( *expected );
				__atomic_store_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
				return ret;
			}
		}
	}

	// Wait for the future to be fulfilled
	// Returns true iff this thread actually parked.
	bool wait( future_t & this ) {
		oneshot temp;
		if( !setup(this, temp) ) return false;

		// Wait context is setup, just wait on it
		bool ret = wait( temp );

		// Wait for the in-progress phase (2p) to finish before temp goes out of scope
		while( this.ptr == 2p ) Pause();
		// Make sure the state makes sense
		// Should be fulfilled, could be in progress but it's out of date if so
		// since if that is the case, the oneshot was fulfilled (unparking this thread)
		// and the oneshot should not be needed any more
		__attribute__((unused)) struct oneshot * was = this.ptr;
		/* paranoid */ verifyf( was == 1p, "Expected this.ptr to be 1p, was %p\n", was );

		// Mark the future as fulfilled, to be consistent
		// with potential calls to avail
		// this.ptr = 1p;
		return ret;
	}
}
#endif