//
// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel/fwd.hfa -- PUBLIC
// Fundamental code needed to implement threading M.E.S. algorithms.
//
// Author           : Thierry Delisle
// Created On       : Thu Jul 30 16:46:41 2020
// Last Modified By :
// Last Modified On :
// Update Count     :
//

#pragma once

#include "bits/defs.hfa"
#include "bits/debug.hfa"

#ifdef __cforall
#include "bits/random.hfa"
#endif

struct thread$;
struct processor;
struct cluster;

enum __Preemption_Reason { __NO_PREEMPTION, __ALARM_PREEMPTION, __POLL_PREEMPTION, __MANUAL_PREEMPTION };

#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]

#ifdef __cforall
extern "C" {
	extern "Cforall" {
		extern __attribute__((aligned(64))) __thread struct KernelThreadData {
			struct thread$    * volatile this_thread;
			struct processor  * volatile this_processor;
			volatile bool sched_lock;

			struct {
				volatile unsigned short disable_count;
				volatile bool enabled;
				volatile bool in_progress;
			} preemption_state;

			PRNG_STATE_T random_state;

			struct {
				uint64_t fwd_seed;
				uint64_t bck_seed;
			} ready_rng;

			struct __stats_t * volatile this_stats;

			#ifdef __CFA_WITH_VERIFY__
				// Debug, check if the rwlock is owned for reading
				bool in_sched_lock;
				unsigned sched_id;
			#endif
		} __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" )));

		extern bool __preemption_enabled();

		static inline KernelThreadData & kernelTLS( void ) {
			/* paranoid */ verify( ! __preemption_enabled() );
			return __cfaabi_tls;
		}

		extern uintptr_t __cfatls_get( unsigned long int member );
		#define publicTLS_get( member ) ((typeof(__cfaabi_tls.member))__cfatls_get( __builtin_offsetof(KernelThreadData, member) ))

		static inline
		#ifdef __x86_64__ // 64-bit architecture
			uint64_t
		#else // 32-bit architecture
			uint32_t
		#endif // __x86_64__
		__tls_rand() {
			return PRNG_NAME( kernelTLS().random_state );
		}

		static inline unsigned __tls_rand_fwd() {
			return LCGBI_fwd( kernelTLS().ready_rng.fwd_seed );
		}

		static inline unsigned __tls_rand_bck() {
			return LCGBI_bck( kernelTLS().ready_rng.bck_seed );
		}

		static inline void __tls_rand_advance_bck(void) {
			kernelTLS().ready_rng.bck_seed = kernelTLS().ready_rng.fwd_seed;
		}
	}

	extern void disable_interrupts();
	extern void enable_interrupts( bool poll = false );

	extern "Cforall" {
		enum unpark_hint { UNPARK_LOCAL, UNPARK_REMOTE };

		extern void park( void );
		extern void unpark( struct thread$ *, unpark_hint );
		static inline void unpark( struct thread$ * thrd ) { unpark(thrd, UNPARK_LOCAL); }

		static inline struct thread$ * active_thread () {
			struct thread$ * t = publicTLS_get( this_thread );
			/* paranoid */ verify( t );
			return t;
		}

		extern bool force_yield( enum __Preemption_Reason );
		static inline void yield() {
			force_yield(__MANUAL_PREEMPTION);
		}

		// Yield: yield N times
		static inline void yield( size_t times ) {
			for ( times ) {
				yield();
			}
		}

		// Semaphore which only supports a single thread
		struct single_sem {
			struct thread$ * volatile ptr;
		};

		static inline {
			void  ?{}(single_sem & this) {
				this.ptr = 0p;
			}

			void ^?{}(single_sem &) {}

			bool wait(single_sem & this) {
				for () {
					struct thread$ * expected = this.ptr;
					if (expected == 1p) {
						if (__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							return false;
						}
					}
					else {
						/* paranoid */ verify( expected == 0p );
						if (__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							park();
							return true;
						}
					}
				}
			}

			bool post(single_sem & this) {
				for () {
					struct thread$ * expected = this.ptr;
					if (expected == 1p) return false;
					if (expected == 0p) {
						if (__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							return false;
						}
					}
					else {
						if (__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							unpark( expected );
							return true;
						}
					}
				}
			}
		}
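
		// Usage sketch (illustrative only, compiled out): a single consumer
		// thread blocks on the semaphore until a producer posts it. The
		// instance name 'ready' and the surrounding functions are hypothetical.
		#if 0
		single_sem ready;

		void consumer_step() {
			// Parks the calling thread unless a post already happened;
			// returns true only if the thread actually parked.
			bool parked = wait( ready );
		}

		void producer_step() {
			// Wakes the waiting thread, or leaves a token for the next wait;
			// returns true only if a thread was actually unparked.
			bool woke = post( ready );
		}
		#endif // single_sem usage sketch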

		// Synchronization primitive which only supports a single thread and one post
		// Similar to a binary semaphore with a 'one-shot' semantic
		// it is expected to be discarded after each party calls its side
		enum(struct thread$ *) { oneshot_ARMED = 0p, oneshot_FULFILLED = 1p };
		struct oneshot {
			// Internal state :
			//     armed      : initial state, wait will block
			//     fulfilled  : wait won't block
			//     any thread : a thread is currently waiting
			struct thread$ * volatile ptr;
		};

		static inline {
			void  ?{}(oneshot & this) {
				this.ptr = oneshot_ARMED;
			}

			void ^?{}(oneshot &) {}

			// Wait for the post, return immediately if it already happened.
			// return true if the thread was parked
			bool wait(oneshot & this) {
				for () {
					struct thread$ * expected = this.ptr;
					if (expected == oneshot_FULFILLED) return false;
					if (__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
						park();
						/* paranoid */ verify( this.ptr == oneshot_FULFILLED );
						return true;
					}
				}
			}

			// Mark as fulfilled, wake thread if needed
			// returns the unparked thread, or 0p if no thread was waiting
			thread$ * post(oneshot & this, bool do_unpark = true) {
				struct thread$ * got = __atomic_exchange_n( &this.ptr, oneshot_FULFILLED, __ATOMIC_SEQ_CST);
				if ( got == oneshot_ARMED || got == oneshot_FULFILLED ) return 0p;
				if (do_unpark) unpark( got );
				return got;
			}
		}
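
		// Usage sketch (illustrative only, compiled out): one waiter and one
		// poster hand off through a oneshot, which is then discarded. The
		// instance and function names are hypothetical.
		#if 0
		oneshot handoff;

		void waiter_side() {
			// Parks until post is called; returns false if the post already
			// happened and no park was needed.
			bool parked = wait( handoff );
			// The oneshot is now fulfilled and must not be reused.
		}

		void poster_side() {
			// Fulfils the oneshot and unparks the waiter if present; returns
			// the unparked thread, or 0p if nobody was waiting yet.
			thread$ * woken = post( handoff );
		}
		#endif // oneshot usage sketch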

		// Base type for futures to build upon
		// It is based on the 'oneshot' type to allow multiple futures
		// to block on the same instance, permitting users to block a single
		// thread on "any of" [a given set of] futures.
		// Does not support multiple threads waiting on the same future.
		enum(struct oneshot *) { future_ARMED = 0p, future_FULFILLED = 1p, future_PROGRESS = 2p, future_ABANDONED = 3p };
		struct future_t {
			// Internal state :
			//     armed       : initial state, wait will block
			//     fulfilled   : result is ready, wait won't block
			//     progress    : someone else is in the process of fulfilling this
			//     abandoned   : client no longer cares, server should delete
			//     any oneshot : a context has been setup to wait, a thread could wait on it
			struct oneshot * volatile ptr;
		};

		static inline {
			void  ?{}(future_t & this) {
				this.ptr = future_ARMED;
			}

			void ^?{}(future_t &) {}

			void reset(future_t & this) {
				// must currently be in the armed (0p) or fulfilled (1p) state
				__atomic_exchange_n( &this.ptr, future_ARMED, __ATOMIC_SEQ_CST);
			}

			// check if the future is available
			bool available( future_t & this ) {
				while( this.ptr == future_PROGRESS ) Pause();
				return this.ptr == future_FULFILLED;
			}

			// Prepare the future to be waited on
			// intended to be used by wait, wait_any, waitfor, etc. rather than called directly
			bool setup( future_t & this, oneshot & wait_ctx ) {
				/* paranoid */ verify( wait_ctx.ptr == oneshot_ARMED || wait_ctx.ptr == oneshot_FULFILLED );
				// The future needs to set the wait context
				for () {
					struct oneshot * expected = this.ptr;
					// Is the future already fulfilled?
					if (expected == future_FULFILLED) return false; // Yes, just return false (didn't block)

					// The future is not fulfilled, try to setup the wait context
					if (__atomic_compare_exchange_n(&this.ptr, &expected, &wait_ctx, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
						return true;
					}
				}
			}

			// Stop waiting on a future
			// When multiple futures are waited for together in "any of" pattern
			// futures that weren't fulfilled before the thread woke up
			// should retract the wait ctx
			// intended to be used by wait, wait_any, waitfor, etc. rather than called directly
			bool retract( future_t & this, oneshot & wait_ctx ) {
				struct oneshot * expected = &wait_ctx;

				// attempt to remove the context so it doesn't get consumed.
				if (__atomic_compare_exchange_n( &this.ptr, &expected, future_ARMED, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
					// we still have the original context, so no one else saw it
					return false;
				}

				// expected == ARMED: future was never actually setup, just return
				if ( expected == future_ARMED ) return false;

				// expected == FULFILLED: the future is ready and the context was fully consumed
				// the server won't use the pointer again
				// It is safe to delete (which could happen after the return)
				if ( expected == future_FULFILLED ) return true;

				// expected == PROGRESS: the future is ready but the context hasn't fully been consumed
				// spin until it is safe to move on
				if ( expected == future_PROGRESS ) {
					while( this.ptr != future_FULFILLED ) Pause();
					/* paranoid */ verify( this.ptr == future_FULFILLED );
					return true;
				}

				// anything else: the future was setup with a different context ?!?!
				// something went wrong here, abort
				abort("Future in unexpected state");
			}

			// Mark the future as abandoned, meaning it will be deleted by the server
			bool abandon( future_t & this ) {
				/* paranoid */ verify( this.ptr != future_ABANDONED );

				// Mark the future as abandoned
				struct oneshot * got = __atomic_exchange_n( &this.ptr, future_ABANDONED, __ATOMIC_SEQ_CST);

				// If the future isn't already fulfilled, let the server delete it
				if ( got == future_ARMED ) return false;

				// got == PROGRESS: the future is ready but the context hasn't fully been consumed
				// spin until it is safe to move on
				if ( got == future_PROGRESS ) {
					while( this.ptr != future_FULFILLED ) Pause();
					got = future_FULFILLED;
				}

				// The future is completed, delete it now
				/* paranoid */ verify( this.ptr != future_FULFILLED );
				free( &this );
				return true;
			}

			// from the server side, mark the future as fulfilled
			// delete it if needed
			thread$ * fulfil( future_t & this, bool do_unpark = true ) {
				for () {
					struct oneshot * expected = this.ptr;

					#if defined(__GNUC__) && __GNUC__ >= 7
						// SKULLDUGGERY: gcc bug does not handle push/pop for -Wfree-nonheap-object
						//#pragma GCC diagnostic push
						#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
					#endif

					if ( expected == future_ABANDONED ) {
						free( &this );
						return 0p;
					}

					#if defined(__GNUC__) && __GNUC__ >= 7
						//#pragma GCC diagnostic pop
					#endif

					/* paranoid */ verify( expected != future_FULFILLED ); // Future is already fulfilled, should not happen
					/* paranoid */ verify( expected != future_PROGRESS ); // Future is being fulfilled by someone else, this is even less supported than the previous case.

					// If there is a wait context, we need to consume it and mark it as consumed after
					// If there is no context then we can skip the in-progress phase
					struct oneshot * want = expected == future_ARMED ? future_FULFILLED : future_PROGRESS;
					if (__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
						if ( expected == future_ARMED ) {
							return 0p;
						}
						thread$ * ret = post( *expected, do_unpark );
						__atomic_store_n( &this.ptr, future_FULFILLED, __ATOMIC_SEQ_CST);
						return ret;
					}
				}
			}

			// Wait for the future to be fulfilled
			bool wait( future_t & this ) {
				oneshot temp;
				if ( !setup(this, temp) ) return false;

				// Wait context is setup, just wait on it
				bool ret = wait( temp );

				// Wait for the fulfilling thread to finish consuming the context
				while( this.ptr == future_PROGRESS ) Pause();

				// Make sure the state makes sense
				// Should be fulfilled, could be in progress but it's out of date if so
				// since if that is the case, the oneshot was fulfilled (unparking this thread)
				// and the oneshot should not be needed any more
				struct oneshot * was __attribute__((unused)) = this.ptr; // used in optional verify
				/* paranoid */ verifyf( was == future_FULFILLED, "Expected this.ptr to be 1p, was %p\n", was );

				// Mark the future as fulfilled, to be consistent
				// with potential calls to avail
				// this.ptr = 1p;
				return ret;
			}

			// Wait for any future to be fulfilled
			forall(T& | sized(T) | { bool setup( T&, oneshot & ); bool retract( T&, oneshot & ); })
			T & wait_any( T * futures, size_t num_futures ) {
				oneshot temp;

				// setup all futures
				// if any are already satisfied return
				for ( i; num_futures ) {
					if ( !setup(futures[i], temp) ) return futures[i];
				}

				// Wait context is setup, just wait on it
				wait( temp );

				size_t ret;
				// attempt to retract all futures
				for ( i; num_futures ) {
					if ( retract( futures[i], temp ) ) ret = i;
				}

				return futures[ret];
			}
		}
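
		// Usage sketch (illustrative only, compiled out): a client/server
		// round trip through a future_t. The instance and function names
		// are hypothetical; the result itself lives outside the future.
		#if 0
		future_t request_done;

		void client_side() {
			// Blocks until the server fulfils the future, or returns
			// immediately if it is already fulfilled.
			wait( request_done );
			// ... consume the result guarded by the future ...
			reset( request_done ); // optionally rearm for another round
		}

		void server_side() {
			// ... produce the result guarded by the future ...
			fulfil( request_done ); // unparks the client if it is parked
		}
		#endif // future_t usage sketch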

		//-----------------------------------------------------------------------
		// Statistics, called at the end of each thread to register statistics
		#if !defined(__CFA_NO_STATISTICS__)
			static inline struct __stats_t * __tls_stats() {
				/* paranoid */ verify( ! __preemption_enabled() );
				/* paranoid */ verify( kernelTLS().this_stats );
				return kernelTLS().this_stats;
			}

			#define __STATS__(in_kernel, ...) { \
				if ( !(in_kernel) ) disable_interrupts(); \
				with ( *__tls_stats() ) { \
					__VA_ARGS__ \
				} \
				if ( !(in_kernel) ) enable_interrupts(); \
			}
			#if defined(CFA_HAVE_LINUX_IO_URING_H)
				#define __IO_STATS__(in_kernel, ...) { \
					if ( !(in_kernel) ) disable_interrupts(); \
					with ( *__tls_stats() ) { \
						__VA_ARGS__ \
					} \
					if ( !(in_kernel) ) enable_interrupts(); \
				}
			#else
				#define __IO_STATS__(in_kernel, ...)
			#endif
		#else
			#define __STATS__(in_kernel, ...)
			#define __IO_STATS__(in_kernel, ...)
		#endif
	}
}
#endif // __cforall
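
// Usage sketch (illustrative only, compiled out): bumping a counter on the
// thread-local stats block from code already running with interrupts
// disabled (in_kernel == true). The counter name 'ready.pick.lsuccess' is
// hypothetical; the real counters are defined by the stats headers.
#if 0
void scheduler_step() {
	__STATS__( true,
		ready.pick.lsuccess++;
	)
}
#endif // __STATS__ usage sketch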