//
// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel/fwd.hfa -- PUBLIC
// Fundamental code needed to implement threading M.E.S. algorithms.
//
// Author           : Thierry Delisle
// Created On       : Thu Jul 30 16:46:41 2020
// Last Modified By :
// Last Modified On :
// Update Count     :
//

#pragma once

#include "bits/defs.hfa"
#include "bits/debug.hfa"

#ifdef __cforall
#include "bits/random.hfa"
#endif

struct thread$;
struct processor;
struct cluster;

enum __Preemption_Reason { __NO_PREEMPTION, __ALARM_PREEMPTION, __POLL_PREEMPTION, __MANUAL_PREEMPTION };

#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]

#ifdef __cforall
extern "C" {
	extern "Cforall" {
		extern __attribute__((aligned(64))) __thread struct KernelThreadData {
			struct thread$    * volatile this_thread;
			struct processor  * volatile this_processor;
			volatile bool sched_lock;

			struct {
				volatile unsigned short disable_count;
				volatile bool enabled;
				volatile bool in_progress;
			} preemption_state;

			PRNG_STATE_T random_state;

			struct {
				uint64_t fwd_seed;
				uint64_t bck_seed;
			} ready_rng;

			struct __stats_t * volatile this_stats;

			#ifdef __CFA_WITH_VERIFY__
				// Debug, check if the rwlock is owned for reading
				bool in_sched_lock;
				unsigned sched_id;
			#endif
		} __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" )));

		extern bool __preemption_enabled();

		static inline KernelThreadData & kernelTLS( void ) {
			/* paranoid */ verify( ! __preemption_enabled() );
			return __cfaabi_tls;
		}

		extern uintptr_t __cfatls_get( unsigned long int member );
		#define publicTLS_get( member ) ((typeof(__cfaabi_tls.member))__cfatls_get( __builtin_offsetof(KernelThreadData, member) ))

		static inline
		#ifdef __x86_64__ // 64-bit architecture
			uint64_t
		#else // 32-bit architecture
			uint32_t
		#endif // __x86_64__
		__tls_rand() {
			return PRNG_NAME( kernelTLS().random_state );
		}

		static inline unsigned __tls_rand_fwd() {
			return LCGBI_fwd( kernelTLS().ready_rng.fwd_seed );
		}

		static inline unsigned __tls_rand_bck() {
			return LCGBI_bck( kernelTLS().ready_rng.bck_seed );
		}

		static inline void __tls_rand_advance_bck(void) {
			kernelTLS().ready_rng.bck_seed = kernelTLS().ready_rng.fwd_seed;
		}
	}

	extern void disable_interrupts();
	extern void enable_interrupts( bool poll = false );

	extern "Cforall" {
		enum unpark_hint { UNPARK_LOCAL, UNPARK_REMOTE };

		extern void park( void );
		extern void unpark( struct thread$ *, unpark_hint );
		static inline void unpark( struct thread$ * thrd ) { unpark(thrd, UNPARK_LOCAL); }

		static inline struct thread$ * active_thread () {
			struct thread$ * t = publicTLS_get( this_thread );
			/* paranoid */ verify( t );
			return t;
		}

		extern bool force_yield( enum __Preemption_Reason );
		static inline void yield() {
			force_yield(__MANUAL_PREEMPTION);
		}

		// Yield: yield N times
		static inline void yield( size_t times ) {
			for ( times ) {
				yield();
			}
		}

		// Semaphore which only supports a single thread
		struct single_sem {
			struct thread$ * volatile ptr;
		};

		static inline {
			void  ?{}(single_sem & this) {
				this.ptr = 0p;
			}

			void ^?{}(single_sem &) {}

			bool wait(single_sem & this) {
				for () {
					struct thread$ * expected = this.ptr;
					if (expected == 1p) {
						if (__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							return false;
						}
					}
					else {
						/* paranoid */ verify( expected == 0p );
						if (__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							park();
							return true;
						}
					}
				}
			}

			bool post(single_sem & this) {
				for () {
					struct thread$ * expected = this.ptr;
					if (expected == 1p) return false;
					if (expected == 0p) {
						if (__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							return false;
						}
					}
					else {
						if (__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							unpark( expected );
							return true;
						}
					}
				}
			}
		}
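
		// Usage sketch (illustrative only, compiled out): a single consumer
		// thread blocks on the semaphore until a producer posts it. The
		// instance name 'ready' and the surrounding functions are hypothetical.
		#if 0
		single_sem ready;

		void consumer_step() {
			// Parks the calling thread unless a post already happened;
			// returns true only if the thread actually parked.
			bool parked = wait( ready );
		}

		void producer_step() {
			// Wakes the waiting thread, or leaves a token for the next wait;
			// returns true only if a thread was actually unparked.
			bool woke = post( ready );
		}
		#endif // single_sem usage sketch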

		// Synchronization primitive which only supports a single thread and one post
		// Similar to a binary semaphore with a 'one-shot' semantic
		// it is expected to be discarded after each party calls its side
		enum(struct thread$ *) { oneshot_ARMED = 0p, oneshot_FULFILLED = 1p };
		struct oneshot {
			// Internal state :
			//     armed      : initial state, wait will block
			//     fulfilled  : wait won't block
			//     any thread : a thread is currently waiting
			struct thread$ * volatile ptr;
		};

		static inline {
			void  ?{}(oneshot & this) {
				this.ptr = oneshot_ARMED;
			}

			void ^?{}(oneshot &) {}

			// Wait for the post, return immediately if it already happened.
			// return true if the thread was parked
			bool wait(oneshot & this) {
				for () {
					struct thread$ * expected = this.ptr;
					if (expected == oneshot_FULFILLED) return false;
					if (__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
						park();
						/* paranoid */ verify( this.ptr == oneshot_FULFILLED );
						return true;
					}
				}
			}

			// Mark as fulfilled, wake thread if needed
			// returns the unparked thread, or 0p if no thread was waiting
			thread$ * post(oneshot & this, bool do_unpark = true) {
				struct thread$ * got = __atomic_exchange_n( &this.ptr, oneshot_FULFILLED, __ATOMIC_SEQ_CST);
				if ( got == oneshot_ARMED || got == oneshot_FULFILLED ) return 0p;
				if (do_unpark) unpark( got );
				return got;
			}
		}
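
		// Usage sketch (illustrative only, compiled out): one waiter and one
		// poster hand off through a oneshot, which is then discarded. The
		// instance and function names are hypothetical.
		#if 0
		oneshot handoff;

		void waiter_side() {
			// Parks until post is called; returns false if the post already
			// happened and no park was needed.
			bool parked = wait( handoff );
			// The oneshot is now fulfilled and must not be reused.
		}

		void poster_side() {
			// Fulfils the oneshot and unparks the waiter if present; returns
			// the unparked thread, or 0p if nobody was waiting yet.
			thread$ * woken = post( handoff );
		}
		#endif // oneshot usage sketch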

		// Base type for futures to build upon
		// It is based on the 'oneshot' type to allow multiple futures
		// to block on the same instance, permitting users to block a single
		// thread on "any of" [a given set of] futures.
		// Does not support multiple threads waiting on the same future.
		enum(struct oneshot *) { future_ARMED = 0p, future_FULFILLED = 1p, future_PROGRESS = 2p, future_ABANDONED = 3p };
		struct future_t {
			// Internal state :
			//     armed       : initial state, wait will block
			//     fulfilled   : result is ready, wait won't block
			//     progress    : someone else is in the process of fulfilling this
			//     abandoned   : client no longer cares, server should delete
			//     any oneshot : a context has been setup to wait, a thread could wait on it
			struct oneshot * volatile ptr;
		};

		static inline {
			void  ?{}(future_t & this) {
				this.ptr = future_ARMED;
			}

			void ^?{}(future_t &) {}

			void reset(future_t & this) {
				// must currently be in the armed (0p) or fulfilled (1p) state
				__atomic_exchange_n( &this.ptr, future_ARMED, __ATOMIC_SEQ_CST);
			}

			// check if the future is available
			bool available( future_t & this ) {
				while( this.ptr == future_PROGRESS ) Pause();
				return this.ptr == future_FULFILLED;
			}

			// Prepare the future to be waited on
			// intended to be used by wait, wait_any, waitfor, etc. rather than called directly
			bool setup( future_t & this, oneshot & wait_ctx ) {
				/* paranoid */ verify( wait_ctx.ptr == oneshot_ARMED || wait_ctx.ptr == oneshot_FULFILLED );
				// The future needs to set the wait context
				for () {
					struct oneshot * expected = this.ptr;
					// Is the future already fulfilled?
					if (expected == future_FULFILLED) return false; // Yes, just return false (didn't block)

					// The future is not fulfilled, try to setup the wait context
					if (__atomic_compare_exchange_n(&this.ptr, &expected, &wait_ctx, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
						return true;
					}
				}
			}

			// Stop waiting on a future
			// When multiple futures are waited for together in "any of" pattern
			// futures that weren't fulfilled before the thread woke up
			// should retract the wait ctx
			// intended to be used by wait, wait_any, waitfor, etc. rather than called directly
			bool retract( future_t & this, oneshot & wait_ctx ) {
				struct oneshot * expected = &wait_ctx;

				// attempt to remove the context so it doesn't get consumed.
				if (__atomic_compare_exchange_n( &this.ptr, &expected, future_ARMED, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
					// we still have the original context, so no one else saw it
					return false;
				}

				// expected == ARMED: future was never actually setup, just return
				if ( expected == future_ARMED ) return false;

				// expected == FULFILLED: the future is ready and the context was fully consumed
				// the server won't use the pointer again
				// It is safe to delete (which could happen after the return)
				if ( expected == future_FULFILLED ) return true;

				// expected == PROGRESS: the future is ready but the context hasn't fully been consumed
				// spin until it is safe to move on
				if ( expected == future_PROGRESS ) {
					while( this.ptr != future_FULFILLED ) Pause();
					/* paranoid */ verify( this.ptr == future_FULFILLED );
					return true;
				}

				// anything else: the future was setup with a different context ?!?!
				// something went wrong here, abort
				abort("Future in unexpected state");
			}

			// Mark the future as abandoned, meaning it will be deleted by the server
			bool abandon( future_t & this ) {
				/* paranoid */ verify( this.ptr != future_ABANDONED );

				// Mark the future as abandoned
				struct oneshot * got = __atomic_exchange_n( &this.ptr, future_ABANDONED, __ATOMIC_SEQ_CST);

				// If the future isn't already fulfilled, let the server delete it
				if ( got == future_ARMED ) return false;

				// got == PROGRESS: the future is ready but the context hasn't fully been consumed
				// spin until it is safe to move on
				if ( got == future_PROGRESS ) {
					while( this.ptr != future_FULFILLED ) Pause();
					got = future_FULFILLED;
				}

				// The future is completed, delete it now
				/* paranoid */ verify( this.ptr != future_FULFILLED );
				free( &this );
				return true;
			}

			// from the server side, mark the future as fulfilled
			// delete it if needed
			thread$ * fulfil( future_t & this, bool do_unpark = true ) {
				for () {
					struct oneshot * expected = this.ptr;

					#if defined(__GNUC__) && __GNUC__ >= 7
						// SKULLDUGGERY: gcc bug does not handle push/pop for -Wfree-nonheap-object
						//#pragma GCC diagnostic push
						#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
					#endif

					if ( expected == future_ABANDONED ) {
						free( &this );
						return 0p;
					}

					#if defined(__GNUC__) && __GNUC__ >= 7
						//#pragma GCC diagnostic pop
					#endif

					/* paranoid */ verify( expected != future_FULFILLED ); // Future is already fulfilled, should not happen
					/* paranoid */ verify( expected != future_PROGRESS ); // Future is being fulfilled by someone else, this is even less supported than the previous case.

					// If there is a wait context, we need to consume it and mark it as consumed after
					// If there is no context then we can skip the in-progress phase
					struct oneshot * want = expected == future_ARMED ? future_FULFILLED : future_PROGRESS;
					if (__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
						if ( expected == future_ARMED ) {
							return 0p;
						}
						thread$ * ret = post( *expected, do_unpark );
						__atomic_store_n( &this.ptr, future_FULFILLED, __ATOMIC_SEQ_CST);
						return ret;
					}
				}
			}

			// Wait for the future to be fulfilled
			bool wait( future_t & this ) {
				oneshot temp;
				if ( !setup(this, temp) ) return false;

				// Wait context is setup, just wait on it
				bool ret = wait( temp );

				// Wait for the fulfilling thread to finish consuming the context
				while( this.ptr == future_PROGRESS ) Pause();

				// Make sure the state makes sense
				// Should be fulfilled, could be in progress but it's out of date if so
				// since if that is the case, the oneshot was fulfilled (unparking this thread)
				// and the oneshot should not be needed any more
				struct oneshot * was __attribute__((unused)) = this.ptr; // used in optional verify
				/* paranoid */ verifyf( was == future_FULFILLED, "Expected this.ptr to be 1p, was %p\n", was );

				// Mark the future as fulfilled, to be consistent
				// with potential calls to avail
				// this.ptr = 1p;
				return ret;
			}

			// Wait for any future to be fulfilled
			forall(T& | sized(T) | { bool setup( T&, oneshot & ); bool retract( T&, oneshot & ); })
			T & wait_any( T * futures, size_t num_futures ) {
				oneshot temp;

				// setup all futures
				// if any are already satisfied return
				for ( i; num_futures ) {
					if ( !setup(futures[i], temp) ) return futures[i];
				}

				// Wait context is setup, just wait on it
				wait( temp );

				size_t ret;
				// attempt to retract all futures
				for ( i; num_futures ) {
					if ( retract( futures[i], temp ) ) ret = i;
				}

				return futures[ret];
			}
		}
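
		// Usage sketch (illustrative only, compiled out): a client/server
		// round trip through a future_t. The instance and function names
		// are hypothetical; the result itself lives outside the future.
		#if 0
		future_t request_done;

		void client_side() {
			// Blocks until the server fulfils the future, or returns
			// immediately if it is already fulfilled.
			wait( request_done );
			// ... consume the result guarded by the future ...
			reset( request_done ); // optionally rearm for another round
		}

		void server_side() {
			// ... produce the result guarded by the future ...
			fulfil( request_done ); // unparks the client if it is parked
		}
		#endif // future_t usage sketch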

		//-----------------------------------------------------------------------
		// Statistics, called at the end of each thread to register statistics
		#if !defined(__CFA_NO_STATISTICS__)
			static inline struct __stats_t * __tls_stats() {
				/* paranoid */ verify( ! __preemption_enabled() );
				/* paranoid */ verify( kernelTLS().this_stats );
				return kernelTLS().this_stats;
			}

			#define __STATS__(in_kernel, ...) { \
				if ( !(in_kernel) ) disable_interrupts(); \
				with ( *__tls_stats() ) { \
					__VA_ARGS__ \
				} \
				if ( !(in_kernel) ) enable_interrupts(); \
			}
			#if defined(CFA_HAVE_LINUX_IO_URING_H)
				#define __IO_STATS__(in_kernel, ...) { \
					if ( !(in_kernel) ) disable_interrupts(); \
					with ( *__tls_stats() ) { \
						__VA_ARGS__ \
					} \
					if ( !(in_kernel) ) enable_interrupts(); \
				}
			#else
				#define __IO_STATS__(in_kernel, ...)
			#endif
		#else
			#define __STATS__(in_kernel, ...)
			#define __IO_STATS__(in_kernel, ...)
		#endif
	}
}
#endif // __cforall
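
// Usage sketch (illustrative only, compiled out): bumping a counter on the
// thread-local stats block from code already running with interrupts
// disabled (in_kernel == true). The counter name 'ready.pick.lsuccess' is
// hypothetical; the real counters are defined by the stats headers.
#if 0
void scheduler_step() {
	__STATS__( true,
		ready.pick.lsuccess++;
	)
}
#endif // __STATS__ usage sketch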