//
// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel/fwd.hfa -- PUBLIC
// Fundamental code needed to implement threading M.E.S. algorithms.
//
// Author           : Thierry Delisle
// Created On       : Thu Jul 30 16:46:41 2020
// Last Modified By :
// Last Modified On :
// Update Count     :
//

#pragma once

#include "bits/defs.hfa"
#include "bits/debug.hfa"

#ifdef __cforall
#include "bits/random.hfa"
#endif

struct thread$;
struct processor;
struct cluster;

enum __Preemption_Reason { __NO_PREEMPTION, __ALARM_PREEMPTION, __POLL_PREEMPTION, __MANUAL_PREEMPTION };

#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]

#ifdef __cforall
extern "C" {
	extern "Cforall" {
		extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
			struct thread$    * volatile this_thread;
			struct processor  * volatile this_processor;
			volatile bool sched_lock;

			struct {
				volatile unsigned short disable_count;
				volatile bool enabled;
				volatile bool in_progress;
			} preemption_state;

			#if defined(__SIZEOF_INT128__)
				__uint128_t rand_seed;
			#else
				uint64_t rand_seed;
			#endif
			struct {
				uint64_t fwd_seed;
				uint64_t bck_seed;
			} ready_rng;

			struct __stats_t  * volatile this_stats;

			#ifdef __CFA_WITH_VERIFY__
				// Debug, check if the rwlock is owned for reading
				bool in_sched_lock;
				unsigned sched_id;
			#endif
		} __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" )));

		extern bool __preemption_enabled();

		static inline KernelThreadData & kernelTLS( void ) {
			/* paranoid */ verify( ! __preemption_enabled() );
			return __cfaabi_tls;
		}

		extern uintptr_t __cfatls_get( unsigned long int member );
		#define publicTLS_get( member ) ((typeof(__cfaabi_tls.member))__cfatls_get( __builtin_offsetof(KernelThreadData, member) ))

		static inline uint64_t __tls_rand() {
			return
			#if defined(__SIZEOF_INT128__)
				lehmer64( kernelTLS().rand_seed );
			#else
				xorshift_13_7_17( kernelTLS().rand_seed );
			#endif
		}

		static inline unsigned __tls_rand_fwd() {
			return LCGBI_fwd( kernelTLS().ready_rng.fwd_seed );
		}

		static inline unsigned __tls_rand_bck() {
			return LCGBI_bck( kernelTLS().ready_rng.bck_seed );
		}

		static inline void __tls_rand_advance_bck(void) {
			kernelTLS().ready_rng.bck_seed = kernelTLS().ready_rng.fwd_seed;
		}
	}

	extern void disable_interrupts();
	extern void enable_interrupts( bool poll = false );

	extern "Cforall" {
		enum unpark_hint { UNPARK_LOCAL, UNPARK_REMOTE };

		extern void park( void );
		extern void unpark( struct thread$ *, unpark_hint );
		static inline void unpark( struct thread$ * thrd ) { unpark(thrd, UNPARK_LOCAL); }

		static inline struct thread$ * active_thread () {
			struct thread$ * t = publicTLS_get( this_thread );
			/* paranoid */ verify( t );
			return t;
		}

		extern bool force_yield( enum __Preemption_Reason );

		static inline void yield() {
			force_yield(__MANUAL_PREEMPTION);
		}

		// Yield: yield N times
		static inline void yield( unsigned times ) {
			for( times ) {
				yield();
			}
		}

		// Semaphore which only supports a single thread
		struct single_sem {
			struct thread$ * volatile ptr;
		};

		static inline {
			void ?{}(single_sem & this) {
				this.ptr = 0p;
			}

			void ^?{}(single_sem &) {}

			// If a post is already pending, consume it and return false (did not block).
			// Otherwise install the current thread and park; return true (blocked).
			bool wait(single_sem & this) {
				for() {
					struct thread$ * expected = this.ptr;
					if(expected == 1p) {
						if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							return false;
						}
					}
					else {
						/* paranoid */ verify( expected == 0p );
						if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							park();
							return true;
						}
					}
				}
			}

			// If a thread is waiting, unpark it and return true.
			// Otherwise record the post (unless one is already pending) and return false.
			bool post(single_sem & this) {
				for() {
					struct thread$ * expected = this.ptr;
					if(expected == 1p) return false;
					if(expected == 0p) {
						if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							return false;
						}
					}
					else {
						if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
							unpark( expected );
							return true;
						}
					}
				}
			}
		}
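
		// Illustrative sketch (not part of this header): a typical one-to-one
		// hand-off using single_sem. The surrounding thread bodies are
		// hypothetical; only wait() and post() above are real.
		//
		//   single_sem sem;
		//
		//   // Consumer side: blocks until the producer posts, unless a post
		//   // is already pending, in which case wait() returns immediately.
		//   if( wait(sem) ) { /* this thread parked and was unparked by post() */ }
		//
		//   // Producer side: wakes the consumer if it is parked; otherwise the
		//   // post is remembered and the next wait() will not block.
		//   post(sem);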

		// Synchronization primitive which only supports a single thread and one post
		// Similar to a binary semaphore with a 'one shot' semantic:
		// it is expected to be discarded after each party calls their side.
		struct oneshot {
			// Internal state :
			//     0p     : initial state (wait will block)
			//     1p     : fulfilled (wait won't block)
			// any thread : a thread is currently waiting
			struct thread$ * volatile ptr;
		};

		static inline {
			void ?{}(oneshot & this) {
				this.ptr = 0p;
			}

			void ^?{}(oneshot &) {}

			// Wait for the post, return immediately if it already happened.
			// Return true if the thread was parked.
			bool wait(oneshot & this) {
				for() {
					struct thread$ * expected = this.ptr;
					if(expected == 1p) return false;
					if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
						park();
						/* paranoid */ verify( this.ptr == 1p );
						return true;
					}
				}
			}

			// Mark as fulfilled, wake the waiting thread if needed.
			// Return the unparked thread, or 0p if no thread was waiting.
			thread$ * post(oneshot & this, bool do_unpark = true) {
				struct thread$ * got = __atomic_exchange_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
				if( got == 0p || got == 1p ) return 0p;
				if(do_unpark) unpark( got );
				return got;
			}
		}
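
		// Illustrative sketch (not part of this header): a one-time hand-off.
		// Unlike single_sem, a oneshot is spent after a single wait()/post()
		// pair and is expected to be discarded afterwards.
		//
		//   oneshot sig;
		//
		//   // Waiter side: parks unless the post already happened.
		//   wait(sig);
		//
		//   // Signaller side: unparks the waiter if one is parked.
		//   post(sig);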

		// Base type for futures to build upon.
		// It is based on the 'oneshot' type to allow multiple futures
		// to block on the same instance, permitting users to block a single
		// thread on "any of" [a given set of] futures.
		// Does not support multiple threads waiting on the same future.
		struct future_t {
			// Internal state :
			//      0p     : initial state (wait will block)
			//      1p     : fulfilled (wait won't block)
			//      2p     : in progress (being fulfilled)
			//      3p     : abandoned, server should delete
			// any oneshot : a context has been setup to wait, a thread could wait on it
			struct oneshot * volatile ptr;
		};

		static inline {
			void ?{}(future_t & this) {
				this.ptr = 0p;
			}

			void ^?{}(future_t &) {}

			void reset(future_t & this) {
				// needs to be in 0p or 1p
				__atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST);
			}

			// check if the future is available
			bool available( future_t & this ) {
				while( this.ptr == 2p ) Pause();
				return this.ptr == 1p;
			}

			// Prepare the future to be waited on.
			// Intended to be used by wait, wait_any, waitfor, etc. rather than called directly.
			bool setup( future_t & this, oneshot & wait_ctx ) {
				/* paranoid */ verify( wait_ctx.ptr == 0p );
				// The future needs to set the wait context
				for() {
					struct oneshot * expected = this.ptr;
					// Is the future already fulfilled?
					if(expected == 1p) return false; // Yes, just return false (didn't block)

					// The future is not fulfilled, try to setup the wait context
					if(__atomic_compare_exchange_n(&this.ptr, &expected, &wait_ctx, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
						return true;
					}
				}
			}

			// Stop waiting on a future.
			// When multiple futures are waited for together in the "any of" pattern,
			// futures that weren't fulfilled before the thread woke up
			// should retract their wait context.
			// Intended to be used by wait, wait_any, waitfor, etc. rather than called directly.
			bool retract( future_t & this, oneshot & wait_ctx ) {
				// Remove the wait context
				struct oneshot * got = __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST);

				// got == 0p: future was never actually setup, just return
				if( got == 0p ) return false;

				// got == wait_ctx: since fulfil does an atomic_swap,
				// if we got back the original then no one else saw the context.
				// It is safe to delete (which could happen after the return).
				if( got == &wait_ctx ) return false;

				// got == 1p: the future is ready and the context was fully consumed;
				// the server won't use the pointer again.
				// It is safe to delete (which could happen after the return).
				if( got == 1p ) return true;

				// got == 2p: the future is ready but the context hasn't fully been consumed;
				// spin until it is safe to move on.
				if( got == 2p ) {
					while( this.ptr != 1p ) Pause();
					return false;
				}

				// got == anything else: something went wrong here, abort
				abort("Future in unexpected state");
			}

			// Mark the future as abandoned, meaning it will be deleted by the server
			bool abandon( future_t & this ) {
				/* paranoid */ verify( this.ptr != 3p );

				// Mark the future as abandoned
				struct oneshot * got = __atomic_exchange_n( &this.ptr, 3p, __ATOMIC_SEQ_CST);

				// If the future isn't already fulfilled, let the server delete it
				if( got == 0p ) return false;

				// got == 2p: the future is ready but the context hasn't fully been consumed;
				// spin until it is safe to move on.
				if( got == 2p ) {
					while( this.ptr != 1p ) Pause();
					got = 1p;
				}

				// The future is completed, delete it now
				/* paranoid */ verify( this.ptr != 1p );
				free( &this );
				return true;
			}

			// From the server side, mark the future as fulfilled
			// and delete it if needed.
			thread$ * fulfil( future_t & this, bool do_unpark = true ) {
				for() {
					struct oneshot * expected = this.ptr;

					// Was this abandoned?
					#if defined(__GNUC__) && __GNUC__ >= 7
						#pragma GCC diagnostic push
						#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
					#endif
						if( expected == 3p ) { free( &this ); return 0p; }
					#if defined(__GNUC__) && __GNUC__ >= 7
						#pragma GCC diagnostic pop
					#endif

					/* paranoid */ verify( expected != 1p ); // Future is already fulfilled, should not happen
					/* paranoid */ verify( expected != 2p ); // Future is being fulfilled by someone else, which is even less supported than the previous case.

					// If there is a wait context, we need to consume it and mark it as consumed after.
					// If there is no context then we can skip the in-progress phase.
					struct oneshot * want = expected == 0p ? 1p : 2p;
					if(__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
						if( expected == 0p ) { return 0p; }
						thread$ * ret = post( *expected, do_unpark );
						__atomic_store_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
						return ret;
					}
				}
			}

			// Wait for the future to be fulfilled
			bool wait( future_t & this ) {
				oneshot temp;
				if( !setup(this, temp) ) return false;

				// Wait context is setup, just wait on it
				bool ret = wait( temp );

				// Wait for the in-progress phase (2p) to end
				while( this.ptr == 2p ) Pause();

				// Make sure the state makes sense.
				// Should be fulfilled; could be in progress but it's out of date if so,
				// since if that is the case, the oneshot was fulfilled (unparking this thread)
				// and the oneshot should not be needed any more.
				__attribute__((unused)) struct oneshot * was = this.ptr;
				/* paranoid */ verifyf( was == 1p, "Expected this.ptr to be 1p, was %p\n", was );

				// Mark the future as fulfilled, to be consistent
				// with potential calls to available()
				// this.ptr = 1p;
				return ret;
			}

			// Wait for any of the given futures to be fulfilled
			future_t & wait_any( future_t * futures, size_t num_futures ) {
				oneshot temp;

				// Setup all futures;
				// if any are already satisfied, return that one.
				for ( i; num_futures ) {
					if( !setup(futures[i], temp) ) return futures[i];
				}

				// Wait context is setup, just wait on it
				wait( temp );

				size_t ret = 0;
				// attempt to retract all futures
				for ( i; num_futures ) {
					if ( retract( futures[i], temp ) ) ret = i;
				}

				return futures[ret];
			}
		}
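
		// Illustrative sketch (not part of this header): the client/server
		// protocol around future_t. 'Client' and 'server' are hypothetical
		// roles, not APIs defined here.
		//
		//   future_t f;
		//
		//   // Client side: block until the server fulfils the future,
		//   // or return immediately if it is already fulfilled.
		//   wait( f );
		//
		//   // Server side: mark the future fulfilled, unparking the client
		//   // if it is parked. If the client abandoned the future, fulfil()
		//   // frees it instead of waking anyone.
		//   fulfil( f );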

		//-----------------------------------------------------------------------
		// Statistics: called at the end of each thread to register statistics
		#if !defined(__CFA_NO_STATISTICS__)
			static inline struct __stats_t * __tls_stats() {
				/* paranoid */ verify( ! __preemption_enabled() );
				/* paranoid */ verify( kernelTLS().this_stats );
				return kernelTLS().this_stats;
			}

			#define __STATS__(in_kernel, ...) { \
				if( !(in_kernel) ) disable_interrupts(); \
				with( *__tls_stats() ) { \
					__VA_ARGS__ \
				} \
				if( !(in_kernel) ) enable_interrupts(); \
			}
			#if defined(CFA_HAVE_LINUX_IO_URING_H)
				#define __IO_STATS__(in_kernel, ...) { \
					if( !(in_kernel) ) disable_interrupts(); \
					with( *__tls_stats() ) { \
						__VA_ARGS__ \
					} \
					if( !(in_kernel) ) enable_interrupts(); \
				}
			#else
				#define __IO_STATS__(in_kernel, ...)
			#endif
		#else
			#define __STATS__(in_kernel, ...)
			#define __IO_STATS__(in_kernel, ...)
		#endif
	}
}
#endif
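
// Illustrative sketch (not part of this header): __STATS__ wraps a statistics
// update so interrupts are disabled around the thread-local access when called
// from outside the kernel. The counter name 'example_counter' is hypothetical;
// real fields live in struct __stats_t and are opened by the with() clause.
//
//   __STATS__( false /* not in kernel */,
//       example_counter++;
//   )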