Diff [8465b4d718cc260b3c5e2362338adf4bbb9a3c18:e2702fd7b3981fd9a1de288a52f59a502b874720] for / – Cforall

libcfa/src/concurrency/alarm.hfa

r8465b4d	re2702fd
23	23	#include "time.hfa"
24	24
25		#include ~~"containers/list.hfa"~~
	25	#include <containers/list.hfa>
26	26
27	27	struct $thread;

libcfa/src/concurrency/io/setup.cfa

r8465b4d	re2702fd
228	228	if( cluster_context ) {
229	229	cluster & cltr = *thrd.curr_cluster;
230		/* paranoid */ verify( cltr.~~idles.total~~ == 0 \|\| &cltr == mainCluster );
	230	/* paranoid */ verify( cltr.nprocessors == 0 \|\| &cltr == mainCluster );
231	231	/* paranoid */ verify( !ready_mutate_islocked() );
232	232

libcfa/src/concurrency/kernel.cfa

-                      r8465b4d
+                      re2702fd
 // Kernel Scheduling logic
 static $thread * __next_thread(cluster * this);
 static $thread * __next_thread_slow(cluster * this);
+static bool __has_next_thread(cluster * this);
 static void __run_thread(processor * this, $thread * dst);
+static void __wake_one(struct __processor_id_t * id, cluster * cltr);
+static void push  (__cluster_idles & idles, processor & proc);
+static void remove(__cluster_idles & idles, processor & proc);
+static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );
+static bool __wake_one(struct __processor_id_t * id, cluster * cltr);
+static void __halt(processor * this);
+bool __wake_proc(processor *);
 //=============================================================================================
 …
                 $thread * readyThread = 0p;
+                MAIN_LOOP:
+                for() {
+                for( unsigned int spin_count = 0;; spin_count++ ) {
                         // Try to get the next thread
                         readyThread = __next_thread( this->cltr );
+                        // Check if we actually found a thread
+                        if( readyThread ) {
+                                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                                /* paranoid */ verifyf( readyThread->state == Ready || readyThread->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", readyThread->state, readyThread->preempted);
+                                /* paranoid */ verifyf( readyThread->link.next == 0p, "Expected null got %p", readyThread->link.next );
+                                __builtin_prefetch( readyThread->context.SP );
+                                // We found a thread run it
+                                __run_thread(this, readyThread);
+                                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                        }
+                        if(__atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST)) break;
                         if( !readyThread ) {
+                                readyThread = __next_thread_slow( this->cltr );
+                                // Block until a thread is ready
+                                __halt(this);
+                        }
-                        HALT:
-                        if( !readyThread ) {
-                                // Don't block if we are done
-                                if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
-                                #if !defined(__CFA_NO_STATISTICS__)
-                                        __tls_stats()->ready.sleep.halts++;
-                                #endif
-                                // Push self to idle stack
-                                push(this->cltr->idles, * this);
-                                // Confirm the ready-queue is empty
-                                readyThread = __next_thread_slow( this->cltr );
-                                if( readyThread ) {
-                                        // A thread was found, cancel the halt
-                                        remove(this->cltr->idles, * this);
-                                        #if !defined(__CFA_NO_STATISTICS__)
-                                                __tls_stats()->ready.sleep.cancels++;
-                                        #endif
-                                        // continue the main loop
-                                        break HALT;
+                                }
-                                #if !defined(__CFA_NO_STATISTICS__)
-                                        if(this->print_halts) {
-                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl());
+                                        }
-                                #endif
-                                wait( this->idle );
-                                #if !defined(__CFA_NO_STATISTICS__)
-                                        if(this->print_halts) {
-                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl());
+                                        }
-                                #endif
-                                // We were woken up, remove self from idle
-                                remove(this->cltr->idles, * this);
-                                // DON'T just proceed, start looking again
-                                continue MAIN_LOOP;
+                        }
-                        /* paranoid */ verify( readyThread );
-                        // We found a thread run it
-                        __run_thread(this, readyThread);
-                        // Are we done?
-                        if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
+                }
 …
 // from the processor coroutine to the target thread
 static void __run_thread(processor * this, $thread * thrd_dst) {
-        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-        /* paranoid */ verifyf( thrd_dst->state == Ready || thrd_dst->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", thrd_dst->state, thrd_dst->preempted);
-        /* paranoid */ verifyf( thrd_dst->link.next == 0p, "Expected null got %p", thrd_dst->link.next );
-        __builtin_prefetch( thrd_dst->context.SP );
         $coroutine * proc_cor = get_coroutine(this->runner);
 …
         proc_cor->state = Active;
         kernelTLS.this_thread = 0p;
-        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+}
 …
         ready_schedule_lock  ( id );
                 push( thrd->curr_cluster, thrd );
+                __wake_one(id, thrd->curr_cluster);
+                #if !defined(__CFA_NO_STATISTICS__)
+                        bool woke =
+                #endif
+                        __wake_one(id, thrd->curr_cluster);
+                #if !defined(__CFA_NO_STATISTICS__)
+                        if(woke) __tls_stats()->ready.sleep.wakes++;
+                #endif
         ready_schedule_unlock( id );
 …
 // KERNEL ONLY
 static inline $thread * __next_thread(cluster * this) with( *this ) {
+static $thread * __next_thread(cluster * this) with( *this ) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         ready_schedule_lock  ( (__processor_id_t*)kernelTLS.this_processor );
                 $thread * thrd = pop( this );
+                $thread * head = pop( this );
         ready_schedule_unlock( (__processor_id_t*)kernelTLS.this_processor );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         return thrd;
+        return head;
+}
 // KERNEL ONLY
 static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
+static bool __has_next_thread(cluster * this) with( *this ) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         ready_schedule_lock  ( (__processor_id_t*)kernelTLS.this_processor );
                 $thread * thrd = pop_slow( this );
+                bool not_empty = query( this );
         ready_schedule_unlock( (__processor_id_t*)kernelTLS.this_processor );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         return thrd;
+        return not_empty;
+}
 …
 //=============================================================================================
 // Wake a thread from the front if there are any
+static void __wake_one(struct __processor_id_t * id, cluster * this) {
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+static bool __wake_one(struct __processor_id_t * id, cluster * this) {
         /* paranoid */ verify( ready_schedule_islocked( id ) );
         // Check if there is a sleeping processor
+        processor * p;
+        unsigned idle;
+        unsigned total;
+        [idle, total, p] = query(this->idles);
+        processor * p = pop(this->idles);
         // If no one is sleeping, we are done
         if( idle == 0 ) return;
+        if( 0p == p ) return false;
         // We found a processor, wake it up
         post( p->idle );
+        #if !defined(__CFA_NO_STATISTICS__)
+                __tls_stats()->ready.sleep.wakes++;
+        #endif
+        /* paranoid */ verify( ready_schedule_islocked( id ) );
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        return;
+        return true;
+}
 // Unconditionally wake a thread
 void __wake_proc(processor * this) {
+bool __wake_proc(processor * this) {
         __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
 …
                 bool ret = post( this->idle );
         enable_interrupts( __cfaabi_dbg_ctx );
+}
+static void push  (__cluster_idles & this, processor & proc) {
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        lock( this );
+                this.idle++;
+                /* paranoid */ verify( this.idle <= this.total );
+                insert_first(this.list, proc);
+        unlock( this );
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+}
+static void remove(__cluster_idles & this, processor & proc) {
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        lock( this );
+                this.idle--;
+                /* paranoid */ verify( this.idle >= 0 );
+                remove(proc);
+        unlock( this );
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+}
+static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) {
+        for() {
+                uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);
+                if( 1 == (l % 2) ) { Pause(); continue; }
+                unsigned idle    = this.idle;
+                unsigned total   = this.total;
+                processor * proc = &this.list`first;
+                if(l != __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST)) { Pause(); continue; }
+                return [idle, total, proc];
+        }
+        return ret;
+}
+static void __halt(processor * this) with( *this ) {
+        if( do_terminate ) return;
+        #if !defined(__CFA_NO_STATISTICS__)
+                __tls_stats()->ready.sleep.halts++;
+        #endif
+        // Push self to queue
+        push(cltr->idles, *this);
+        // Make sure we don't miss a thread
+        if( __has_next_thread(cltr) ) {
+                // A thread was posted, make sure a processor is woken up
+                struct __processor_id_t *id = (struct __processor_id_t *) this;
+                ready_schedule_lock  ( id );
+                        __wake_one( id, cltr );
+                ready_schedule_unlock( id );
+                #if !defined(__CFA_NO_STATISTICS__)
+                        __tls_stats()->ready.sleep.cancels++;
+                #endif
+        }
+        #if !defined(__CFA_NO_STATISTICS__)
+                if(this->print_halts) {
+                        __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl());
+                }
+        #endif
+        wait( idle );
+        #if !defined(__CFA_NO_STATISTICS__)
+                if(this->print_halts) {
+                        __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl());
+                }
+        #endif
+}

libcfa/src/concurrency/kernel.hfa

-                      r8465b4d
+                      re2702fd
 #include "coroutine.hfa"
 #include "containers/list.hfa"
+#include "containers/stackLockFree.hfa"
 extern "C" {
 …
         // Link lists fields
         DLISTED_MGD_IMPL_IN(processor)
+        Link(processor) link;
         #if !defined(__CFA_NO_STATISTICS__)
 …
 static inline void  ?{}(processor & this, const char name[]) { this{name, *mainCluster }; }
+DLISTED_MGD_IMPL_OUT(processor)
+static inline Link(processor) * ?`next( processor * this ) { return &this->link; }
 //-----------------------------------------------------------------------------
 …
 void ^?{}(__ready_queue_t & this);
-// Idle Sleep
-struct __cluster_idles {
-        // Spin lock protecting the queue
-        volatile uint64_t lock;
-        // Total number of processors
-        unsigned total;
-        // Total number of idle processors
-        unsigned idle;
-        // List of idle processors
-        dlist(processor, processor) list;
-};
 //-----------------------------------------------------------------------------
 // Cluster
 …
         // List of idle processors
+        __cluster_idles idles;
+        StackLF(processor) idles;
+        volatile unsigned int nprocessors;
         // List of threads

libcfa/src/concurrency/kernel/startup.cfa

-                      r8465b4d
+                      re2702fd
 //-----------------------------------------------------------------------------
 // Other Forward Declarations
 extern void __wake_proc(processor *);
+extern bool __wake_proc(processor *);
 //-----------------------------------------------------------------------------
 …
         #endif
+        lock( this.cltr->idles );
+                int target = this.cltr->idles.total += 1u;
+        unlock( this.cltr->idles );
+        int target = __atomic_add_fetch( &cltr->nprocessors, 1u, __ATOMIC_SEQ_CST );
         id = doregister((__processor_id_t*)&this);
 …
 // Not a ctor, it just preps the destruction but should not destroy members
 static void deinit(processor & this) {
+        lock( this.cltr->idles );
+                int target = this.cltr->idles.total -= 1u;
+        unlock( this.cltr->idles );
+        int target = __atomic_sub_fetch( &this.cltr->nprocessors, 1u, __ATOMIC_SEQ_CST );
         // Lock the RWlock so no-one pushes/pops while we are changing the queue
 …
                 // Adjust the ready queue size
                 ready_queue_shrink( this.cltr, target );
+                // Make sure we aren't on the idle queue
+                unsafe_remove( this.cltr->idles, &this );
         // Unlock the RWlock
 …
 //-----------------------------------------------------------------------------
 // Cluster
-static void ?{}(__cluster_idles & this) {
-        this.lock  = 0;
-        this.idle  = 0;
-        this.total = 0;
-        (this.list){};
+}
 void ?{}(cluster & this, const char name[], Duration preemption_rate, unsigned num_io, const io_context_params & io_params) with( this ) {
         this.name = name;
         this.preemption_rate = preemption_rate;
+        this.nprocessors = 0;
         ready_queue{};

libcfa/src/concurrency/kernel_private.hfa

-                      r8465b4d
+                      re2702fd
 void     unregister( struct __processor_id_t * proc );
-//-----------------------------------------------------------------------
-// Cluster idle lock/unlock
-static inline void lock(__cluster_idles & this) {
-        for() {
-                uint64_t l = this.lock;
-                if(
-                        (0 == (l % 2))
-                        && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
-                ) return;
-                Pause();
+        }
+}
-static inline void unlock(__cluster_idles & this) {
-        /* paranoid */ verify( 1 == (this.lock % 2) );
-        __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
+}
 //=======================================================================
 // Reader-writer lock implementation
 …
 // pop thread from the ready queue of a cluster
 // returns 0p if empty
-// May return 0p spuriously
 __attribute__((hot)) struct $thread * pop(struct cluster * cltr);
-//-----------------------------------------------------------------------
-// pop thread from the ready queue of a cluster
-// returns 0p if empty
-// guaranteed to find any threads added before this call
-__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
 //-----------------------------------------------------------------------

libcfa/src/concurrency/ready_queue.cfa

-                      r8465b4d
+                      re2702fd
 // #define __CFA_DEBUG_PRINT_READY_QUEUE__
-// #define USE_SNZI
 #include "bits/defs.hfa"
 #include "kernel_private.hfa"
 …
 void ^?{}(__ready_queue_t & this) with (this) {
         verify( 1 == lanes.count );
+        #ifdef USE_SNZI
+                verify( !query( snzi ) );
+        #endif
+        verify( !query( snzi ) );
         free(lanes.data);
+}
 …
 //-----------------------------------------------------------------------
 __attribute__((hot)) bool query(struct cluster * cltr) {
+        #ifdef USE_SNZI
+                return query(cltr->ready_queue.snzi);
+        #endif
+        return true;
+        return query(cltr->ready_queue.snzi);
+}
 …
         bool lane_first = push(lanes.data[i], thrd);
+        #ifdef USE_SNZI
+                // If this lane used to be empty we need to do more
+                if(lane_first) {
+                        // Check if the entire queue used to be empty
+                        first = !query(snzi);
+                        // Update the snzi
+                        arrive( snzi, i );
+                }
+        #endif
+        // If this lane used to be empty we need to do more
+        if(lane_first) {
+                // Check if the entire queue used to be empty
+                first = !query(snzi);
+                // Update the snzi
+                arrive( snzi, i );
+        }
         // Unlock and return
 …
 __attribute__((hot)) $thread * pop(struct cluster * cltr) with (cltr->ready_queue) {
         /* paranoid */ verify( lanes.count > 0 );
-        unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
         #if defined(BIAS)
                 // Don't bother trying locally too much
 …
         // As long as the list is not empty, try finding a lane that isn't empty and pop from it
+        #ifdef USE_SNZI
+                while( query(snzi) ) {
+        #else
+                for(25) {
+        #endif
+        while( query(snzi) ) {
                 // Pick two lists at random
                 unsigned i,j;
 …
                 #endif
                 i %= count;
                 j %= count;
+                i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
+                j %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
                 // try popping from the 2 picked lists
 …
+}
-__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
-        /* paranoid */ verify( lanes.count > 0 );
-        unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
-        unsigned offset = __tls_rand();
-        for(i; count) {
-                unsigned idx = (offset + i) % count;
-                struct $thread * thrd = try_pop(cltr, idx);
-                if(thrd) {
-                        return thrd;
+                }
+        }
-        // All lanes were empty, return 0p
-        return 0p;
+}
 //-----------------------------------------------------------------------
 // Given 2 indexes, pick the list with the oldest push and try to pop from it
 …
         /* paranoid */ verify(lane.lock);
+        #ifdef USE_SNZI
+                // If this was the last element in the lane
+                if(emptied) {
+                        depart( snzi, w );
+                }
+        #endif
+        // If this was the last element in the lane
+        if(emptied) {
+                depart( snzi, w );
+        }
         // Unlock and return
 …
                                 removed = true;
+                                #ifdef USE_SNZI
+                                        if(emptied) {
+                                                depart( snzi, i );
+                                        }
+                                #endif
+                                if(emptied) {
+                                        depart( snzi, i );
+                                }
+                        }
                 __atomic_unlock(&lane.lock);
 …
         // grow the ready queue
         with( cltr->ready_queue ) {
+                #ifdef USE_SNZI
+                        ^(snzi){};
+                #endif
+                ^(snzi){};
                 // Find new count
 …
                 lanes.count = ncount;
+                #ifdef USE_SNZI
+                        // Re-create the snzi
+                        snzi{ log2( lanes.count / 8 ) };
+                        for( idx; (size_t)lanes.count ) {
+                                if( !is_empty(lanes.data[idx]) ) {
+                                        arrive(snzi, idx);
+                                }
+                        }
+                #endif
+                // Re-create the snzi
+                snzi{ log2( lanes.count / 8 ) };
+                for( idx; (size_t)lanes.count ) {
+                        if( !is_empty(lanes.data[idx]) ) {
+                                arrive(snzi, idx);
+                        }
+                }
+        }
 …
         with( cltr->ready_queue ) {
+                #ifdef USE_SNZI
+                        ^(snzi){};
+                #endif
+                ^(snzi){};
                 // Remember old count
 …
+                }
+                #ifdef USE_SNZI
+                        // Re-create the snzi
+                        snzi{ log2( lanes.count / 8 ) };
+                        for( idx; (size_t)lanes.count ) {
+                                if( !is_empty(lanes.data[idx]) ) {
+                                        arrive(snzi, idx);
+                                }
+                        }
+                #endif
+                // Re-create the snzi
+                snzi{ log2( lanes.count / 8 ) };
+                for( idx; (size_t)lanes.count ) {
+                        if( !is_empty(lanes.data[idx]) ) {
+                                arrive(snzi, idx);
+                        }
+                }
+        }

Context Navigation

Changes in / [8465b4d:e2702fd]

Legend:

libcfa/src/concurrency/alarm.hfa

libcfa/src/concurrency/io/setup.cfa

libcfa/src/concurrency/kernel.cfa

libcfa/src/concurrency/kernel.hfa

libcfa/src/concurrency/kernel/startup.cfa

libcfa/src/concurrency/kernel_private.hfa

libcfa/src/concurrency/ready_queue.cfa

Download in other formats: