Changes in / [8cfa4ef:2f5ea69]


Location: libcfa/src/concurrency
Files: 6 edited

Legend:
  (no prefix)  unmodified in both revisions
  -            removed (present only in r8cfa4ef)
  +            added (present only in r2f5ea69)
  • libcfa/src/concurrency/kernel.cfa

    r8cfa4ef → r2f5ea69

      static void __wake_one(cluster * cltr);

    - static void mark_idle (__cluster_proc_list & idles, processor & proc);
    - static void mark_awake(__cluster_proc_list & idles, processor & proc);
    - static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list idles );
    + static void push  (__cluster_idles & idles, processor & proc);
    + static void remove(__cluster_idles & idles, processor & proc);
    + static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );

      extern void __cfa_io_start( processor * );

    …

      // Push self to idle stack
    - mark_idle(this->cltr->procs, * this);
    + push(this->cltr->idles, * this);

      // Confirm the ready-queue is empty

    …

      if( readyThread ) {
              // A thread was found, cancel the halt
    -         mark_awake(this->cltr->procs, * this);
    +         remove(this->cltr->idles, * this);

              #if !defined(__CFA_NO_STATISTICS__)

    …

      // We were woken up, remove self from idle
    - mark_awake(this->cltr->procs, * this);
    + remove(this->cltr->idles, * this);

      // DON'T just proceed, start looking again

    …

      unsigned idle;
      unsigned total;
    - [idle, total, p] = query_idles(this->procs);
    + [idle, total, p] = query(this->idles);

      // If no one is sleeping, we are done

    …

      }

    - static void mark_idle(__cluster_proc_list & this, processor & proc) {
    + static void push  (__cluster_idles & this, processor & proc) {
              /* paranoid */ verify( ! __preemption_enabled() );
              lock( this );
                      this.idle++;
                      /* paranoid */ verify( this.idle <= this.total );
    -                 remove(proc);
    -                 insert_first(this.idles, proc);
    +
    +                 insert_first(this.list, proc);
              unlock( this );
              /* paranoid */ verify( ! __preemption_enabled() );
      }

    - static void mark_awake(__cluster_proc_list & this, processor & proc) {
    + static void remove(__cluster_idles & this, processor & proc) {
              /* paranoid */ verify( ! __preemption_enabled() );
              lock( this );
                      this.idle--;
                      /* paranoid */ verify( this.idle >= 0 );
    +
                      remove(proc);
    -                 insert_last(this.actives, proc);
              unlock( this );
              /* paranoid */ verify( ! __preemption_enabled() );
      }

    - static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list this ) {
    -         /* paranoid */ verify( ! __preemption_enabled() );
    -         /* paranoid */ verify( ready_schedule_islocked() );
    -
    + static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) {
              for() {
                      uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);
                      …
                      unsigned idle    = this.idle;
                      unsigned total   = this.total;
    -                 processor * proc = &this.idles`first;
    +                 processor * proc = &this.list`first;
                      // Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it
                      asm volatile("": : :"memory");
                      …
                      return [idle, total, proc];
              }
    -
    -         /* paranoid */ verify( ready_schedule_islocked() );
    -         /* paranoid */ verify( ! __preemption_enabled() );
      }
  • libcfa/src/concurrency/kernel.hfa

    r8cfa4ef → r2f5ea69

      // Idle Sleep
    - struct __cluster_proc_list {
    + struct __cluster_idles {
              // Spin lock protecting the queue
              volatile uint64_t lock;
              …
              // List of idle processors
    -         dlist(processor, processor) idles;
    -
    -         // List of active processors
    -         dlist(processor, processor) actives;
    +         dlist(processor, processor) list;
      };

    …

      // List of idle processors
    - __cluster_proc_list procs;
    + __cluster_idles idles;

      // List of threads
  • libcfa/src/concurrency/kernel/startup.cfa

    r8cfa4ef → r2f5ea69

      this.name = name;
      this.cltr = &_cltr;
    - this.cltr_id = -1u;
      do_terminate = false;
      preemption_alarm = 0p;

    …

      #endif

    - // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
    - uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);
    -         this.cltr->procs.total += 1u;
    -         insert_last(this.cltr->procs.actives, this);
    + lock( this.cltr->idles );
    +         int target = this.cltr->idles.total += 1u;
    + unlock( this.cltr->idles );
    +
    + id = doregister((__processor_id_t*)&this);
    +
    + // Lock the RWlock so no-one pushes/pops while we are changing the queue
    + uint_fast32_t last_size = ready_mutate_lock();

              // Adjust the ready queue size
    -         ready_queue_grow( cltr );
    +         this.cltr_id = ready_queue_grow( cltr, target );

      // Unlock the RWlock

    …

      // Not a ctor, it just preps the destruction but should not destroy members
      static void deinit(processor & this) {
    +         lock( this.cltr->idles );
    +                 int target = this.cltr->idles.total -= 1u;
    +         unlock( this.cltr->idles );
    +
              // Lock the RWlock so no-one pushes/pops while we are changing the queue
              uint_fast32_t last_size = ready_mutate_lock();
    -                 this.cltr->procs.total -= 1u;
    -                 remove(this);

                      // Adjust the ready queue size
    -                 ready_queue_shrink( this.cltr );
    -
    -         // Unlock the RWlock and unregister: we don't need the read_lock any more
    -         ready_mutate_unregister((__processor_id_t*)&this, last_size );
    +                 ready_queue_shrink( this.cltr, target );
    +
    +         // Unlock the RWlock
    +         ready_mutate_unlock( last_size );
    +
    +         // Finally we don't need the read_lock any more
    +         unregister((__processor_id_t*)&this);

              close(this.idle);

    …

      //-----------------------------------------------------------------------------
      // Cluster
    - static void ?{}(__cluster_proc_list & this) {
    + static void ?{}(__cluster_idles & this) {
              this.lock  = 0;
              this.idle  = 0;
              this.total = 0;
    +         (this.list){};
      }

    …

              // Adjust the ready queue size
    -         ready_queue_grow( &this );
    +         ready_queue_grow( &this, 0 );

      // Unlock the RWlock

    …

              // Adjust the ready queue size
    -         ready_queue_shrink( &this );
    +         ready_queue_shrink( &this, 0 );

      // Unlock the RWlock
  • libcfa/src/concurrency/kernel_private.hfa

    r8cfa4ef → r2f5ea69

      // Cluster lock API
      //=======================================================================
    + // Cells use by the reader writer lock
    + // while not generic it only relies on a opaque pointer
    + struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    +         // Spin lock used as the underlying lock
    +         volatile bool lock;
    +
    +         // Handle pointing to the proc owning this cell
    +         // Used for allocating cells and debugging
    +         __processor_id_t * volatile handle;
    +
    +         #ifdef __CFA_WITH_VERIFY__
    +                 // Debug, check if this is owned for reading
    +                 bool owned;
    +         #endif
    + };
    +
    + static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
    +
      // Lock-Free registering/unregistering of threads
      // Register a processor to a given cluster and get its unique id in return
    - void register_proc_id( struct __processor_id_t * );
    + unsigned doregister( struct __processor_id_t * proc );

      // Unregister a processor from a given cluster using its id, getting back the original pointer
    - void unregister_proc_id( struct __processor_id_t * proc );
    + void     unregister( struct __processor_id_t * proc );
    +
    + //-----------------------------------------------------------------------
    + // Cluster idle lock/unlock
    + static inline void lock(__cluster_idles & this) {
    +         for() {
    +                 uint64_t l = this.lock;
    +                 if(
    +                         (0 == (l % 2))
    +                         && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
    +                 ) return;
    +                 Pause();
    +         }
    + }
    +
    + static inline void unlock(__cluster_idles & this) {
    +         /* paranoid */ verify( 1 == (this.lock % 2) );
    +         __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
    + }

      //=======================================================================

    …

      __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
      }
    -
    - // Cells use by the reader writer lock
    - // while not generic it only relies on a opaque pointer
    - struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    -         // Spin lock used as the underlying lock
    -         volatile bool lock;
    -
    -         // Handle pointing to the proc owning this cell
    -         // Used for allocating cells and debugging
    -         __processor_id_t * volatile handle;
    -
    -         #ifdef __CFA_WITH_VERIFY__
    -                 // Debug, check if this is owned for reading
    -                 bool owned;
    -         #endif
    - };
    -
    - static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));

      //-----------------------------------------------------------------------

    …

      void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );

    - //-----------------------------------------------------------------------
    - // Lock-Free registering/unregistering of threads
    - // Register a processor to a given cluster and get its unique id in return
    - // For convenience, also acquires the lock
    - static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
    -         register_proc_id( proc );
    -         return ready_mutate_lock();
    - }
    -
    - // Unregister a processor from a given cluster using its id, getting back the original pointer
    - // assumes the lock is acquired
    - static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
    -         ready_mutate_unlock( last_s );
    -         unregister_proc_id( proc );
    - }
    -
    - //-----------------------------------------------------------------------
    - // Cluster idle lock/unlock
    - static inline void lock(__cluster_proc_list & this) {
    -         /* paranoid */ verify( ! __preemption_enabled() );
    -
    -         // Start by locking the global RWlock so that we know no-one is
    -         // adding/removing processors while we mess with the idle lock
    -         ready_schedule_lock();
    -
    -         // Simple counting lock, acquired, acquired by incrementing the counter
    -         // to an odd number
    -         for() {
    -                 uint64_t l = this.lock;
    -                 if(
    -                         (0 == (l % 2))
    -                         && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
    -                 ) return;
    -                 Pause();
    -         }
    -
    -         /* paranoid */ verify( ! __preemption_enabled() );
    - }
    -
    - static inline void unlock(__cluster_proc_list & this) {
    -         /* paranoid */ verify( ! __preemption_enabled() );
    -
    -         /* paranoid */ verify( 1 == (this.lock % 2) );
    -         // Simple couting lock, release by incrementing to an even number
    -         __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
    -
    -         // Release the global lock, which we acquired when locking
    -         ready_schedule_unlock();
    -
    -         /* paranoid */ verify( ! __preemption_enabled() );
    - }
    -
      //=======================================================================
      // Ready-Queue API

    …

      //-----------------------------------------------------------------------
      // Increase the width of the ready queue (number of lanes) by 4
    - void ready_queue_grow  (struct cluster * cltr);
    + unsigned ready_queue_grow  (struct cluster * cltr, int target);

      //-----------------------------------------------------------------------
      // Decrease the width of the ready queue (number of lanes) by 4
    - void ready_queue_shrink(struct cluster * cltr);
    + void ready_queue_shrink(struct cluster * cltr, int target);
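The kernel_private.hfa hunks above replace the old RWlock-coupled idle lock with a self-contained counting lock on __cluster_idles: a writer bumps the counter to an odd value to lock and back to an even value to unlock, so readers can detect a concurrent writer from the counter alone. Below is a minimal stand-alone C sketch of that protocol. The struct, the function names, and the reader's retry logic are illustrative assumptions for this note (the corresponding checks in query() are elided in the kernel.cfa hunk); only the writer side mirrors the lock()/unlock() code shown above.

    // counting_lock_sketch.c -- illustrative only, not part of the changeset
    #include <stdint.h>
    #include <stdbool.h>

    struct idles_sketch {
            volatile uint64_t lock;          // even = unlocked, odd = held by a writer
            unsigned idle, total;            // fields a reader wants as a consistent pair
    };

    // Writer side: same shape as the new lock()/unlock() in kernel_private.hfa.
    static void lock_sketch(struct idles_sketch * this) {
            for(;;) {
                    uint64_t l = this->lock;
                    if( (0 == (l % 2))       // only attempt the CAS when unlocked (even)
                        && __atomic_compare_exchange_n(&this->lock, &l, l + 1, false,
                                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) )
                            return;          // counter is now odd: locked
                    // spin / Pause() before retrying
            }
    }

    static void unlock_sketch(struct idles_sketch * this) {
            __atomic_fetch_add(&this->lock, 1, __ATOMIC_SEQ_CST);   // back to even: unlocked
    }

    // Reader side (assumption): one way query() could take a consistent snapshot
    // without blocking writers -- read the counter, copy the fields, and retry if
    // a writer was active before or during the copy.
    static void snapshot_sketch(struct idles_sketch * this, unsigned * idle, unsigned * total) {
            for(;;) {
                    uint64_t l = __atomic_load_n(&this->lock, __ATOMIC_SEQ_CST);
                    if( l % 2 != 0 ) continue;                       // a writer holds the lock
                    unsigned i = this->idle;
                    unsigned t = this->total;
                    __atomic_thread_fence(__ATOMIC_SEQ_CST);         // order the copies before the re-check
                    if( l == __atomic_load_n(&this->lock, __ATOMIC_SEQ_CST) ) {
                            *idle = i; *total = t;                   // no writer ran: the pair is consistent
                            return;
                    }
            }
    }

One consequence visible in the startup.cfa hunks is that this lock no longer nests inside the scheduler RWlock, so the processor constructor counts itself under the idles lock first and only then takes the ready_mutate lock.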
  • libcfa/src/concurrency/preemption.cfa

    r8cfa4ef → r2f5ea69

      static void * alarm_loop( __attribute__((unused)) void * args ) {
              __processor_id_t id;
    -         register_proc_id(&id);
    +         id.id = doregister(&id);
              __cfaabi_tls.this_proc_id = &id;

    …

      EXIT:
              __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
    -         register_proc_id(&id);
    +         unregister(&id);

              return 0p;
  • libcfa/src/concurrency/ready_queue.cfa

    r8cfa4ef → r2f5ea69

      //=======================================================================
      // Lock-Free registering/unregistering of threads
    - void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
    + unsigned doregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
              __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);

    …

                              /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
                              /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
    -                         proc->id = i;
    +                         return i;
                      }
              }

    …

              /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size));
              /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0);
    -         proc->id = n;
    - }
    -
    - void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
    +         return n;
    + }
    +
    + void unregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
              unsigned id = proc->id;
              /*paranoid*/ verify(id < ready);

    …

              __attribute__((unused)) int preferred;
              #if defined(BIAS)
    -                 /* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );
                      preferred =
                              //*

    …

              int preferred;
              #if defined(BIAS)
    -                 /* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
    +                 // Don't bother trying locally too much
                      preferred = kernelTLS().this_processor->cltr_id;
              #endif

    …

      }

    - static void assign_list(unsigned & value, const int inc, dlist(processor, processor) & list, unsigned count) {
    -         processor * it = &list`first;
    -         for(unsigned i = 0; i < count; i++) {
    -                 /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
    -                 it->cltr_id = value;
    -                 value += inc;
    -                 it = &(*it)`next;
    -         }
    - }
    -
    - static void reassign_cltr_id(struct cluster * cltr, const int inc) {
    -         unsigned preferred = 0;
    -         assign_list(preferred, inc, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
    -         assign_list(preferred, inc, cltr->procs.idles  , cltr->procs.idle );
    - }
    -
      // Grow the ready queue
    - void ready_queue_grow(struct cluster * cltr) {
    + unsigned ready_queue_grow(struct cluster * cltr, int target) {
    +         unsigned preferred;
              size_t ncount;
    -         int target = cltr->procs.total;

              /* paranoid */ verify( ready_mutate_islocked() );

    …

                      if(target >= 2) {
                              ncount = target * 4;
    +                         preferred = ncount - 4;
                      } else {
                              ncount = 1;
    +                         preferred = 0;
                      }

    …

              }

    -         reassign_cltr_id(cltr, 4);
    -
              // Make sure that everything is consistent
              /* paranoid */ check( cltr->ready_queue );

    …

              /* paranoid */ verify( ready_mutate_islocked() );
    +         return preferred;
      }

      // Shrink the ready queue
    - void ready_queue_shrink(struct cluster * cltr) {
    + void ready_queue_shrink(struct cluster * cltr, int target) {
              /* paranoid */ verify( ready_mutate_islocked() );
              __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");

    …

              // Make sure that everything is consistent
              /* paranoid */ check( cltr->ready_queue );
    -
    -         int target = cltr->procs.total;

              with( cltr->ready_queue ) {

    …

              }

    -         reassign_cltr_id(cltr, 4);
    -
              // Make sure that everything is consistent
              /* paranoid */ check( cltr->ready_queue );
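Compared with r8cfa4ef, the grow path no longer derives the processor count itself nor walks the processor lists with reassign_cltr_id: the caller passes target in, and ready_queue_grow hands back the preferred value that the processor constructor stores as cltr_id. A small self-contained check of that arithmetic, using a hypothetical helper grow_math that mirrors the sizing branch shown above:

    // grow_math_example.c -- illustrative only; grow_math is a made-up helper
    // repeating the sizing branch from the ready_queue_grow hunk above.
    #include <assert.h>
    #include <stddef.h>

    static void grow_math(int target, size_t * ncount, unsigned * preferred) {
            if(target >= 2) { *ncount = target * 4; *preferred = *ncount - 4; }
            else            { *ncount = 1;          *preferred = 0;           }
    }

    int main(void) {
            size_t n; unsigned p;
            grow_math(3, &n, &p);  assert(n == 12 && p == 8);   // 3 procs: 12 lanes, new proc gets cltr_id 8
            grow_math(1, &n, &p);  assert(n ==  1 && p == 0);   // 1 proc : 1 lane, cltr_id 0
            return 0;
    }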