Changeset 8cfa4ef


Timestamp: Apr 15, 2021, 12:05:16 PM
Author:    Peter A. Buhr <pabuhr@…>
Branches:  ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
Children:  8590328
Parents:   2f5ea69 (diff), a4b0aa4 (diff)
Message:   Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc
Location:  libcfa/src/concurrency
Files:     6 edited

Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.

Legend: lines common to both revisions carry no marker; lines removed in r8cfa4ef are prefixed with -, lines added are prefixed with +, and … marks elided context between hunks.

  • libcfa/src/concurrency/kernel.cfa  (r2f5ea69 → r8cfa4ef)

     static void __wake_one(cluster * cltr);

    -static void push  (__cluster_idles & idles, processor & proc);
    -static void remove(__cluster_idles & idles, processor & proc);
    -static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );
    +static void mark_idle (__cluster_proc_list & idles, processor & proc);
    +static void mark_awake(__cluster_proc_list & idles, processor & proc);
    +static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list idles );

     extern void __cfa_io_start( processor * );
    …

                                     // Push self to idle stack
    -                                push(this->cltr->idles, * this);
    +                                mark_idle(this->cltr->procs, * this);

                                     // Confirm the ready-queue is empty
    …
                                     if( readyThread ) {
                                             // A thread was found, cancel the halt
    -                                        remove(this->cltr->idles, * this);
    +                                        mark_awake(this->cltr->procs, * this);

                                             #if !defined(__CFA_NO_STATISTICS__)
    …

                                     // We were woken up, remove self from idle
    -                                remove(this->cltr->idles, * this);
    +                                mark_awake(this->cltr->procs, * this);

                                     // DON'T just proceed, start looking again
    …
             unsigned idle;
             unsigned total;
    -        [idle, total, p] = query(this->idles);
    +        [idle, total, p] = query_idles(this->procs);

             // If no one is sleeping, we are done
    …
     }

    -static void push  (__cluster_idles & this, processor & proc) {
    +static void mark_idle(__cluster_proc_list & this, processor & proc) {
             /* paranoid */ verify( ! __preemption_enabled() );
             lock( this );
                     this.idle++;
                     /* paranoid */ verify( this.idle <= this.total );
    -
    -                insert_first(this.list, proc);
    +                remove(proc);
    +                insert_first(this.idles, proc);
             unlock( this );
             /* paranoid */ verify( ! __preemption_enabled() );
     }

    -static void remove(__cluster_idles & this, processor & proc) {
    +static void mark_awake(__cluster_proc_list & this, processor & proc) {
             /* paranoid */ verify( ! __preemption_enabled() );
             lock( this );
                     this.idle--;
                     /* paranoid */ verify( this.idle >= 0 );
    -
                     remove(proc);
    +                insert_last(this.actives, proc);
             unlock( this );
             /* paranoid */ verify( ! __preemption_enabled() );
     }

    -static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) {
    +static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list this ) {
    +        /* paranoid */ verify( ! __preemption_enabled() );
    +        /* paranoid */ verify( ready_schedule_islocked() );
    +
             for() {
                     uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);
    …
                     unsigned idle    = this.idle;
                     unsigned total   = this.total;
    -                processor * proc = &this.list`first;
    +                processor * proc = &this.idles`first;
                     // Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it
                     asm volatile("": : :"memory");
    …
                     return [idle, total, proc];
             }
    +
    +        /* paranoid */ verify( ready_schedule_islocked() );
    +        /* paranoid */ verify( ! __preemption_enabled() );
     }
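
The processor main-loop hunks above follow a publish-then-re-check idiom: a processor calls mark_idle before taking one final look at the ready queue, and cancels the halt with mark_awake if a thread appeared in that window, so a wake-up cannot be lost between "the queue looked empty" and "halt". Below is a toy, single-threaded plain-C sketch of just that idiom; the names (ready_count, idle_count, try_halt) are illustrative stand-ins, not the CFA runtime API.

    #include <stdbool.h>
    #include <stdio.h>

    static int ready_count = 0;        /* pending work items               */
    static int idle_count  = 0;        /* processors currently marked idle */

    static void mark_idle (void) { idle_count++; }
    static void mark_awake(void) { idle_count--; }

    /* Returns true if the processor really halted, false if the halt was cancelled. */
    static bool try_halt(void) {
            mark_idle();                       /* 1. publish: wakers can now see us        */

            if(ready_count > 0) {              /* 2. re-check the queue after publishing   */
                    mark_awake();              /* 3a. a thread was found, cancel the halt  */
                    return false;
            }

            /* 3b. ...the real runtime would block on its idle fd here... */
            mark_awake();                      /* 4. woken up, remove self from idle;      */
            return true;                       /*    the caller loops and looks again      */
    }

    int main(void) {
            ready_count = 1;
            printf("halted: %d (halt cancelled, work was pending)\n", try_halt());
            ready_count = 0;
            printf("halted: %d (nothing to do)\n", try_halt());
            return 0;
    }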

  • libcfa/src/concurrency/kernel.hfa  (r2f5ea69 → r8cfa4ef)


     // Idle Sleep
    -struct __cluster_idles {
    +struct __cluster_proc_list {
             // Spin lock protecting the queue
             volatile uint64_t lock;
    …

             // List of idle processors
    -        dlist(processor, processor) list;
    +        dlist(processor, processor) idles;
    +
    +        // List of active processors
    +        dlist(processor, processor) actives;
     };

    …

             // List of idle processors
    -        __cluster_idles idles;
    +        __cluster_proc_list procs;

             // List of threads

  • libcfa/src/concurrency/kernel/startup.cfa  (r2f5ea69 → r8cfa4ef)

             this.name = name;
             this.cltr = &_cltr;
    +        this.cltr_id = -1u;
             do_terminate = false;
             preemption_alarm = 0p;
    …
             #endif

    -        lock( this.cltr->idles );
    -                int target = this.cltr->idles.total += 1u;
    -        unlock( this.cltr->idles );
    -
    -        id = doregister((__processor_id_t*)&this);
    -
    +        // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
    +        uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);
    +                this.cltr->procs.total += 1u;
    +                insert_last(this.cltr->procs.actives, this);
    +
    +                // Adjust the ready queue size
    +                ready_queue_grow( cltr );
    +
    +        // Unlock the RWlock
    +        ready_mutate_unlock( last_size );
    +
    +        __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
    +}
    +
    +// Not a ctor, it just preps the destruction but should not destroy members
    +static void deinit(processor & this) {
             // Lock the RWlock so no-one pushes/pops while we are changing the queue
             uint_fast32_t last_size = ready_mutate_lock();
    +                this.cltr->procs.total -= 1u;
    +                remove(this);

                     // Adjust the ready queue size
    -                this.cltr_id = ready_queue_grow( cltr, target );
    -
    -        // Unlock the RWlock
    -        ready_mutate_unlock( last_size );
    -
    -        __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
    -}
    -
    -// Not a ctor, it just preps the destruction but should not destroy members
    -static void deinit(processor & this) {
    -        lock( this.cltr->idles );
    -                int target = this.cltr->idles.total -= 1u;
    -        unlock( this.cltr->idles );
    -
    -        // Lock the RWlock so no-one pushes/pops while we are changing the queue
    -        uint_fast32_t last_size = ready_mutate_lock();
    -
    -                // Adjust the ready queue size
    -                ready_queue_shrink( this.cltr, target );
    -
    -        // Unlock the RWlock
    -        ready_mutate_unlock( last_size );
    -
    -        // Finally we don't need the read_lock any more
    -        unregister((__processor_id_t*)&this);
    +                ready_queue_shrink( this.cltr );
    +
    +        // Unlock the RWlock and unregister: we don't need the read_lock any more
    +        ready_mutate_unregister((__processor_id_t*)&this, last_size );

             close(this.idle);
    …
     //-----------------------------------------------------------------------------
     // Cluster
    -static void ?{}(__cluster_idles & this) {
    +static void ?{}(__cluster_proc_list & this) {
             this.lock  = 0;
             this.idle  = 0;
             this.total = 0;
    -        (this.list){};
     }

    …

                     // Adjust the ready queue size
    -                ready_queue_grow( &this, 0 );
    +                ready_queue_grow( &this );

             // Unlock the RWlock
    …

                     // Adjust the ready queue size
    -                ready_queue_shrink( &this, 0 );
    +                ready_queue_shrink( &this );

             // Unlock the RWlock

  • libcfa/src/concurrency/kernel_private.hfa  (r2f5ea69 → r8cfa4ef)

     // Cluster lock API
     //=======================================================================
    -// Cells use by the reader writer lock
    -// while not generic it only relies on a opaque pointer
    -struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    -        // Spin lock used as the underlying lock
    -        volatile bool lock;
    -
    -        // Handle pointing to the proc owning this cell
    -        // Used for allocating cells and debugging
    -        __processor_id_t * volatile handle;
    -
    -        #ifdef __CFA_WITH_VERIFY__
    -                // Debug, check if this is owned for reading
    -                bool owned;
    -        #endif
    -};
    -
    -static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
    -
     // Lock-Free registering/unregistering of threads
     // Register a processor to a given cluster and get its unique id in return
    -unsigned doregister( struct __processor_id_t * proc );
    +void register_proc_id( struct __processor_id_t * );

     // Unregister a processor from a given cluster using its id, getting back the original pointer
    -void     unregister( struct __processor_id_t * proc );
    -
    -//-----------------------------------------------------------------------
    -// Cluster idle lock/unlock
    -static inline void lock(__cluster_idles & this) {
    -        for() {
    -                uint64_t l = this.lock;
    -                if(
    -                        (0 == (l % 2))
    -                        && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
    -                ) return;
    -                Pause();
    -        }
    -}
    -
    -static inline void unlock(__cluster_idles & this) {
    -        /* paranoid */ verify( 1 == (this.lock % 2) );
    -        __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
    -}
    +void unregister_proc_id( struct __processor_id_t * proc );

     //=======================================================================
    …
             __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
     }
    +
    +// Cells use by the reader writer lock
    +// while not generic it only relies on a opaque pointer
    +struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    +        // Spin lock used as the underlying lock
    +        volatile bool lock;
    +
    +        // Handle pointing to the proc owning this cell
    +        // Used for allocating cells and debugging
    +        __processor_id_t * volatile handle;
    +
    +        #ifdef __CFA_WITH_VERIFY__
    +                // Debug, check if this is owned for reading
    +                bool owned;
    +        #endif
    +};
    +
    +static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));

     //-----------------------------------------------------------------------
    …
     void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );

    +//-----------------------------------------------------------------------
    +// Lock-Free registering/unregistering of threads
    +// Register a processor to a given cluster and get its unique id in return
    +// For convenience, also acquires the lock
    +static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
    +        register_proc_id( proc );
    +        return ready_mutate_lock();
    +}
    +
    +// Unregister a processor from a given cluster using its id, getting back the original pointer
    +// assumes the lock is acquired
    +static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
    +        ready_mutate_unlock( last_s );
    +        unregister_proc_id( proc );
    +}
    +
    +//-----------------------------------------------------------------------
    +// Cluster idle lock/unlock
    +static inline void lock(__cluster_proc_list & this) {
    +        /* paranoid */ verify( ! __preemption_enabled() );
    +
    +        // Start by locking the global RWlock so that we know no-one is
    +        // adding/removing processors while we mess with the idle lock
    +        ready_schedule_lock();
    +
    +        // Simple counting lock, acquired, acquired by incrementing the counter
    +        // to an odd number
    +        for() {
    +                uint64_t l = this.lock;
    +                if(
    +                        (0 == (l % 2))
    +                        && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
    +                ) return;
    +                Pause();
    +        }
    +
    +        /* paranoid */ verify( ! __preemption_enabled() );
    +}
    +
    +static inline void unlock(__cluster_proc_list & this) {
    +        /* paranoid */ verify( ! __preemption_enabled() );
    +
    +        /* paranoid */ verify( 1 == (this.lock % 2) );
    +        // Simple couting lock, release by incrementing to an even number
    +        __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
    +
    +        // Release the global lock, which we acquired when locking
    +        ready_schedule_unlock();
    +
    +        /* paranoid */ verify( ! __preemption_enabled() );
    +}
    +
     //=======================================================================
     // Ready-Queue API
    …
     //-----------------------------------------------------------------------
     // Increase the width of the ready queue (number of lanes) by 4
    -unsigned ready_queue_grow  (struct cluster * cltr, int target);
    +void ready_queue_grow  (struct cluster * cltr);

     //-----------------------------------------------------------------------
     // Decrease the width of the ready queue (number of lanes) by 4
    -void ready_queue_shrink(struct cluster * cltr, int target);
    +void ready_queue_shrink(struct cluster * cltr);

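
The lock/unlock pair added above is a simple counting lock: the counter is odd while a writer holds it and even otherwise, and query_idles in kernel.cfa snapshots idle/total optimistically against that same counter instead of taking the lock. Below is a self-contained plain-C sketch of the pattern using the GCC/Clang __atomic builtins; struct counts and the counts_* names are placeholders, and the interaction with the global scheduler RWlock (ready_schedule_lock/unlock) is deliberately omitted.

    #include <stdint.h>
    #include <stdbool.h>

    struct counts {
            volatile uint64_t lock;            /* even = free, odd = a writer is inside */
            unsigned idle, total;
    };

    /* Writer: spin until the counter is even, then CAS it to the next odd value. */
    static void counts_lock(struct counts * this) {
            for(;;) {
                    uint64_t l = this->lock;
                    if(0 == (l % 2)
                       && __atomic_compare_exchange_n(&this->lock, &l, l + 1, false,
                                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
                            return;
                    /* the real code calls Pause() here */
            }
    }

    /* Writer: release by bumping the counter back to an even value. */
    static void counts_unlock(struct counts * this) {
            __atomic_fetch_add(&this->lock, 1, __ATOMIC_SEQ_CST);
    }

    /* Reader: optimistic snapshot in the spirit of query_idles(): read the counter,
     * read the fields, read the counter again, and retry if a writer was active
     * (odd value) or slipped in between the two reads. */
    static void counts_snapshot(struct counts * this, unsigned * idle, unsigned * total) {
            for(;;) {
                    uint64_t before = __atomic_load_n(&this->lock, __ATOMIC_SEQ_CST);
                    if(before % 2) continue;                 /* writer in progress, retry */
                    unsigned i = this->idle;
                    unsigned t = this->total;
                    asm volatile("" : : : "memory");         /* keep the loads ordered    */
                    uint64_t after = __atomic_load_n(&this->lock, __ATOMIC_SEQ_CST);
                    if(before == after) { *idle = i; *total = t; return; }
            }
    }

    int main(void) {
            struct counts c = { .lock = 0, .idle = 0, .total = 4 };
            counts_lock(&c);   c.idle = 1;   counts_unlock(&c);

            unsigned idle, total;
            counts_snapshot(&c, &idle, &total);
            return (int)(total - idle - 3);                  /* 0 on success */
    }

The payoff of this shape is that readers such as query_idles never acquire the counting lock; they only retry when a writer happens to be active, which suits the wake-up path where the caller already holds the scheduler read lock.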

  • libcfa/src/concurrency/preemption.cfa  (r2f5ea69 → r8cfa4ef)

     static void * alarm_loop( __attribute__((unused)) void * args ) {
             __processor_id_t id;
    -        id.id = doregister(&id);
    +        register_proc_id(&id);
             __cfaabi_tls.this_proc_id = &id;

    …
     EXIT:
             __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
    -        unregister(&id);
    +        register_proc_id(&id);

             return 0p;

  • libcfa/src/concurrency/ready_queue.cfa  (r2f5ea69 → r8cfa4ef)

     //=======================================================================
     // Lock-Free registering/unregistering of threads
    -unsigned doregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
    +void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
             __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);

    …
                             /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
                             /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
    -                        return i;
    +                        proc->id = i;
                     }
             }
    …
             /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size));
             /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0);
    -        return n;
    -}
    -
    -void unregister( struct __processor_id_t * proc ) with(*__scheduler_lock) {
    +        proc->id = n;
    +}
    +
    +void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
             unsigned id = proc->id;
             /*paranoid*/ verify(id < ready);
    …
             __attribute__((unused)) int preferred;
             #if defined(BIAS)
    +                /* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );
                     preferred =
                             //*
    …
             int preferred;
             #if defined(BIAS)
    -                // Don't bother trying locally too much
    +                /* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
                     preferred = kernelTLS().this_processor->cltr_id;
             #endif
    …
     }

    +static void assign_list(unsigned & value, const int inc, dlist(processor, processor) & list, unsigned count) {
    +        processor * it = &list`first;
    +        for(unsigned i = 0; i < count; i++) {
    +                /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
    +                it->cltr_id = value;
    +                value += inc;
    +                it = &(*it)`next;
    +        }
    +}
    +
    +static void reassign_cltr_id(struct cluster * cltr, const int inc) {
    +        unsigned preferred = 0;
    +        assign_list(preferred, inc, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
    +        assign_list(preferred, inc, cltr->procs.idles  , cltr->procs.idle );
    +}
    +
     // Grow the ready queue
    -unsigned ready_queue_grow(struct cluster * cltr, int target) {
    -        unsigned preferred;
    +void ready_queue_grow(struct cluster * cltr) {
             size_t ncount;
    +        int target = cltr->procs.total;

             /* paranoid */ verify( ready_mutate_islocked() );
    …
                     if(target >= 2) {
                             ncount = target * 4;
    -                        preferred = ncount - 4;
                     } else {
                             ncount = 1;
    -                        preferred = 0;
                     }

    …
             }

    +        reassign_cltr_id(cltr, 4);
    +
             // Make sure that everything is consistent
             /* paranoid */ check( cltr->ready_queue );
    …

             /* paranoid */ verify( ready_mutate_islocked() );
    -        return preferred;
     }

     // Shrink the ready queue
    -void ready_queue_shrink(struct cluster * cltr, int target) {
    +void ready_queue_shrink(struct cluster * cltr) {
             /* paranoid */ verify( ready_mutate_islocked() );
             __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
    …
             // Make sure that everything is consistent
             /* paranoid */ check( cltr->ready_queue );
    +
    +        int target = cltr->procs.total;

             with( cltr->ready_queue ) {
    …
             }

    +        reassign_cltr_id(cltr, 4);
    +
             // Make sure that everything is consistent
             /* paranoid */ check( cltr->ready_queue );
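
The new reassign_cltr_id above re-derives every processor's preferred ready-queue lane whenever the queue grows or shrinks: active processors are numbered first, then idle ones, in steps of inc. With inc = 4 and lanes.count = target * 4, each processor lands on the first lane of its own group of four, which is what the added verify( ... cltr_id < lanes.count ) checks rely on. A tiny plain-C model of that numbering follows; it is array-based and the names are illustrative only.

    #include <stdio.h>

    /* Toy model of assign_list()/reassign_cltr_id(): hand out preferred lane ids
     * in steps of `inc`, active processors first, then idle ones. */
    static void assign_list(unsigned * value, int inc, int * cltr_id, unsigned count) {
            for(unsigned i = 0; i < count; i++) {
                    cltr_id[i] = (int)*value;
                    *value += inc;
            }
    }

    int main(void) {
            /* 3 processors on the cluster: 2 active, 1 idle, 4 ready-queue lanes each. */
            int active_id[2], idle_id[1];
            unsigned preferred = 0;

            assign_list(&preferred, 4, active_id, 2);    /* actives get 0 and 4 */
            assign_list(&preferred, 4, idle_id,   1);    /* the idle one gets 8 */

            printf("actives: %d %d, idle: %d, lanes.count: %d\n",
                   active_id[0], active_id[1], idle_id[0], 3 * 4);
            return 0;
    }

Because the preferred id is now recomputed from the processor lists, ready_queue_grow and ready_queue_shrink no longer need the target parameter or the returned preferred value that the old signatures carried.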