Changeset 7768b8d


Ignore:
Timestamp:
Nov 26, 2019, 3:19:20 PM (2 years ago)
Author:
Thierry Delisle <tdelisle@…>
Branches:
arm-eh, jacob/cs343-translation, master, new-ast, new-ast-unique-expr
Children:
30763fd
Parents:
21184e3
Message:

First step at adding the new ready queue to Cforall

Location:
libcfa
Files:
1 added
9 edited

Legend:

Unmodified
Added
Removed
  • libcfa/prelude/sync-builtins.cf

    r21184e3 r7768b8d  
    436436#endif
    437437
     438_Bool __atomic_exchange_n(volatile _Bool *, _Bool, int);
     439_Bool __atomic_exchange_1(volatile _Bool *, _Bool, int);
     440void __atomic_exchange(volatile _Bool *, volatile _Bool *, volatile _Bool *, int);
    438441char __atomic_exchange_n(volatile char *, char, int);
    439442char __atomic_exchange_1(volatile char *, char, int);
  • libcfa/src/Makefile.am

    r21184e3 r7768b8d  
    4848thread_headers_nosrc = concurrency/invoke.h
    4949thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    50 thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
     50thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
    5151else
    5252headers =
  • libcfa/src/Makefile.in

    r21184e3 r7768b8d  
    165165        concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \
    166166        concurrency/invoke.c concurrency/preemption.cfa \
    167         concurrency/coroutine.cfa concurrency/thread.cfa \
    168         concurrency/kernel.cfa concurrency/monitor.cfa \
    169         concurrency/mutex.cfa
     167        concurrency/ready_queue.cfa concurrency/coroutine.cfa \
     168        concurrency/thread.cfa concurrency/kernel.cfa \
     169        concurrency/monitor.cfa concurrency/mutex.cfa
    170170@BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \
    171171@BUILDLIB_TRUE@ concurrency/thread.lo concurrency/kernel.lo \
     
    174174@BUILDLIB_TRUE@ concurrency/CtxSwitch-@ARCHITECTURE@.lo \
    175175@BUILDLIB_TRUE@ concurrency/alarm.lo concurrency/invoke.lo \
    176 @BUILDLIB_TRUE@ concurrency/preemption.lo $(am__objects_3)
     176@BUILDLIB_TRUE@ concurrency/preemption.lo \
     177@BUILDLIB_TRUE@ concurrency/ready_queue.lo $(am__objects_3)
    177178am_libcfathread_la_OBJECTS = $(am__objects_4)
    178179libcfathread_la_OBJECTS = $(am_libcfathread_la_OBJECTS)
     
    463464@BUILDLIB_FALSE@thread_headers =
    464465@BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    465 @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
     466@BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
    466467
    467468#----------------------------------------------------------------------------------------------------------------
     
    599600        concurrency/$(DEPDIR)/$(am__dirstamp)
    600601concurrency/preemption.lo: concurrency/$(am__dirstamp) \
     602        concurrency/$(DEPDIR)/$(am__dirstamp)
     603concurrency/ready_queue.lo: concurrency/$(am__dirstamp) \
    601604        concurrency/$(DEPDIR)/$(am__dirstamp)
    602605concurrency/coroutine.lo: concurrency/$(am__dirstamp) \
  • libcfa/src/bits/defs.hfa

    r21184e3 r7768b8d  
    5353    return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
    5454}
     55
     56#define __CFA_NO_BIT_TEST_AND_SET__
     57
     58static inline bool bts(volatile unsigned long long int * target, unsigned long long int bit ) {
     59        #if defined(__CFA_NO_BIT_TEST_AND_SET__)
     60        unsigned long long int mask = 1ul << bit;
     61        unsigned long long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
     62        return (ret & mask) != 0;
     63    #else
     64        int result = 0;
     65        asm volatile(
     66            "LOCK btsq %[bit], %[target]\n\t"
     67            :"=@ccc" (result)
     68            : [target] "m" (*target), [bit] "r" (bit)
     69        );
     70        return result != 0;
     71    #endif
     72}
     73
     74static inline bool btr(volatile unsigned long long int * target, unsigned long long int bit ) {
     75        #if defined(__CFA_NO_BIT_TEST_AND_SET__)
     76        unsigned long long int mask = 1ul << bit;
     77        unsigned long long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
     78        return (ret & mask) != 0;
     79        #else
     80        int result = 0;
     81        asm volatile(
     82            "LOCK btrq %[bit], %[target]\n\t"
     83            :"=@ccc" (result)
     84            : [target] "m" (*target), [bit] "r" (bit)
     85        );
     86        return result != 0;
     87    #endif
     88}
  • libcfa/src/concurrency/invoke.h

    r21184e3 r7768b8d  
    189189                // instrusive link field for threads
    190190                struct thread_desc * next;
     191                struct thread_desc * prev;
     192                unsigned long long ts;
    191193
    192194                struct {
  • libcfa/src/concurrency/kernel.cfa

    r21184e3 r7768b8d  
    210210        this.name = name;
    211211        this.cltr = &cltr;
     212        id = -1u;
    212213        terminated{ 0 };
    213214        do_terminate = false;
     
    239240        this.preemption_rate = preemption_rate;
    240241        ready_queue{};
    241         ready_queue_lock{};
    242 
    243         procs{ __get };
     242        ready_lock{};
     243
    244244        idles{ __get };
    245245        threads{ __get };
     
    263263        // Because of a bug, we couldn't initialized the seed on construction
    264264        // Do it here
    265         kernelTLS.rand_seed ^= rdtsc();
     265        kernelTLS.rand_seed ^= rdtscl();
    266266
    267267        processor * this = runner.proc;
     
    270270        __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);
    271271
    272         doregister(this->cltr, this);
     272        // register the processor unless it's the main thread which is handled in the boot sequence
     273        if(this != mainProcessor)
     274                this->id = doregister(this->cltr, this);
    273275
    274276        {
     
    306308        }
    307309
    308         unregister(this->cltr, this);
    309 
    310310        V( this->terminated );
     311
     312        // unregister the processor unless it's the main thread which is handled in the boot sequence
     313        if(this != mainProcessor)
     314                unregister(this->cltr, this);
    311315
    312316        __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
     
    505509
    506510        with( *thrd->curr_cluster ) {
    507                 lock  ( ready_queue_lock __cfaabi_dbg_ctx2 );
    508                 bool was_empty = !(ready_queue != 0);
    509                 append( ready_queue, thrd );
    510                 unlock( ready_queue_lock );
     511                ready_schedule_lock(*thrd->curr_cluster, kernelTLS.this_processor);
     512                __atomic_acquire(&ready_queue.lock);
     513                thrd->ts = rdtscl();
     514                bool was_empty = push( ready_queue, thrd );
     515                __atomic_unlock(&ready_queue.lock);
     516                ready_schedule_unlock(*thrd->curr_cluster, kernelTLS.this_processor);
    511517
    512518                if(was_empty) {
     
    529535thread_desc * nextThread(cluster * this) with( *this ) {
    530536        verify( ! kernelTLS.preemption_state.enabled );
    531         lock( ready_queue_lock __cfaabi_dbg_ctx2 );
    532         thread_desc * head = pop_head( ready_queue );
    533         unlock( ready_queue_lock );
     537
     538        ready_schedule_lock(*this, kernelTLS.this_processor);
     539                __atomic_acquire(&ready_queue.lock);
     540                        thread_desc * head;
     541                        __attribute__((unused)) bool _;
     542                        [head, _] = pop( ready_queue );
     543                __atomic_unlock(&ready_queue.lock);
     544        ready_schedule_unlock(*this, kernelTLS.this_processor);
     545
    534546        verify( ! kernelTLS.preemption_state.enabled );
    535547        return head;
     
    693705                pending_preemption = false;
    694706                kernel_thread = pthread_self();
     707                id = -1u;
    695708
    696709                runner{ &this };
     
    702715        mainProcessor = (processor *)&storage_mainProcessor;
    703716        (*mainProcessor){};
     717
     718        mainProcessor->id = doregister(mainCluster, mainProcessor);
    704719
    705720        //initialize the global state variables
     
    748763        kernel_stop_preemption();
    749764
     765        unregister(mainCluster, mainProcessor);
     766
    750767        // Destroy the main processor and its context in reverse order of construction
    751768        // These were manually constructed so we need manually destroy them
    752769        ^(mainProcessor->runner){};
    753         ^(mainProcessor){};
     770        ^(*mainProcessor){};
    754771
    755772        // Final step, destroy the main thread since it is no longer needed
    756         // Since we provided a stack to this taxk it will not destroy anything
    757         ^(mainThread){};
     773        // Since we provided a stack to this task it will not destroy anything
     774        ^(*mainThread){};
     775
     776        ^(*mainCluster){};
    758777
    759778        ^(__cfa_dbg_global_clusters.list){};
     
    771790        with( *cltr ) {
    772791                lock      (proc_list_lock __cfaabi_dbg_ctx2);
    773                 remove    (procs, *this);
    774792                push_front(idles, *this);
    775793                unlock    (proc_list_lock);
     
    785803                lock      (proc_list_lock __cfaabi_dbg_ctx2);
    786804                remove    (idles, *this);
    787                 push_front(procs, *this);
    788805                unlock    (proc_list_lock);
    789806        }
     
    926943}
    927944
    928 void doregister( cluster * cltr, processor * proc ) {
    929         lock      (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    930         cltr->nprocessors += 1;
    931         push_front(cltr->procs, *proc);
    932         unlock    (cltr->proc_list_lock);
    933 }
    934 
    935 void unregister( cluster * cltr, processor * proc ) {
    936         lock  (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    937         remove(cltr->procs, *proc );
    938         cltr->nprocessors -= 1;
    939         unlock(cltr->proc_list_lock);
    940 }
    941 
    942945//-----------------------------------------------------------------------------
    943946// Debug
  • libcfa/src/concurrency/kernel.hfa

    r21184e3 r7768b8d  
    106106        // Cluster from which to get threads
    107107        struct cluster * cltr;
     108        unsigned int id;
    108109
    109110        // Name of the processor
     
    157158}
    158159
     160
     161//-----------------------------------------------------------------------------
     162// Cluster Tools
     163struct __processor_id;
     164
     165// Reader-Writer lock protecting the ready-queue
     166struct __clusterRWLock_t {
     167        // total cachelines allocated
     168        unsigned int max;
     169
     170        // cachelines currently in use
     171        volatile unsigned int alloc;
     172
     174        // cachelines ready to iterate over
     174        // (!= to alloc when thread is in second half of doregister)
     175        volatile unsigned int ready;
     176
     177        // writer lock
     178        volatile bool lock;
     179
     180        // data pointer
     181        __processor_id * data;
     182};
     183
     184void  ?{}(__clusterRWLock_t & this);
     185void ^?{}(__clusterRWLock_t & this);
     186
      187// Underlying sub-queues of the ready queue
     188struct __attribute__((aligned(128))) __intrusive_ready_queue_t {
     189        // spin lock protecting the queue
     190        volatile bool lock;
     191
     192        // anchor for the head and the tail of the queue
     193        struct __sentinel_t {
     194                struct thread_desc * next;
     195                struct thread_desc * prev;
     196                unsigned long long ts;
     197        } before, after;
     198
     199        // Optional statistic counters
     200        #ifndef __CFA_NO_SCHED_STATS__
     201                struct __attribute__((aligned(64))) {
     202                        // difference between number of push and pops
     203                        ssize_t diff;
     204
     205                        // total number of pushes and pops
     206                        size_t  push;
     207                        size_t  pop ;
     208                } stat;
     209        #endif
     210};
     211
     212void  ?{}(__intrusive_ready_queue_t & this);
     213void ^?{}(__intrusive_ready_queue_t & this);
     214
    159215//-----------------------------------------------------------------------------
    160216// Cluster
    161217struct cluster {
    162218        // Ready queue locks
    163         __spinlock_t ready_queue_lock;
     219        __clusterRWLock_t ready_lock;
    164220
    165221        // Ready queue for threads
    166         __queue_t(thread_desc) ready_queue;
     222        __intrusive_ready_queue_t ready_queue;
    167223
    168224        // Name of the cluster
     
    174230        // List of processors
    175231        __spinlock_t proc_list_lock;
    176         __dllist_t(struct processor) procs;
    177232        __dllist_t(struct processor) idles;
    178         unsigned int nprocessors;
    179233
    180234        // List of threads
  • libcfa/src/concurrency/kernel_private.hfa

    r21184e3 r7768b8d  
    9999//-----------------------------------------------------------------------------
    100100// Utils
    101 #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
     101#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
    102102
    103103static inline uint32_t tls_rand() {
     
    115115void unregister( struct cluster * cltr, struct thread_desc & thrd );
    116116
    117 void doregister( struct cluster * cltr, struct processor * proc );
    118 void unregister( struct cluster * cltr, struct processor * proc );
     117//=======================================================================
     118// Cluster lock API
     119//=======================================================================
     120struct __attribute__((aligned(64))) __processor_id {
     121        processor * volatile handle;
     122        volatile bool lock;
     123};
     124
     125// Lock-Free registering/unregistering of threads
     126// Register a processor to a given cluster and get its unique id in return
     127unsigned doregister( struct cluster * cltr, struct processor * proc );
     128
     129// Unregister a processor from a given cluster using its id, getting back the original pointer
     130void     unregister( struct cluster * cltr, struct processor * proc );
     131
     132//=======================================================================
     133// Reader-writer lock implementation
     134// Concurrent with doregister/unregister,
     135//    i.e., threads can be added at any point during or between the entry/exit
     136static inline void __atomic_acquire(volatile bool * ll) {
     137        while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
     138                while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
     139                        asm volatile("pause");
     140        }
     141        /* paranoid */ verify(*ll);
     142}
     143
     144static inline bool __atomic_try_acquire(volatile bool * ll) {
     145        return __atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
     146}
     147
     148static inline void __atomic_unlock(volatile bool * ll) {
     149        /* paranoid */ verify(*ll);
     150        __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
     151}
     152
     153//-----------------------------------------------------------------------
     154// Reader side : acquire when using the ready queue to schedule but not
     155//  creating/destroying queues
     156static inline void ready_schedule_lock( struct cluster & cltr, struct processor * proc) with(cltr.ready_lock) {
     157        unsigned iproc = proc->id;
     158        /*paranoid*/ verify(data[iproc].handle == proc);
     159        /*paranoid*/ verify(iproc < ready);
     160
     161        // Step 1 : make sure no writer are in the middle of the critical section
     162        while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
     163                asm volatile("pause");
     164
     165        // Fence needed because we don't want to start trying to acquire the lock
     166        // before we read a false.
     167        // Not needed on x86
     168        // std::atomic_thread_fence(std::memory_order_seq_cst);
     169
     170        // Step 2 : acquire our local lock
     171        __atomic_acquire( &data[iproc].lock );
     172        /*paranoid*/ verify(data[iproc].lock);
     173}
     174
     175static inline void ready_schedule_unlock( struct cluster & cltr, struct processor * proc) with(cltr.ready_lock) {
     176        unsigned iproc = proc->id;
     177        /*paranoid*/ verify(data[iproc].handle == proc);
     178        /*paranoid*/ verify(iproc < ready);
     179        /*paranoid*/ verify(data[iproc].lock);
     180        __atomic_store_n(&data[iproc].lock, false, __ATOMIC_RELEASE);
     181}
     182
     183//-----------------------------------------------------------------------
     184// Writer side : acquire when changing the ready queue, e.g. adding more
     185//  queues or removing them.
     186uint_fast32_t ready_mutate_lock( struct cluster & cltr );
     187
     188void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t );
     189
     190bool push(__intrusive_ready_queue_t & this, thread_desc * node);
     191[thread_desc *, bool] pop(__intrusive_ready_queue_t & this);
    119192
    120193// Local Variables: //
  • libcfa/src/concurrency/thread.cfa

    r21184e3 r7768b8d  
    4141        self_mon_p = &self_mon;
    4242        curr_cluster = &cl;
    43         next = NULL;
     43        next = 0p;
     44        prev = 0p;
    4445
    4546        node.next = NULL;
Note: See TracChangeset for help on using the changeset viewer.