Changes in libcfa/src [d4f1521:397c101a]

Files:
    1 deleted (concurrency/ready_queue.cfa)
    8 edited

  • libcfa/src/Makefile.am

    rd4f1521 r397c101a
     thread_headers_nosrc = concurrency/invoke.h
     thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    -thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
    +thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
     else
     headers =
  • libcfa/src/Makefile.in

    rd4f1521 r397c101a
             concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \
             concurrency/invoke.c concurrency/preemption.cfa \
    -        concurrency/ready_queue.cfa concurrency/coroutine.cfa \
    -        concurrency/thread.cfa concurrency/kernel.cfa \
    -        concurrency/monitor.cfa concurrency/mutex.cfa
    +        concurrency/coroutine.cfa concurrency/thread.cfa \
    +        concurrency/kernel.cfa concurrency/monitor.cfa \
    +        concurrency/mutex.cfa
     @BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \
     @BUILDLIB_TRUE@ concurrency/thread.lo concurrency/kernel.lo \
    …
     @BUILDLIB_TRUE@ concurrency/CtxSwitch-@ARCHITECTURE@.lo \
     @BUILDLIB_TRUE@ concurrency/alarm.lo concurrency/invoke.lo \
    -@BUILDLIB_TRUE@ concurrency/preemption.lo \
    -@BUILDLIB_TRUE@ concurrency/ready_queue.lo $(am__objects_3)
    +@BUILDLIB_TRUE@ concurrency/preemption.lo $(am__objects_3)
     am_libcfathread_la_OBJECTS = $(am__objects_4)
     libcfathread_la_OBJECTS = $(am_libcfathread_la_OBJECTS)
    …
     @BUILDLIB_FALSE@thread_headers =
     @BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    -@BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
    +@BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}

     #----------------------------------------------------------------------------------------------------------------
    …
             concurrency/$(DEPDIR)/$(am__dirstamp)
     concurrency/preemption.lo: concurrency/$(am__dirstamp) \
    -        concurrency/$(DEPDIR)/$(am__dirstamp)
    -concurrency/ready_queue.lo: concurrency/$(am__dirstamp) \
             concurrency/$(DEPDIR)/$(am__dirstamp)
     concurrency/coroutine.lo: concurrency/$(am__dirstamp) \
  • libcfa/src/bits/defs.hfa

    rd4f1521 r397c101a
         return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
     }
    -
    -#define __CFA_NO_BIT_TEST_AND_SET__
    -
    -static inline bool bts(volatile unsigned long long int * target, unsigned long long int bit ) {
    -        #if defined(__CFA_NO_BIT_TEST_AND_SET__)
    -                unsigned long long int mask = 1ul << bit;
    -                unsigned long long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
    -                return (ret & mask) != 0;
    -        #else
    -                int result = 0;
    -                asm volatile(
    -                        "LOCK btsq %[bit], %[target]\n\t"
    -                        : "=@ccc" (result)
    -                        : [target] "m" (*target), [bit] "r" (bit)
    -                );
    -                return result != 0;
    -        #endif
    -}
    -
    -static inline bool btr(volatile unsigned long long int * target, unsigned long long int bit ) {
    -        #if defined(__CFA_NO_BIT_TEST_AND_SET__)
    -                unsigned long long int mask = 1ul << bit;
    -                unsigned long long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
    -                return (ret & mask) != 0;
    -        #else
    -                int result = 0;
    -                asm volatile(
    -                        "LOCK btrq %[bit], %[target]\n\t"
    -                        : "=@ccc" (result)
    -                        : [target] "m" (*target), [bit] "r" (bit)
    -                );
    -                return result != 0;
    -        #endif
    -}
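
    Aside: the deleted bts/btr helpers implemented atomic bit test-and-set and test-and-reset, with a portable path built on the GCC __atomic_fetch_or/__atomic_fetch_and builtins and an x86-64 `lock btsq`/`lock btrq` alternative selected by __CFA_NO_BIT_TEST_AND_SET__. For readers who want to experiment with the portable path, here is a self-contained C sketch (helper names are hypothetical, not the libcfa API); it uses 1ull for the mask, since the deleted code's 1ul is only 64 bits wide on LP64 targets:

        #include <stdbool.h>
        #include <stdio.h>

        // Atomically set bit `bit` of *target; return the bit's previous value.
        static inline bool bit_test_and_set( volatile unsigned long long * target, unsigned bit ) {
                unsigned long long mask = 1ull << bit;
                return (__atomic_fetch_or( target, mask, __ATOMIC_RELAXED ) & mask) != 0;
        }

        // Atomically clear bit `bit` of *target; return the bit's previous value.
        static inline bool bit_test_and_reset( volatile unsigned long long * target, unsigned bit ) {
                unsigned long long mask = 1ull << bit;
                return (__atomic_fetch_and( target, ~mask, __ATOMIC_RELAXED ) & mask) != 0;
        }

        int main(void) {
                volatile unsigned long long word = 0;
                printf( "%d\n", bit_test_and_set  ( &word, 3 ) );   // 0 : bit was clear
                printf( "%d\n", bit_test_and_set  ( &word, 3 ) );   // 1 : bit was already set
                printf( "%d\n", bit_test_and_reset( &word, 3 ) );   // 1 : bit was set, now cleared
                printf( "%d\n", bit_test_and_reset( &word, 3 ) );   // 0 : bit was already clear
                return 0;
        }

    Returning the previous value is what makes such helpers usable as claim operations: the caller that observes false is the one that actually flipped the bit.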
  • libcfa/src/concurrency/invoke.h

    rd4f1521 r397c101a
                     // intrusive link field for threads
                     struct thread_desc * next;
    -                struct thread_desc * prev;
    -                unsigned long long ts;

                     struct {
  • libcfa/src/concurrency/kernel.cfa

    rd4f1521 r397c101a
             this.name = name;
             this.cltr = &cltr;
    -        id = -1u;
             terminated{ 0 };
             do_terminate = false;
    …
             this.preemption_rate = preemption_rate;
             ready_queue{};
    -        ready_lock{};
    -
    +        ready_queue_lock{};
    +
    +        procs{ __get };
             idles{ __get };
             threads{ __get };
    …
             __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);

    -        // register the processor unless it's the main thread which is handled in the boot sequence
    -        if(this != mainProcessor)
    -                this->id = doregister(this->cltr, this);
    +        doregister(this->cltr, this);

             {
    …
             }

    +        unregister(this->cltr, this);
    +
             V( this->terminated );
    -
    -        // unregister the processor unless it's the main thread which is handled in the boot sequence
    -        if(this != mainProcessor)
    -                unregister(this->cltr, this);

             __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
    …
             with( *thrd->curr_cluster ) {
    -                ready_schedule_lock(*thrd->curr_cluster, kernelTLS.this_processor);
    -                __atomic_acquire(&ready_queue.lock);
    -                thrd->ts = rdtscl();
    -                bool was_empty = push( ready_queue, thrd );
    -                __atomic_unlock(&ready_queue.lock);
    -                ready_schedule_unlock(*thrd->curr_cluster, kernelTLS.this_processor);
    +                lock  ( ready_queue_lock __cfaabi_dbg_ctx2 );
    +                bool was_empty = !(ready_queue != 0);
    +                append( ready_queue, thrd );
    +                unlock( ready_queue_lock );

                     if(was_empty) {
    …
     thread_desc * nextThread(cluster * this) with( *this ) {
             verify( ! kernelTLS.preemption_state.enabled );
    -
    -        ready_schedule_lock(*this, kernelTLS.this_processor);
    -                __atomic_acquire(&ready_queue.lock);
    -                        thread_desc * head;
    -                        __attribute__((unused)) bool _;
    -                        [head, _] = pop( ready_queue );
    -                __atomic_unlock(&ready_queue.lock);
    -        ready_schedule_unlock(*this, kernelTLS.this_processor);
    -
    +        lock( ready_queue_lock __cfaabi_dbg_ctx2 );
    +        thread_desc * head = pop_head( ready_queue );
    +        unlock( ready_queue_lock );
             verify( ! kernelTLS.preemption_state.enabled );
             return head;
    …
                     pending_preemption = false;
                     kernel_thread = pthread_self();
    -                id = -1u;

                     runner{ &this };
    …
             mainProcessor = (processor *)&storage_mainProcessor;
             (*mainProcessor){};
    -
    -        mainProcessor->id = doregister(mainCluster, mainProcessor);

             //initialize the global state variables
    …
             kernel_stop_preemption();

    -        unregister(mainCluster, mainProcessor);
    -
             // Destroy the main processor and its context in reverse order of construction
             // These were manually constructed so we need manually destroy them
             ^(mainProcessor->runner){};
    -        ^(*mainProcessor){};
    +        ^(mainProcessor){};

             // Final step, destroy the main thread since it is no longer needed
             // Since we provided a stack to this task it will not destroy anything
    -        ^(*mainThread){};
    -
    -        ^(*mainCluster){};
    +        ^(mainThread){};

             ^(__cfa_dbg_global_clusters.list){};
    …
             with( *cltr ) {
                     lock      (proc_list_lock __cfaabi_dbg_ctx2);
    +                remove    (procs, *this);
                     push_front(idles, *this);
                     unlock    (proc_list_lock);
    …
                     lock      (proc_list_lock __cfaabi_dbg_ctx2);
                     remove    (idles, *this);
    +                push_front(procs, *this);
                     unlock    (proc_list_lock);
             }
    …
     }

    +void doregister( cluster * cltr, processor * proc ) {
    +        lock      (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    +        cltr->nprocessors += 1;
    +        push_front(cltr->procs, *proc);
    +        unlock    (cltr->proc_list_lock);
    +}
    +
    +void unregister( cluster * cltr, processor * proc ) {
    +        lock  (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    +        remove(cltr->procs, *proc );
    +        cltr->nprocessors -= 1;
    +        unlock(cltr->proc_list_lock);
    +}
    +
     //-----------------------------------------------------------------------------
     // Debug
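
    For orientation, the net effect of this file's changes: processors now always register with their cluster through the locked doregister/unregister above, and scheduling reverts from the per-processor reader-writer scheme to a single cluster-wide lock around one intrusive FIFO. A minimal C sketch of that reverted fast path, using a pthread mutex in place of libcfa's __spinlock_t (names hypothetical, not the libcfa API):

        #include <pthread.h>
        #include <stdbool.h>
        #include <stddef.h>

        // Stand-in for thread_desc; only the intrusive `next` link is needed here.
        typedef struct thread_desc {
                struct thread_desc * next;
        } thread_desc;

        // Stand-in for the cluster's ready_queue + ready_queue_lock pair.
        typedef struct {
                pthread_mutex_t lock;        // plays the role of ready_queue_lock
                thread_desc *  head;
                thread_desc ** tail;         // tail pointer gives O(1) append
        } ready_queue_t;

        static void ready_queue_init( ready_queue_t * q ) {
                pthread_mutex_init( &q->lock, NULL );
                q->head = NULL;
                q->tail = &q->head;
        }

        // Mirrors ScheduleThread: lock, record emptiness, append, unlock.
        static bool ready_queue_append( ready_queue_t * q, thread_desc * thrd ) {
                pthread_mutex_lock( &q->lock );
                bool was_empty = (q->head == NULL);
                thrd->next = NULL;
                *q->tail = thrd;
                q->tail = &thrd->next;
                pthread_mutex_unlock( &q->lock );
                return was_empty;            // caller may wake an idle processor
        }

        // Mirrors nextThread: lock, pop_head, unlock; NULL when the queue is empty.
        static thread_desc * ready_queue_pop_head( ready_queue_t * q ) {
                pthread_mutex_lock( &q->lock );
                thread_desc * head = q->head;
                if ( head ) {
                        q->head = head->next;
                        if ( q->head == NULL ) q->tail = &q->head;
                }
                pthread_mutex_unlock( &q->lock );
                return head;
        }

    As in ScheduleThread, append reports whether the queue was empty so the caller can decide to wake an idle processor; the trade-off versus the removed design is simplicity against contention on one lock and one cache line.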
  • libcfa/src/concurrency/kernel.hfa

    rd4f1521 r397c101a
             // Cluster from which to get threads
             struct cluster * cltr;
    -        unsigned int id;

             // Name of the processor
    …
     }

    -
    -//-----------------------------------------------------------------------------
    -// Cluster Tools
    -struct __processor_id;
    -
    -// Reader-writer lock protecting the ready queue
    -struct __clusterRWLock_t {
    -        // total cachelines allocated
    -        unsigned int max;
    -
    -        // cachelines currently in use
    -        volatile unsigned int alloc;
    -
    -        // cachelines ready to iterate over
    -        // (!= alloc when a thread is in the second half of doregister)
    -        volatile unsigned int ready;
    -
    -        // writer lock
    -        volatile bool lock;
    -
    -        // data pointer
    -        __processor_id * data;
    -};
    -
    -void  ?{}(__clusterRWLock_t & this);
    -void ^?{}(__clusterRWLock_t & this);
    -
    -// Underlying sub-queues of the ready queue
    -struct __attribute__((aligned(128))) __intrusive_ready_queue_t {
    -        // spin lock protecting the queue
    -        volatile bool lock;
    -
    -        // anchors for the head and the tail of the queue
    -        struct __sentinel_t {
    -                struct thread_desc * next;
    -                struct thread_desc * prev;
    -                unsigned long long ts;
    -        } before, after;
    -
    -        // Optional statistic counters
    -        #ifndef __CFA_NO_SCHED_STATS__
    -                struct __attribute__((aligned(64))) {
    -                        // difference between number of pushes and pops
    -                        ssize_t diff;
    -
    -                        // total number of pushes and pops
    -                        size_t  push;
    -                        size_t  pop;
    -                } stat;
    -        #endif
    -};
    -
    -void  ?{}(__intrusive_ready_queue_t & this);
    -void ^?{}(__intrusive_ready_queue_t & this);
    -
     //-----------------------------------------------------------------------------
     // Cluster
     struct cluster {
             // Ready queue locks
    -        __clusterRWLock_t ready_lock;
    +        __spinlock_t ready_queue_lock;

             // Ready queue for threads
    -        __intrusive_ready_queue_t ready_queue;
    +        __queue_t(thread_desc) ready_queue;

             // Name of the cluster
    …
             // List of processors
             __spinlock_t proc_list_lock;
    +        __dllist_t(struct processor) procs;
             __dllist_t(struct processor) idles;
    +        unsigned int nprocessors;

             // List of threads
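
    For context, the deleted __intrusive_ready_queue_t links ready threads through their own next/prev fields (the ones also removed from invoke.h above), anchored between two sentinels, before and after, so that push and pop never branch on an empty list. The push/pop implementations are not part of this changeset, so the following C sketch only illustrates the sentinel idea under that assumption (helper names hypothetical):

        #include <stdbool.h>
        #include <stddef.h>

        // Stand-in for thread_desc with the removed intrusive link fields.
        typedef struct thread_desc {
                struct thread_desc * next;
                struct thread_desc * prev;
                unsigned long long   ts;
        } thread_desc;

        // Shape of the deleted sub-queue: `before` anchors the head and `after`
        // the tail, so the list always holds at least the two sentinel nodes.
        typedef struct {
                volatile bool lock;              // spin lock protecting the queue
                thread_desc   before, after;     // sentinel anchors (only links used)
        } intrusive_queue_t;

        static void queue_init( intrusive_queue_t * q ) {
                q->lock = false;
                q->before.next = &q->after; q->before.prev = NULL;
                q->after.prev = &q->before; q->after.next  = NULL;
        }

        static bool queue_empty( intrusive_queue_t * q ) {
                return q->before.next == &q->after;   // only the sentinels remain
        }

        // Insert at the tail, just before the `after` sentinel.
        static void queue_push( intrusive_queue_t * q, thread_desc * node ) {
                thread_desc * last = q->after.prev;
                last->next = node;      node->prev = last;
                node->next = &q->after; q->after.prev = node;
        }

        // Remove from the head, just after the `before` sentinel; NULL when empty.
        static thread_desc * queue_pop( intrusive_queue_t * q ) {
                if ( queue_empty( q ) ) return NULL;
                thread_desc * node = q->before.next;
                q->before.next = node->next;
                node->next->prev = &q->before;
                return node;
        }

    The aligned(128) attribute on the real struct keeps each sub-queue on its own pair of cache lines, which matters once multiple processors poll different sub-queues.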
  • libcfa/src/concurrency/kernel_private.hfa

    rd4f1521 r397c101a
     //-----------------------------------------------------------------------------
     // Utils
    -#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
    +#define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]

     static inline uint32_t tls_rand() {
    …
     void unregister( struct cluster * cltr, struct thread_desc & thrd );

    -//=======================================================================
    -// Cluster lock API
    -//=======================================================================
    -struct __attribute__((aligned(64))) __processor_id {
    -        processor * volatile handle;
    -        volatile bool lock;
    -};
    -
    -// Lock-free registering/unregistering of processors
    -// Register a processor with a given cluster and get its unique id in return
    -unsigned doregister( struct cluster * cltr, struct processor * proc );
    -
    -// Unregister a processor from a given cluster using its id, getting back the original pointer
    -void     unregister( struct cluster * cltr, struct processor * proc );
    -
    -//=======================================================================
    -// Reader-writer lock implementation
    -// Concurrent with doregister/unregister,
    -//    i.e., threads can be added at any point during or between the entry/exit
    -static inline void __atomic_acquire(volatile bool * ll) {
    -        while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
    -                while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
    -                        asm volatile("pause");
    -        }
    -        /* paranoid */ verify(*ll);
    -}
    -
    -static inline bool __atomic_try_acquire(volatile bool * ll) {
    -        return __atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
    -}
    -
    -static inline void __atomic_unlock(volatile bool * ll) {
    -        /* paranoid */ verify(*ll);
    -        __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
    -}
    -
    -//-----------------------------------------------------------------------
    -// Reader side : acquired when using the ready queue to schedule, but not
    -//    when creating/destroying queues
    -static inline void ready_schedule_lock( struct cluster & cltr, struct processor * proc) with(cltr.ready_lock) {
    -        unsigned iproc = proc->id;
    -        /*paranoid*/ verify(data[iproc].handle == proc);
    -        /*paranoid*/ verify(iproc < ready);
    -
    -        // Step 1 : make sure no writer is in the middle of the critical section
    -        while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
    -                asm volatile("pause");
    -
    -        // Fence needed because we don't want to start trying to acquire the lock
    -        // before we read a false.
    -        // Not needed on x86
    -        // std::atomic_thread_fence(std::memory_order_seq_cst);
    -
    -        // Step 2 : acquire our local lock
    -        __atomic_acquire( &data[iproc].lock );
    -        /*paranoid*/ verify(data[iproc].lock);
    -}
    -
    -static inline void ready_schedule_unlock( struct cluster & cltr, struct processor * proc) with(cltr.ready_lock) {
    -        unsigned iproc = proc->id;
    -        /*paranoid*/ verify(data[iproc].handle == proc);
    -        /*paranoid*/ verify(iproc < ready);
    -        /*paranoid*/ verify(data[iproc].lock);
    -        __atomic_store_n(&data[iproc].lock, false, __ATOMIC_RELEASE);
    -}
    -
    -//-----------------------------------------------------------------------
    -// Writer side : acquired when changing the ready queue, e.g. adding more
    -//    queues or removing them.
    -uint_fast32_t ready_mutate_lock( struct cluster & cltr );
    -
    -void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t );
    -
    -bool push(__intrusive_ready_queue_t & this, thread_desc * node);
    -[thread_desc *, bool] pop(__intrusive_ready_queue_t & this);
    +void doregister( struct cluster * cltr, struct processor * proc );
    +void unregister( struct cluster * cltr, struct processor * proc );

     // Local Variables: //
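
    For context on the deleted reader-writer lock: each registered processor owns one cache-line-aligned flag in `data`; readers (ready_schedule_lock) wait out any writer and then take only their own flag, so uncontended scheduling never touches a shared cache line. The writer side (ready_mutate_lock) is only declared in this header; the usual counterpart, assumed below, takes the global `lock` and then drains every reader flag. A compressed C sketch under those assumptions, with a fixed processor count instead of the dynamic doregister-grown array:

        #include <stdbool.h>

        #define NPROCS 8   // fixed count for the sketch; the real lock grows `data` dynamically

        typedef struct {
                _Alignas(64) volatile bool lock;   // one flag per processor, on its own cache line
        } per_proc_t;

        static volatile bool writer_lock;          // the removed struct's `lock` field
        static per_proc_t    data[NPROCS];         // the removed struct's `data` array

        static inline void spin_acquire( volatile bool * ll ) {
                while ( __atomic_exchange_n( ll, true, __ATOMIC_SEQ_CST ) ) {
                        while ( __atomic_load_n( ll, __ATOMIC_RELAXED ) )
                                asm volatile( "pause" );   // x86 spin hint, as in the deleted code
                }
        }

        static inline void spin_release( volatile bool * ll ) {
                __atomic_store_n( ll, false, __ATOMIC_RELEASE );
        }

        // Reader side (cf. the removed ready_schedule_lock): wait until no
        // writer is active, then take only this processor's flag.
        static void reader_lock( unsigned iproc ) {
                while ( __atomic_load_n( &writer_lock, __ATOMIC_RELAXED ) )
                        asm volatile( "pause" );
                spin_acquire( &data[iproc].lock );
        }

        static void reader_unlock( unsigned iproc ) {
                spin_release( &data[iproc].lock );
        }

        // Writer side (assumed counterpart of the declared ready_mutate_lock):
        // exclude other writers, then take every per-processor flag; readers
        // that slipped past the writer_lock check are waited out here.
        static void writer_acquire( void ) {
                spin_acquire( &writer_lock );
                for ( unsigned i = 0; i < NPROCS; i += 1 )
                        spin_acquire( &data[i].lock );
        }

        static void writer_release( void ) {
                for ( unsigned i = 0; i < NPROCS; i += 1 )
                        spin_release( &data[i].lock );
                spin_release( &writer_lock );
        }

    The payoff of this design is that the common operation, scheduling, writes no shared location; the cost is that queue creation/destruction must visit every processor's flag, which is why the API is split into reader and writer sides.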
  • libcfa/src/concurrency/thread.cfa

    rd4f1521 r397c101a
             self_mon_p = &self_mon;
             curr_cluster = &cl;
    -        next = 0p;
    -        prev = 0p;
    +        next = NULL;

             node.next = NULL;