Ignore:
Timestamp:
Nov 13, 2023, 3:43:43 AM (2 years ago)
Author:
JiadaL <j82liang@…>
Branches:
master
Children:
25f2798
Parents:
0030b508 (diff), 2174191 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Location:
libcfa/src/concurrency
Files:
5 edited

Legend:

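Unmodified (row shows both the old r0030b508 and the new rfc12f05 line number)
Added (row shows only the new rfc12f05 line number)
Removed (row shows only the old r0030b508 line number)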
  • libcfa/src/concurrency/channel.hfa

    r0030b508 rfc12f05  
    130130static inline void __cons_handoff( channel(T) & chan, T & elem ) with(chan) {
    131131    memcpy( cons`first.extra, (void *)&elem, sizeof(T) ); // do waiting consumer work
    132     __atomic_thread_fence( __ATOMIC_SEQ_CST );
    133132    wake_one( cons );
    134133}
     
    137136static inline void __prods_handoff( channel(T) & chan, T & retval ) with(chan) {
    138137    memcpy( (void *)&retval, prods`first.extra, sizeof(T) );
    139     __atomic_thread_fence( __ATOMIC_SEQ_CST );
    140138    wake_one( prods );
    141139}
  • libcfa/src/concurrency/cofor.cfa

    r0030b508 rfc12f05  
    44// cofor ( uC++ COFOR )
    55
    6 thread cofor_runner {
     6thread cofor_task {
    77        ssize_t low, high;
    88        __cofor_body_t loop_body;
    99};
    1010
    11 static void ?{}( cofor_runner & this, ssize_t low, ssize_t high, __cofor_body_t loop_body ) {
     11static void ?{}( cofor_task & this, ssize_t low, ssize_t high, __cofor_body_t loop_body ) {
    1212        this.low = low;
    1313        this.high = high;
     
    1515}
    1616
    17 void main( cofor_runner & this ) with( this ) {
     17void main( cofor_task & this ) with( this ) {
    1818        for ( ssize_t i = low; i < high; i++ )
    1919                loop_body(i);
     
    2929        ssize_t i = 0;
    3030        ssize_t stride_iter = low;
    31         cofor_runner * runners[ threads ];
     31        cofor_task * runners[ threads ];
    3232        for ( i; threads ) {
    3333                runners[i] = alloc();
  • libcfa/src/concurrency/cofor.hfa

    r0030b508 rfc12f05  
    11#include <thread.hfa>
     2#include <locks.hfa>
     3#include <list.hfa>
    24
    35//////////////////////////////////////////////////////////////////////////////////////////
     
    1416                __Cofor__( low, high, __CFA_loopLambda__ ); \
    1517        }
     18
     19struct runner_node {
     20    void * value;
     21    inline dlink(runner_node);
     22};
     23P9_EMBEDDED( runner_node, dlink(runner_node) )
     24
     25thread cofor_runner {
     26        go_mutex mutex_lock;              // MX lock
     27    dlist( runner_node ) items;
     28    void (*func)(void *);
     29    volatile bool done;
     30};
     31
     32void ?{}( cofor_runner & this ) { this.done = false; }
     33
     34void main( cofor_runner & this ) with(this) {
     35    while ( !done || !items`isEmpty ) {
     36                lock( mutex_lock );
     37        runner_node * node = &try_pop_front( items );
     38                unlock( mutex_lock );
     39        func( node->value );
     40        free( node->value );
     41        free( node );
     42    }
     43}
     44
     45void start_runners( cofor_runner * thds, unsigned nprocs, void (*func)(void *) ) {
     46        for ( i; nprocs ) {
     47                thds[i].func = func;
     48        }
     49}
     50
     51void end_runners( cofor_runner * thds, unsigned nprocs ) {
     52        for ( i; nprocs ) {
     53                thds[i].done = true;
     54        }
     55}
     56
     57void send_work( cofor_runner * thds, unsigned nprocs, unsigned & curr_proc, void * value ) {
     58        runner_node * node = malloc();
     59        (*node){};
     60        node->value = value;
     61        lock( thds[curr_proc].mutex_lock );
     62        insert_last( thds[curr_proc].items, *node );
     63        unlock( thds[curr_proc].mutex_lock );
     64        curr_proc = ( curr_proc + 1 ) % nprocs;
     65}
    1666
    1767//////////////////////////////////////////////////////////////////////////////////////////
     
    4292    delete( this.runner );
    4393}
    44 
  • libcfa/src/concurrency/kernel.hfa

    r0030b508 rfc12f05  
    303303// gets the number of constructed processors on the cluster
    304304static inline unsigned get_proc_count( cluster & this ) { return this.procs.constructed; }
     305static inline unsigned get_proc_count() { return publicTLS_get( this_processor )->cltr->procs.constructed; }
    305306
    306307// set the number of internal processors
  • libcfa/src/concurrency/locks.hfa

    r0030b508 rfc12f05  
    182182static inline void lock( mcs_spin_lock & l, mcs_spin_node & n ) {
    183183    n.locked = true;
     184
     185        #if defined(__ARM_ARCH)
     186        __asm__ __volatile__ ( "DMB ISH" ::: );
     187        #endif
     188
    184189        mcs_spin_node * prev = __atomic_exchange_n(&l.queue.tail, &n, __ATOMIC_SEQ_CST);
    185190        if( prev == 0p ) return;
    186191        prev->next = &n;
     192       
     193        #if defined(__ARM_ARCH)
     194        __asm__ __volatile__ ( "DMB ISH" ::: );
     195        #endif
     196
    187197        while( __atomic_load_n(&n.locked, __ATOMIC_RELAXED) ) Pause();
     198
     199        #if defined(__ARM_ARCH)
     200        __asm__ __volatile__ ( "DMB ISH" ::: );
     201        #endif
    188202}
    189203
    190204static inline void unlock(mcs_spin_lock & l, mcs_spin_node & n) {
     205        #if defined(__ARM_ARCH)
     206        __asm__ __volatile__ ( "DMB ISH" ::: );
     207        #endif
     208
    191209        mcs_spin_node * n_ptr = &n;
    192210        if (__atomic_compare_exchange_n(&l.queue.tail, &n_ptr, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) return;
    193211        while (__atomic_load_n(&n.next, __ATOMIC_RELAXED) == 0p) Pause();
     212
     213        #if defined(__ARM_ARCH)
     214        __asm__ __volatile__ ( "DMB ISH" ::: );
     215        #endif
     216
    194217        n.next->locked = false;
    195218}
Note: See TracChangeset for help on using the changeset viewer.