Ignore:
Timestamp:
Jul 28, 2022, 12:04:25 PM (2 years ago)
Author:
Thierry Delisle <tdelisle@…>
Branches:
ADT, ast-experimental, master, pthread-emulation
Children:
32d1383, d0fcc82
Parents:
3f95dab (diff), 2af1943 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Location:
libcfa/src/concurrency
Files:
12 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/concurrency/io.cfa

    r3f95dab rc4c8571  
    241241                        else {
    242242                                const unsigned target = proc->io.target;
    243                                 /* paranoid */ verify( io.tscs[target].tv != ULLONG_MAX );
     243                                /* paranoid */ verify( io.tscs[target].t.tv != ULLONG_MAX );
    244244                                HELP: if(target < ctxs_count) {
    245245                                        const unsigned long long cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io);
    246                                         const unsigned long long age = moving_average(ctsc, io.tscs[target].tv, io.tscs[target].ma);
     246                                        const unsigned long long age = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma);
    247247                                        __cfadbg_print_safe(io, "Kernel I/O: Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, ctx->cq.id, age, cutoff, age > cutoff ? "yes" : "no");
    248248                                        if(age <= cutoff) break HELP;
  • libcfa/src/concurrency/io/setup.cfa

    r3f95dab rc4c8571  
    359359        }
    360360
    361         void ^?{}( $io_arbiter & this ) {}
     361        void ^?{}( $io_arbiter & mutex this ) {}
    362362
    363363        $io_arbiter * create(void) {
  • libcfa/src/concurrency/io/types.hfa

    r3f95dab rc4c8571  
    125125
    126126
    127         struct __attribute__((aligned(128))) $io_context {
     127        struct __attribute__((aligned(64))) $io_context {
    128128                $io_arbiter * arbiter;
    129129                processor * proc;
     
    153153        };
    154154
    155         struct __attribute__((aligned(128))) $io_arbiter {
     155        monitor __attribute__((aligned(64))) $io_arbiter {
    156156                __outstanding_io_queue pending;
    157157        };
  • libcfa/src/concurrency/kernel.hfa

    r3f95dab rc4c8571  
    8383
    8484// Wrapper around kernel threads
    85 struct __attribute__((aligned(128))) processor {
     85struct __attribute__((aligned(64))) processor {
    8686        // Cluster from which to get threads
    8787        struct cluster * cltr;
     
    171171
    172172// Intrusive lanes which are used by the ready queue
    173 struct __attribute__((aligned(128))) __intrusive_lane_t;
     173union __attribute__((aligned(64))) __intrusive_lane_t;
    174174void  ?{}(__intrusive_lane_t & this);
    175175void ^?{}(__intrusive_lane_t & this);
    176176
    177177// Aligned timestamps which are used by the ready queue and io subsystem
    178 struct __attribute__((aligned(128))) __timestamp_t {
    179         volatile unsigned long long tv;
    180         volatile unsigned long long ma;
    181 };
    182 
    183 static inline void  ?{}(__timestamp_t & this) { this.tv = 0; this.ma = 0; }
     178union __attribute__((aligned(64))) __timestamp_t {
     179        struct {
     180                volatile unsigned long long tv;
     181                volatile unsigned long long ma;
     182        } t;
     183        char __padding[192];
     184};
     185
     186static inline void  ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; }
    184187static inline void ^?{}(__timestamp_t &) {}
    185188
     
    212215//-----------------------------------------------------------------------------
    213216// Cluster
    214 struct __attribute__((aligned(128))) cluster {
     217struct __attribute__((aligned(64))) cluster {
    215218        struct {
    216219                struct {
  • libcfa/src/concurrency/kernel/cluster.cfa

    r3f95dab rc4c8571  
    229229                        for( idx ; lanes_count ) {
    230230                                __intrusive_lane_t & sl = readyQ.data[idx];
    231                                 assert(!readyQ.data[idx].lock);
     231                                assert(!readyQ.data[idx].l.lock);
    232232
    233233                                        if(is_empty(sl)) {
    234                                                 assert( sl.anchor.next == 0p );
    235                                                 assert( sl.anchor.ts   == MAX );
    236                                                 assert( mock_head(sl)  == sl.prev );
     234                                                assert( sl.l.anchor.next == 0p );
     235                                                assert( sl.l.anchor.ts   == MAX );
     236                                                assert( mock_head(sl)  == sl.l.prev );
    237237                                        } else {
    238                                                 assert( sl.anchor.next != 0p );
    239                                                 assert( sl.anchor.ts   != MAX );
    240                                                 assert( mock_head(sl)  != sl.prev );
     238                                                assert( sl.l.anchor.next != 0p );
     239                                                assert( sl.l.anchor.ts   != MAX );
     240                                                assert( mock_head(sl)  != sl.l.prev );
    241241                                        }
    242242                        }
     
    249249static inline void fix(__intrusive_lane_t & ll) {
    250250        if(is_empty(ll)) {
    251                 verify(ll.anchor.next == 0p);
    252                 ll.prev = mock_head(ll);
     251                verify(ll.l.anchor.next == 0p);
     252                ll.l.prev = mock_head(ll);
    253253        }
    254254}
     
    299299        tscs = alloc(count, tscs`realloc);
    300300        for(i; count) {
    301                 tscs[i].tv = rdtscl();
    302                 tscs[i].ma = 0;
     301                tscs[i].t.tv = rdtscl();
     302                tscs[i].t.ma = 0;
    303303        }
    304304}
     
    400400                for( idx; ncount ~ ocount) {
    401401                        // Lock is not strictly needed but makes checking invariants much easier
    402                         __attribute__((unused)) bool locked = __atomic_try_acquire(&readyQ.data[idx].lock);
     402                        __attribute__((unused)) bool locked = __atomic_try_acquire(&readyQ.data[idx].l.lock);
    403403                        verify(locked);
    404404
     
    418418
    419419                        // Unlock the lane
    420                         __atomic_unlock(&readyQ.data[idx].lock);
     420                        __atomic_unlock(&readyQ.data[idx].l.lock);
    421421
    422422                        // TODO print the queue statistics here
     
    467467}
    468468
     469#define nested_offsetof(type, field) ((off_t)(&(((type*)0)-> field)))
     470
    469471// Ctor
    470472void ?{}( __intrusive_lane_t & this ) {
    471         this.lock = false;
    472         this.prev = mock_head(this);
    473         this.anchor.next = 0p;
    474         this.anchor.ts   = MAX;
     473        this.l.lock = false;
     474        this.l.prev = mock_head(this);
     475        this.l.anchor.next = 0p;
     476        this.l.anchor.ts   = MAX;
    475477        #if !defined(__CFA_NO_STATISTICS__)
    476                 this.cnt  = 0;
     478                this.l.cnt  = 0;
    477479        #endif
    478480
    479481        // We add a boat-load of assertions here because the anchor code is very fragile
    480         /* paranoid */ _Static_assert( offsetof( thread$, link ) == offsetof(__intrusive_lane_t, anchor) );
    481         /* paranoid */ verify( offsetof( thread$, link ) == offsetof(__intrusive_lane_t, anchor) );
    482         /* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, link )) == (uintptr_t)(&this.anchor) );
    483         /* paranoid */ verify( &mock_head(this)->link.next == &this.anchor.next );
    484         /* paranoid */ verify( &mock_head(this)->link.ts   == &this.anchor.ts   );
     482        /* paranoid */ _Static_assert( offsetof( thread$, link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
     483        /* paranoid */ verify( offsetof( thread$, link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
     484        /* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, link )) == (uintptr_t)(&this.l.anchor) );
     485        /* paranoid */ verify( &mock_head(this)->link.next == &this.l.anchor.next );
     486        /* paranoid */ verify( &mock_head(this)->link.ts   == &this.l.anchor.ts   );
    485487        /* paranoid */ verify( mock_head(this)->link.next == 0p );
    486488        /* paranoid */ verify( mock_head(this)->link.ts   == MAX );
    487         /* paranoid */ verify( mock_head(this) == this.prev );
    488         /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 );
    489         /* paranoid */ verify( __alignof__(this) == 128 );
    490         /* paranoid */ verifyf( ((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128) );
    491 }
     489        /* paranoid */ verify( mock_head(this) == this.l.prev );
     490        /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 64 );
     491        /* paranoid */ verify( __alignof__(this) == 64 );
     492        /* paranoid */ verifyf( ((intptr_t)(&this) % 64) == 0, "Expected address to be aligned %p %% 64 == %zd", &this, ((intptr_t)(&this) % 64) );
     493}
     494
     495#undef nested_offsetof
    492496
    493497// Dtor is trivial
    494498void ^?{}( __intrusive_lane_t & this ) {
    495499        // Make sure the list is empty
    496         /* paranoid */ verify( this.anchor.next == 0p );
    497         /* paranoid */ verify( this.anchor.ts   == MAX );
    498         /* paranoid */ verify( mock_head(this)  == this.prev );
     500        /* paranoid */ verify( this.l.anchor.next == 0p );
     501        /* paranoid */ verify( this.l.anchor.ts   == MAX );
     502        /* paranoid */ verify( mock_head(this)    == this.l.prev );
    499503}
    500504
  • libcfa/src/concurrency/kernel/cluster.hfa

    r3f95dab rc4c8571  
    3939        if (ts_next == ULLONG_MAX) return;
    4040        unsigned long long now = rdtscl();
    41         unsigned long long pma = __atomic_load_n(&tscs[ idx ].ma, __ATOMIC_RELAXED);
    42         __atomic_store_n(&tscs[ idx ].tv, ts_next, __ATOMIC_RELAXED);
    43         __atomic_store_n(&tscs[ idx ].ma, moving_average(now, ts_prev, pma), __ATOMIC_RELAXED);
     41        unsigned long long pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);
     42        __atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED);
     43        __atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma), __ATOMIC_RELAXED);
    4444}
    4545
     
    6161                if(ptsc != ULLONG_MAX) {
    6262                        /* paranoid */ verify( start + i < count );
    63                         unsigned long long tsc = moving_average(ctsc, ptsc, tscs[start + i].ma);
     63                        unsigned long long tsc = moving_average(ctsc, ptsc, tscs[start + i].t.ma);
    6464                        if(tsc > max) max = tsc;
    6565                }
  • libcfa/src/concurrency/kernel/fwd.hfa

    r3f95dab rc4c8571  
    3535extern "C" {
    3636        extern "Cforall" {
    37                 extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
     37                extern __attribute__((aligned(64))) thread_local struct KernelThreadData {
    3838                        struct thread$          * volatile this_thread;
    3939                        struct processor        * volatile this_processor;
  • libcfa/src/concurrency/kernel/private.hfa

    r3f95dab rc4c8571  
    8888#elif defined(CFA_HAVE_LINUX_RSEQ_H)
    8989        extern "Cforall" {
    90                 extern __attribute__((aligned(128))) thread_local volatile struct rseq __cfaabi_rseq;
     90                extern __attribute__((aligned(64))) thread_local volatile struct rseq __cfaabi_rseq;
    9191        }
    9292#else
  • libcfa/src/concurrency/kernel/startup.cfa

    r3f95dab rc4c8571  
    152152#elif defined(CFA_HAVE_LINUX_RSEQ_H)
    153153        extern "Cforall" {
    154                 __attribute__((aligned(128))) thread_local volatile struct rseq __cfaabi_rseq @= {
     154                __attribute__((aligned(64))) thread_local volatile struct rseq __cfaabi_rseq @= {
    155155                        .cpu_id : RSEQ_CPU_ID_UNINITIALIZED,
    156156                };
  • libcfa/src/concurrency/ready_queue.cfa

    r3f95dab rc4c8571  
    8181                                /* paranoid */ verify( i < lanes_count );
    8282                                // If we can't lock it retry
    83                         } while( !__atomic_try_acquire( &readyQ.data[i].lock ) );
     83                        } while( !__atomic_try_acquire( &readyQ.data[i].l.lock ) );
    8484                } else {
    8585                        do {
    8686                                i = __tls_rand() % lanes_count;
    87                         } while( !__atomic_try_acquire( &readyQ.data[i].lock ) );
     87                        } while( !__atomic_try_acquire( &readyQ.data[i].l.lock ) );
    8888                }
    8989        } else {
     
    9393                        /* paranoid */ verify( i < lanes_count );
    9494                        // If we can't lock it retry
    95                 } while( !__atomic_try_acquire( &readyQ.data[i].lock ) );
     95                } while( !__atomic_try_acquire( &readyQ.data[i].l.lock ) );
    9696        }
    9797
     
    100100
    101101        // Unlock and return
    102         __atomic_unlock( &readyQ.data[i].lock );
     102        __atomic_unlock( &readyQ.data[i].l.lock );
    103103
    104104        #if !defined(__CFA_NO_STATISTICS__)
     
    136136        else {
    137137                const unsigned target = proc->rdq.target;
    138                 __cfadbg_print_safe(ready_queue, "Kernel : %u considering helping %u, tcsc %llu\n", this, target, readyQ.tscs[target].tv);
    139                 /* paranoid */ verify( readyQ.tscs[target].tv != ULLONG_MAX );
     138                __cfadbg_print_safe(ready_queue, "Kernel : %u considering helping %u, tcsc %llu\n", this, target, readyQ.tscs[target].t.tv);
     139                /* paranoid */ verify( readyQ.tscs[target].t.tv != ULLONG_MAX );
    140140                if(target < lanes_count) {
    141141                        const unsigned long long cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq);
    142                         const unsigned long long age = moving_average(ctsc, readyQ.tscs[target].tv, readyQ.tscs[target].ma);
     142                        const unsigned long long age = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma);
    143143                        __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no");
    144144                        if(age > cutoff) {
     
    188188
    189189        // If we can't get the lock retry
    190         if( !__atomic_try_acquire(&lane.lock) ) {
     190        if( !__atomic_try_acquire(&lane.l.lock) ) {
    191191                return 0p;
    192192        }
     
    194194        // If list is empty, unlock and retry
    195195        if( is_empty(lane) ) {
    196                 __atomic_unlock(&lane.lock);
     196                __atomic_unlock(&lane.l.lock);
    197197                return 0p;
    198198        }
     
    206206        /* paranoid */ verify(thrd);
    207207        /* paranoid */ verify(ts_next);
    208         /* paranoid */ verify(lane.lock);
     208        /* paranoid */ verify(lane.l.lock);
    209209
    210210        // Unlock and return
    211         __atomic_unlock(&lane.lock);
     211        __atomic_unlock(&lane.l.lock);
    212212
    213213        // Update statistics
  • libcfa/src/concurrency/ready_subqueue.hfa

    r3f95dab rc4c8571  
    66
    77// Intrusive lanes which are used by the relaxed ready queue
    8 struct __attribute__((aligned(128))) __intrusive_lane_t {
    9         struct thread$ * prev;
     8union __attribute__((aligned(64))) __intrusive_lane_t {
     9        struct {
     10                struct thread$ * prev;
    1011
    11         // spin lock protecting the queue
    12         volatile bool lock;
     12                // spin lock protecting the queue
     13                volatile bool lock;
    1314
    14         __thread_desc_link anchor;
     15                __thread_desc_link anchor;
    1516
    16         #if !defined(__CFA_NO_STATISTICS__)
    17                 unsigned cnt;
    18         #endif
     17                #if !defined(__CFA_NO_STATISTICS__)
     18                        unsigned cnt;
     19                #endif
     20        } l;
     21        char __padding[192];
    1922};
    2023
     
    2225static inline thread$ * mock_head(const __intrusive_lane_t & this) {
    2326        thread$ * rhead = (thread$ *)(
    24                 (uintptr_t)( &this.anchor ) - __builtin_offsetof( thread$, link )
     27                (uintptr_t)( &this.l.anchor ) - __builtin_offsetof( thread$, link )
    2528        );
    2629        return rhead;
     
    3033// returns true if lane was empty before push, false otherwise
    3134static inline void push( __intrusive_lane_t & this, thread$ * node ) {
    32         /* paranoid */ verify( this.lock );
     35        /* paranoid */ verify( this.l.lock );
    3336        /* paranoid */ verify( node->link.next == 0p );
    3437        /* paranoid */ verify( __atomic_load_n(&node->link.ts, __ATOMIC_RELAXED) == MAX  );
    35         /* paranoid */ verify( this.prev->link.next == 0p );
    36         /* paranoid */ verify( __atomic_load_n(&this.prev->link.ts, __ATOMIC_RELAXED)   == MAX  );
    37         if( this.anchor.next == 0p ) {
    38                 /* paranoid */ verify( this.anchor.next == 0p );
    39                 /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED) == MAX );
    40                 /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED) != 0  );
    41                 /* paranoid */ verify( this.prev == mock_head( this ) );
     38        /* paranoid */ verify( this.l.prev->link.next == 0p );
     39        /* paranoid */ verify( __atomic_load_n(&this.l.prev->link.ts, __ATOMIC_RELAXED)   == MAX  );
     40        if( this.l.anchor.next == 0p ) {
     41                /* paranoid */ verify( this.l.anchor.next == 0p );
     42                /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) == MAX );
     43                /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) != 0  );
     44                /* paranoid */ verify( this.l.prev == mock_head( this ) );
    4245        } else {
    43                 /* paranoid */ verify( this.anchor.next != 0p );
    44                 /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED) != MAX );
    45                 /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED) != 0  );
    46                 /* paranoid */ verify( this.prev != mock_head( this ) );
     46                /* paranoid */ verify( this.l.anchor.next != 0p );
     47                /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) != MAX );
     48                /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) != 0  );
     49                /* paranoid */ verify( this.l.prev != mock_head( this ) );
    4750        }
    4851
    4952        // Get the relevant nodes locally
    50         this.prev->link.next = node;
    51         __atomic_store_n(&this.prev->link.ts, rdtscl(), __ATOMIC_RELAXED);
    52         this.prev = node;
     53        this.l.prev->link.next = node;
     54        __atomic_store_n(&this.l.prev->link.ts, rdtscl(), __ATOMIC_RELAXED);
     55        this.l.prev = node;
    5356        #if !defined(__CFA_NO_STATISTICS__)
    54                 this.cnt++;
     57                this.l.cnt++;
    5558        #endif
    5659}
     
    6063// returns true if lane was empty before push, false otherwise
    6164static inline [* thread$, unsigned long long] pop( __intrusive_lane_t & this ) {
    62         /* paranoid */ verify( this.lock );
    63         /* paranoid */ verify( this.anchor.next != 0p );
    64         /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED) != MAX );
    65         /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED) != 0   );
     65        /* paranoid */ verify( this.l.lock );
     66        /* paranoid */ verify( this.l.anchor.next != 0p );
     67        /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) != MAX );
     68        /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) != 0   );
    6669
    6770        // Get the relevant nodes locally
    68         thread$ * node = this.anchor.next;
    69         this.anchor.next = node->link.next;
    70         __atomic_store_n(&this.anchor.ts, __atomic_load_n(&node->link.ts, __ATOMIC_RELAXED), __ATOMIC_RELAXED);
    71         bool is_empty = this.anchor.next == 0p;
     71        thread$ * node = this.l.anchor.next;
     72        this.l.anchor.next = node->link.next;
     73        __atomic_store_n(&this.l.anchor.ts, __atomic_load_n(&node->link.ts, __ATOMIC_RELAXED), __ATOMIC_RELAXED);
     74        bool is_empty = this.l.anchor.next == 0p;
    7275        node->link.next = 0p;
    7376        __atomic_store_n(&node->link.ts, ULLONG_MAX, __ATOMIC_RELAXED);
    7477        #if !defined(__CFA_NO_STATISTICS__)
    75                 this.cnt--;
     78                this.l.cnt--;
    7679        #endif
    7780
    7881        // Update head time stamp
    79         if(is_empty) this.prev = mock_head( this );
     82        if(is_empty) this.l.prev = mock_head( this );
    8083
    81         unsigned long long ats = __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED);
     84        unsigned long long ats = __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED);
    8285        /* paranoid */ verify( node->link.next == 0p );
    8386        /* paranoid */ verify( __atomic_load_n(&node->link.ts , __ATOMIC_RELAXED) == MAX );
     
    9093// Check whether or not list is empty
    9194static inline bool is_empty(__intrusive_lane_t & this) {
    92         return this.anchor.next == 0p;
     95        return this.l.anchor.next == 0p;
    9396}
    9497
     
    9699static inline unsigned long long ts(__intrusive_lane_t & this) {
    97100        // Cannot verify 'emptiness' here since it may not be locked
    98         /* paranoid */ verify(this.anchor.ts != 0);
    99         /* paranoid */ static_assert(__atomic_always_lock_free(sizeof(this.anchor.ts), &this.anchor.ts));
    100         return __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED);
     101        /* paranoid */ verify(this.l.anchor.ts != 0);
     102        /* paranoid */ static_assert(__atomic_always_lock_free(sizeof(this.l.anchor.ts), &this.l.anchor.ts));
     103        return __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED);
    101104}
  • libcfa/src/concurrency/stats.hfa

    r3f95dab rc4c8571  
    132132        #endif
    133133
    134         struct __attribute__((aligned(128))) __stats_t {
     134        struct __attribute__((aligned(64))) __stats_t {
    135135                __stats_readyQ_t ready;
    136136                #if defined(CFA_HAVE_LINUX_IO_URING_H)
Note: See TracChangeset for help on using the changeset viewer.