- Timestamp:
- Jul 28, 2022, 12:04:25 PM (3 years ago)
- Branches:
- ADT, ast-experimental, master, pthread-emulation
- Children:
- 32d1383, d0fcc82
- Parents:
- 3f95dab (diff), 2af1943 (diff)
 Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
 Use the(diff)links above to see all the changes relative to each parent.
- Location:
- libcfa/src/concurrency
- Files:
- 
      - 12 edited
 
 - 
          
  io.cfa (modified) (1 diff)
- 
          
  io/setup.cfa (modified) (1 diff)
- 
          
  io/types.hfa (modified) (2 diffs)
- 
          
  kernel.hfa (modified) (3 diffs)
- 
          
  kernel/cluster.cfa (modified) (6 diffs)
- 
          
  kernel/cluster.hfa (modified) (2 diffs)
- 
          
  kernel/fwd.hfa (modified) (1 diff)
- 
          
  kernel/private.hfa (modified) (1 diff)
- 
          
  kernel/startup.cfa (modified) (1 diff)
- 
          
  ready_queue.cfa (modified) (7 diffs)
- 
          
  ready_subqueue.hfa (modified) (6 diffs)
- 
          
  stats.hfa (modified) (1 diff)
 
Legend:
- Unmodified
- Added
- Removed
- 
      libcfa/src/concurrency/io.cfar3f95dab rc4c8571 241 241 else { 242 242 const unsigned target = proc->io.target; 243 /* paranoid */ verify( io.tscs[target].t v != ULLONG_MAX );243 /* paranoid */ verify( io.tscs[target].t.tv != ULLONG_MAX ); 244 244 HELP: if(target < ctxs_count) { 245 245 const unsigned long long cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io); 246 const unsigned long long age = moving_average(ctsc, io.tscs[target].t v, io.tscs[target].ma);246 const unsigned long long age = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma); 247 247 __cfadbg_print_safe(io, "Kernel I/O: Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, ctx->cq.id, age, cutoff, age > cutoff ? "yes" : "no"); 248 248 if(age <= cutoff) break HELP; 
- 
      libcfa/src/concurrency/io/setup.cfar3f95dab rc4c8571 359 359 } 360 360 361 void ^?{}( $io_arbiter & this ) {}361 void ^?{}( $io_arbiter & mutex this ) {} 362 362 363 363 $io_arbiter * create(void) { 
- 
      libcfa/src/concurrency/io/types.hfar3f95dab rc4c8571 125 125 126 126 127 struct __attribute__((aligned( 128))) $io_context {127 struct __attribute__((aligned(64))) $io_context { 128 128 $io_arbiter * arbiter; 129 129 processor * proc; … … 153 153 }; 154 154 155 struct __attribute__((aligned(128))) $io_arbiter {155 monitor __attribute__((aligned(64))) $io_arbiter { 156 156 __outstanding_io_queue pending; 157 157 }; 
- 
      libcfa/src/concurrency/kernel.hfar3f95dab rc4c8571 83 83 84 84 // Wrapper around kernel threads 85 struct __attribute__((aligned( 128))) processor {85 struct __attribute__((aligned(64))) processor { 86 86 // Cluster from which to get threads 87 87 struct cluster * cltr; … … 171 171 172 172 // Intrusives lanes which are used by the ready queue 173 struct __attribute__((aligned(128))) __intrusive_lane_t;173 union __attribute__((aligned(64))) __intrusive_lane_t; 174 174 void ?{}(__intrusive_lane_t & this); 175 175 void ^?{}(__intrusive_lane_t & this); 176 176 177 177 // Aligned timestamps which are used by the ready queue and io subsystem 178 struct __attribute__((aligned(128))) __timestamp_t { 179 volatile unsigned long long tv; 180 volatile unsigned long long ma; 181 }; 182 183 static inline void ?{}(__timestamp_t & this) { this.tv = 0; this.ma = 0; } 178 union __attribute__((aligned(64))) __timestamp_t { 179 struct { 180 volatile unsigned long long tv; 181 volatile unsigned long long ma; 182 } t; 183 char __padding[192]; 184 }; 185 186 static inline void ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; } 184 187 static inline void ^?{}(__timestamp_t &) {} 185 188 … … 212 215 //----------------------------------------------------------------------------- 213 216 // Cluster 214 struct __attribute__((aligned( 128))) cluster {217 struct __attribute__((aligned(64))) cluster { 215 218 struct { 216 219 struct { 
- 
      libcfa/src/concurrency/kernel/cluster.cfar3f95dab rc4c8571 229 229 for( idx ; lanes_count ) { 230 230 __intrusive_lane_t & sl = readyQ.data[idx]; 231 assert(!readyQ.data[idx].l ock);231 assert(!readyQ.data[idx].l.lock); 232 232 233 233 if(is_empty(sl)) { 234 assert( sl. anchor.next == 0p );235 assert( sl. anchor.ts == MAX );236 assert( mock_head(sl) == sl. prev );234 assert( sl.l.anchor.next == 0p ); 235 assert( sl.l.anchor.ts == MAX ); 236 assert( mock_head(sl) == sl.l.prev ); 237 237 } else { 238 assert( sl. anchor.next != 0p );239 assert( sl. anchor.ts != MAX );240 assert( mock_head(sl) != sl. prev );238 assert( sl.l.anchor.next != 0p ); 239 assert( sl.l.anchor.ts != MAX ); 240 assert( mock_head(sl) != sl.l.prev ); 241 241 } 242 242 } … … 249 249 static inline void fix(__intrusive_lane_t & ll) { 250 250 if(is_empty(ll)) { 251 verify(ll. anchor.next == 0p);252 ll. prev = mock_head(ll);251 verify(ll.l.anchor.next == 0p); 252 ll.l.prev = mock_head(ll); 253 253 } 254 254 } … … 299 299 tscs = alloc(count, tscs`realloc); 300 300 for(i; count) { 301 tscs[i].t v = rdtscl();302 tscs[i]. ma = 0;301 tscs[i].t.tv = rdtscl(); 302 tscs[i].t.ma = 0; 303 303 } 304 304 } … … 400 400 for( idx; ncount ~ ocount) { 401 401 // Lock is not strictly needed but makes checking invariants much easier 402 __attribute__((unused)) bool locked = __atomic_try_acquire(&readyQ.data[idx].l ock);402 __attribute__((unused)) bool locked = __atomic_try_acquire(&readyQ.data[idx].l.lock); 403 403 verify(locked); 404 404 … … 418 418 419 419 // Unlock the lane 420 __atomic_unlock(&readyQ.data[idx].l ock);420 __atomic_unlock(&readyQ.data[idx].l.lock); 421 421 422 422 // TODO print the queue statistics here … … 467 467 } 468 468 469 #define nested_offsetof(type, field) ((off_t)(&(((type*)0)-> field))) 470 469 471 // Ctor 470 472 void ?{}( __intrusive_lane_t & this ) { 471 this.l ock = false;472 this. prev = mock_head(this);473 this. anchor.next = 0p;474 this. anchor.ts = MAX;473 this.l.lock = false; 474 this.l.prev = mock_head(this); 475 this.l.anchor.next = 0p; 476 this.l.anchor.ts = MAX; 475 477 #if !defined(__CFA_NO_STATISTICS__) 476 this. cnt = 0;478 this.l.cnt = 0; 477 479 #endif 478 480 479 481 // We add a boat-load of assertions here because the anchor code is very fragile 480 /* paranoid */ _Static_assert( offsetof( thread$, link ) == offsetof(__intrusive_lane_t,anchor) );481 /* paranoid */ verify( offsetof( thread$, link ) == offsetof(__intrusive_lane_t,anchor) );482 /* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, link )) == (uintptr_t)(&this. anchor) );483 /* paranoid */ verify( &mock_head(this)->link.next == &this. anchor.next );484 /* paranoid */ verify( &mock_head(this)->link.ts == &this. anchor.ts );482 /* paranoid */ _Static_assert( offsetof( thread$, link ) == nested_offsetof(__intrusive_lane_t, l.anchor) ); 483 /* paranoid */ verify( offsetof( thread$, link ) == nested_offsetof(__intrusive_lane_t, l.anchor) ); 484 /* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, link )) == (uintptr_t)(&this.l.anchor) ); 485 /* paranoid */ verify( &mock_head(this)->link.next == &this.l.anchor.next ); 486 /* paranoid */ verify( &mock_head(this)->link.ts == &this.l.anchor.ts ); 485 487 /* paranoid */ verify( mock_head(this)->link.next == 0p ); 486 488 /* paranoid */ verify( mock_head(this)->link.ts == MAX ); 487 /* paranoid */ verify( mock_head(this) == this.prev ); 488 /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 ); 489 /* paranoid */ verify( __alignof__(this) == 128 ); 490 /* paranoid */ verifyf( ((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128) ); 491 } 489 /* paranoid */ verify( mock_head(this) == this.l.prev ); 490 /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 64 ); 491 /* paranoid */ verify( __alignof__(this) == 64 ); 492 /* paranoid */ verifyf( ((intptr_t)(&this) % 64) == 0, "Expected address to be aligned %p %% 64 == %zd", &this, ((intptr_t)(&this) % 64) ); 493 } 494 495 #undef nested_offsetof 492 496 493 497 // Dtor is trivial 494 498 void ^?{}( __intrusive_lane_t & this ) { 495 499 // Make sure the list is empty 496 /* paranoid */ verify( this. anchor.next == 0p );497 /* paranoid */ verify( this. anchor.ts == MAX );498 /* paranoid */ verify( mock_head(this) == this.prev );500 /* paranoid */ verify( this.l.anchor.next == 0p ); 501 /* paranoid */ verify( this.l.anchor.ts == MAX ); 502 /* paranoid */ verify( mock_head(this) == this.l.prev ); 499 503 } 500 504 
- 
      libcfa/src/concurrency/kernel/cluster.hfar3f95dab rc4c8571 39 39 if (ts_next == ULLONG_MAX) return; 40 40 unsigned long long now = rdtscl(); 41 unsigned long long pma = __atomic_load_n(&tscs[ idx ]. ma, __ATOMIC_RELAXED);42 __atomic_store_n(&tscs[ idx ].t v, ts_next, __ATOMIC_RELAXED);43 __atomic_store_n(&tscs[ idx ]. ma, moving_average(now, ts_prev, pma), __ATOMIC_RELAXED);41 unsigned long long pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED); 42 __atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED); 43 __atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma), __ATOMIC_RELAXED); 44 44 } 45 45 … … 61 61 if(ptsc != ULLONG_MAX) { 62 62 /* paranoid */ verify( start + i < count ); 63 unsigned long long tsc = moving_average(ctsc, ptsc, tscs[start + i]. ma);63 unsigned long long tsc = moving_average(ctsc, ptsc, tscs[start + i].t.ma); 64 64 if(tsc > max) max = tsc; 65 65 } 
- 
      libcfa/src/concurrency/kernel/fwd.hfar3f95dab rc4c8571 35 35 extern "C" { 36 36 extern "Cforall" { 37 extern __attribute__((aligned( 128))) thread_local struct KernelThreadData {37 extern __attribute__((aligned(64))) thread_local struct KernelThreadData { 38 38 struct thread$ * volatile this_thread; 39 39 struct processor * volatile this_processor; 
- 
      libcfa/src/concurrency/kernel/private.hfar3f95dab rc4c8571 88 88 #elif defined(CFA_HAVE_LINUX_RSEQ_H) 89 89 extern "Cforall" { 90 extern __attribute__((aligned( 128))) thread_local volatile struct rseq __cfaabi_rseq;90 extern __attribute__((aligned(64))) thread_local volatile struct rseq __cfaabi_rseq; 91 91 } 92 92 #else 
- 
      libcfa/src/concurrency/kernel/startup.cfar3f95dab rc4c8571 152 152 #elif defined(CFA_HAVE_LINUX_RSEQ_H) 153 153 extern "Cforall" { 154 __attribute__((aligned( 128))) thread_local volatile struct rseq __cfaabi_rseq @= {154 __attribute__((aligned(64))) thread_local volatile struct rseq __cfaabi_rseq @= { 155 155 .cpu_id : RSEQ_CPU_ID_UNINITIALIZED, 156 156 }; 
- 
      libcfa/src/concurrency/ready_queue.cfar3f95dab rc4c8571 81 81 /* paranoid */ verify( i < lanes_count ); 82 82 // If we can't lock it retry 83 } while( !__atomic_try_acquire( &readyQ.data[i].l ock ) );83 } while( !__atomic_try_acquire( &readyQ.data[i].l.lock ) ); 84 84 } else { 85 85 do { 86 86 i = __tls_rand() % lanes_count; 87 } while( !__atomic_try_acquire( &readyQ.data[i].l ock ) );87 } while( !__atomic_try_acquire( &readyQ.data[i].l.lock ) ); 88 88 } 89 89 } else { … … 93 93 /* paranoid */ verify( i < lanes_count ); 94 94 // If we can't lock it retry 95 } while( !__atomic_try_acquire( &readyQ.data[i].l ock ) );95 } while( !__atomic_try_acquire( &readyQ.data[i].l.lock ) ); 96 96 } 97 97 … … 100 100 101 101 // Unlock and return 102 __atomic_unlock( &readyQ.data[i].l ock );102 __atomic_unlock( &readyQ.data[i].l.lock ); 103 103 104 104 #if !defined(__CFA_NO_STATISTICS__) … … 136 136 else { 137 137 const unsigned target = proc->rdq.target; 138 __cfadbg_print_safe(ready_queue, "Kernel : %u considering helping %u, tcsc %llu\n", this, target, readyQ.tscs[target].t v);139 /* paranoid */ verify( readyQ.tscs[target].t v != ULLONG_MAX );138 __cfadbg_print_safe(ready_queue, "Kernel : %u considering helping %u, tcsc %llu\n", this, target, readyQ.tscs[target].t.tv); 139 /* paranoid */ verify( readyQ.tscs[target].t.tv != ULLONG_MAX ); 140 140 if(target < lanes_count) { 141 141 const unsigned long long cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq); 142 const unsigned long long age = moving_average(ctsc, readyQ.tscs[target].t v, readyQ.tscs[target].ma);142 const unsigned long long age = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma); 143 143 __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no"); 144 144 if(age > cutoff) { … … 188 188 189 189 // If we can't get the lock retry 190 if( !__atomic_try_acquire(&lane.l ock) ) {190 if( !__atomic_try_acquire(&lane.l.lock) ) { 191 191 return 0p; 192 192 } … … 194 194 // If list is empty, unlock and retry 195 195 if( is_empty(lane) ) { 196 __atomic_unlock(&lane.l ock);196 __atomic_unlock(&lane.l.lock); 197 197 return 0p; 198 198 } … … 206 206 /* paranoid */ verify(thrd); 207 207 /* paranoid */ verify(ts_next); 208 /* paranoid */ verify(lane.l ock);208 /* paranoid */ verify(lane.l.lock); 209 209 210 210 // Unlock and return 211 __atomic_unlock(&lane.l ock);211 __atomic_unlock(&lane.l.lock); 212 212 213 213 // Update statistics 
- 
      libcfa/src/concurrency/ready_subqueue.hfar3f95dab rc4c8571 6 6 7 7 // Intrusives lanes which are used by the relaxed ready queue 8 struct __attribute__((aligned(128))) __intrusive_lane_t { 9 struct thread$ * prev; 8 union __attribute__((aligned(64))) __intrusive_lane_t { 9 struct { 10 struct thread$ * prev; 10 11 11 // spin lock protecting the queue12 volatile bool lock;12 // spin lock protecting the queue 13 volatile bool lock; 13 14 14 __thread_desc_link anchor;15 __thread_desc_link anchor; 15 16 16 #if !defined(__CFA_NO_STATISTICS__) 17 unsigned cnt; 18 #endif 17 #if !defined(__CFA_NO_STATISTICS__) 18 unsigned cnt; 19 #endif 20 } l; 21 char __padding[192]; 19 22 }; 20 23 … … 22 25 static inline thread$ * mock_head(const __intrusive_lane_t & this) { 23 26 thread$ * rhead = (thread$ *)( 24 (uintptr_t)( &this. anchor ) - __builtin_offsetof( thread$, link )27 (uintptr_t)( &this.l.anchor ) - __builtin_offsetof( thread$, link ) 25 28 ); 26 29 return rhead; … … 30 33 // returns true of lane was empty before push, false otherwise 31 34 static inline void push( __intrusive_lane_t & this, thread$ * node ) { 32 /* paranoid */ verify( this.l ock );35 /* paranoid */ verify( this.l.lock ); 33 36 /* paranoid */ verify( node->link.next == 0p ); 34 37 /* paranoid */ verify( __atomic_load_n(&node->link.ts, __ATOMIC_RELAXED) == MAX ); 35 /* paranoid */ verify( this. prev->link.next == 0p );36 /* paranoid */ verify( __atomic_load_n(&this. prev->link.ts, __ATOMIC_RELAXED) == MAX );37 if( this. anchor.next == 0p ) {38 /* paranoid */ verify( this. anchor.next == 0p );39 /* paranoid */ verify( __atomic_load_n(&this. anchor.ts, __ATOMIC_RELAXED) == MAX );40 /* paranoid */ verify( __atomic_load_n(&this. anchor.ts, __ATOMIC_RELAXED) != 0 );41 /* paranoid */ verify( this. prev == mock_head( this ) );38 /* paranoid */ verify( this.l.prev->link.next == 0p ); 39 /* paranoid */ verify( __atomic_load_n(&this.l.prev->link.ts, __ATOMIC_RELAXED) == MAX ); 40 if( this.l.anchor.next == 0p ) { 41 /* paranoid */ verify( this.l.anchor.next == 0p ); 42 /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) == MAX ); 43 /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) != 0 ); 44 /* paranoid */ verify( this.l.prev == mock_head( this ) ); 42 45 } else { 43 /* paranoid */ verify( this. anchor.next != 0p );44 /* paranoid */ verify( __atomic_load_n(&this. anchor.ts, __ATOMIC_RELAXED) != MAX );45 /* paranoid */ verify( __atomic_load_n(&this. anchor.ts, __ATOMIC_RELAXED) != 0 );46 /* paranoid */ verify( this. prev != mock_head( this ) );46 /* paranoid */ verify( this.l.anchor.next != 0p ); 47 /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) != MAX ); 48 /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) != 0 ); 49 /* paranoid */ verify( this.l.prev != mock_head( this ) ); 47 50 } 48 51 49 52 // Get the relevant nodes locally 50 this. prev->link.next = node;51 __atomic_store_n(&this. prev->link.ts, rdtscl(), __ATOMIC_RELAXED);52 this. prev = node;53 this.l.prev->link.next = node; 54 __atomic_store_n(&this.l.prev->link.ts, rdtscl(), __ATOMIC_RELAXED); 55 this.l.prev = node; 53 56 #if !defined(__CFA_NO_STATISTICS__) 54 this. cnt++;57 this.l.cnt++; 55 58 #endif 56 59 } … … 60 63 // returns true of lane was empty before push, false otherwise 61 64 static inline [* thread$, unsigned long long] pop( __intrusive_lane_t & this ) { 62 /* paranoid */ verify( this.l ock );63 /* paranoid */ verify( this. anchor.next != 0p );64 /* paranoid */ verify( __atomic_load_n(&this. anchor.ts, __ATOMIC_RELAXED) != MAX );65 /* paranoid */ verify( __atomic_load_n(&this. anchor.ts, __ATOMIC_RELAXED) != 0 );65 /* paranoid */ verify( this.l.lock ); 66 /* paranoid */ verify( this.l.anchor.next != 0p ); 67 /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) != MAX ); 68 /* paranoid */ verify( __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED) != 0 ); 66 69 67 70 // Get the relevant nodes locally 68 thread$ * node = this. anchor.next;69 this. anchor.next = node->link.next;70 __atomic_store_n(&this. anchor.ts, __atomic_load_n(&node->link.ts, __ATOMIC_RELAXED), __ATOMIC_RELAXED);71 bool is_empty = this. anchor.next == 0p;71 thread$ * node = this.l.anchor.next; 72 this.l.anchor.next = node->link.next; 73 __atomic_store_n(&this.l.anchor.ts, __atomic_load_n(&node->link.ts, __ATOMIC_RELAXED), __ATOMIC_RELAXED); 74 bool is_empty = this.l.anchor.next == 0p; 72 75 node->link.next = 0p; 73 76 __atomic_store_n(&node->link.ts, ULLONG_MAX, __ATOMIC_RELAXED); 74 77 #if !defined(__CFA_NO_STATISTICS__) 75 this. cnt--;78 this.l.cnt--; 76 79 #endif 77 80 78 81 // Update head time stamp 79 if(is_empty) this. prev = mock_head( this );82 if(is_empty) this.l.prev = mock_head( this ); 80 83 81 unsigned long long ats = __atomic_load_n(&this. anchor.ts, __ATOMIC_RELAXED);84 unsigned long long ats = __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED); 82 85 /* paranoid */ verify( node->link.next == 0p ); 83 86 /* paranoid */ verify( __atomic_load_n(&node->link.ts , __ATOMIC_RELAXED) == MAX ); … … 90 93 // Check whether or not list is empty 91 94 static inline bool is_empty(__intrusive_lane_t & this) { 92 return this. anchor.next == 0p;95 return this.l.anchor.next == 0p; 93 96 } 94 97 … … 96 99 static inline unsigned long long ts(__intrusive_lane_t & this) { 97 100 // Cannot verify 'emptiness' here since it may not be locked 98 /* paranoid */ verify(this. anchor.ts != 0);99 /* paranoid */ static_assert(__atomic_always_lock_free(sizeof(this. anchor.ts), &this.anchor.ts));100 return __atomic_load_n(&this. anchor.ts, __ATOMIC_RELAXED);101 /* paranoid */ verify(this.l.anchor.ts != 0); 102 /* paranoid */ static_assert(__atomic_always_lock_free(sizeof(this.l.anchor.ts), &this.l.anchor.ts)); 103 return __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED); 101 104 } 
- 
      libcfa/src/concurrency/stats.hfar3f95dab rc4c8571 132 132 #endif 133 133 134 struct __attribute__((aligned( 128))) __stats_t {134 struct __attribute__((aligned(64))) __stats_t { 135 135 __stats_readyQ_t ready; 136 136 #if defined(CFA_HAVE_LINUX_IO_URING_H) 
  Note:
 See   TracChangeset
 for help on using the changeset viewer.
  