- File:
-
- 1 edited
-
libcfa/src/concurrency/ready_queue.cfa (modified) (22 diffs)
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/concurrency/ready_queue.cfa
rfd9b524 rceb7db8 17 17 // #define __CFA_DEBUG_PRINT_READY_QUEUE__ 18 18 19 // #define USE_SNZI 20 19 21 #include "bits/defs.hfa" 20 22 #include "kernel_private.hfa" … … 148 150 // queues or removing them. 149 151 uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) { 152 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled ); 153 150 154 // Step 1 : lock global lock 151 155 // It is needed to avoid processors that register mid Critical-Section … … 162 166 } 163 167 168 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled ); 164 169 return s; 165 170 } 166 171 167 172 void ready_mutate_unlock( uint_fast32_t last_s ) with(*__scheduler_lock) { 173 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled ); 174 168 175 // Step 1 : release local locks 169 176 // This must be done while the global lock is held to avoid … … 180 187 /*paranoid*/ assert(true == lock); 181 188 __atomic_store_n(&lock, (bool)false, __ATOMIC_RELEASE); 189 190 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled ); 182 191 } 183 192 … … 192 201 void ^?{}(__ready_queue_t & this) with (this) { 193 202 verify( 1 == lanes.count ); 194 verify( !query( snzi ) ); 203 #ifdef USE_SNZI 204 verify( !query( snzi ) ); 205 #endif 195 206 free(lanes.data); 196 207 } … … 198 209 //----------------------------------------------------------------------- 199 210 __attribute__((hot)) bool query(struct cluster * cltr) { 200 return query(cltr->ready_queue.snzi); 211 #ifdef USE_SNZI 212 return query(cltr->ready_queue.snzi); 213 #endif 214 return true; 215 } 216 217 static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred) { 218 unsigned i; 219 bool local; 220 #if defined(BIAS) 221 unsigned rlow = r % BIAS; 222 unsigned rhigh = r / BIAS; 223 if((0 != rlow) && preferred >= 0) { 224 // (BIAS - 1) out of BIAS chances 225 // Use perferred queues 226 i = preferred + (rhigh % 4); 227 local = true; 228 } 229 else { 230 // 1 out of BIAS chances 231 // Use all queues 232 i = rhigh; 233 local = false; 234 } 235 #else 236 i = r; 237 local = false; 238 #endif 239 return [i, local]; 201 240 } 202 241 … … 208 247 thrd->link.ts = rdtscl(); 209 248 210 #if defined(BIAS) && !defined(__CFA_NO_STATISTICS__) 211 bool local = false; 212 int preferred = 249 __attribute__((unused)) bool local; 250 __attribute__((unused)) int preferred; 251 #if defined(BIAS) 252 preferred = 213 253 //* 214 254 kernelTLS.this_processor ? kernelTLS.this_processor->id * 4 : -1; … … 216 256 thrd->link.preferred * 4; 217 257 //*/ 218 219 220 258 #endif 221 259 … … 224 262 do { 225 263 // Pick the index of a lane 226 #if defined(BIAS) 227 unsigned r = __tls_rand(); 228 unsigned rlow = r % BIAS; 229 unsigned rhigh = r / BIAS; 230 if((0 != rlow) && preferred >= 0) { 231 // (BIAS - 1) out of BIAS chances 232 // Use perferred queues 233 i = preferred + (rhigh % 4); 234 235 #if !defined(__CFA_NO_STATISTICS__) 236 local = true; 237 __tls_stats()->ready.pick.push.local++; 238 #endif 239 } 240 else { 241 // 1 out of BIAS chances 242 // Use all queues 243 i = rhigh; 244 local = false; 245 } 246 #else 247 i = __tls_rand(); 264 // unsigned r = __tls_rand(); 265 unsigned r = __tls_rand_fwd(); 266 [i, local] = idx_from_r(r, preferred); 267 268 #if !defined(__CFA_NO_STATISTICS__) 269 if(local) { 270 __tls_stats()->ready.pick.push.local++; 271 } 248 272 #endif 249 273 … … 260 284 261 285 // Actually push it 262 bool lane_first = push(lanes.data[i], thrd); 263 264 // If this lane used to be empty we need to do more 265 if(lane_first) { 266 // Check if the entire queue used to be empty 267 first = !query(snzi); 268 269 // Update the snzi 270 arrive( snzi, i ); 271 } 286 #ifdef USE_SNZI 287 bool lane_first = 288 #endif 289 290 push(lanes.data[i], thrd); 291 292 #ifdef USE_SNZI 293 // If this lane used to be empty we need to do more 294 if(lane_first) { 295 // Check if the entire queue used to be empty 296 first = !query(snzi); 297 298 // Update the snzi 299 arrive( snzi, i ); 300 } 301 #endif 302 303 __tls_rand_advance_bck(); 272 304 273 305 // Unlock and return … … 294 326 __attribute__((hot)) $thread * pop(struct cluster * cltr) with (cltr->ready_queue) { 295 327 /* paranoid */ verify( lanes.count > 0 ); 328 unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 329 int preferred; 296 330 #if defined(BIAS) 297 331 // Don't bother trying locally too much 298 332 int local_tries = 8; 299 #endif 333 preferred = kernelTLS.this_processor->id * 4; 334 #endif 335 300 336 301 337 // As long as the list is not empty, try finding a lane that isn't empty and pop from it 302 while( query(snzi) ) { 338 #ifdef USE_SNZI 339 while( query(snzi) ) { 340 #else 341 for(25) { 342 #endif 303 343 // Pick two lists at random 304 unsigned i,j; 305 #if defined(BIAS) 306 #if !defined(__CFA_NO_STATISTICS__) 307 bool local = false; 308 #endif 309 uint64_t r = __tls_rand(); 310 unsigned rlow = r % BIAS; 311 uint64_t rhigh = r / BIAS; 312 if(local_tries && 0 != rlow) { 313 // (BIAS - 1) out of BIAS chances 314 // Use perferred queues 315 unsigned pid = kernelTLS.this_processor->id * 4; 316 i = pid + (rhigh % 4); 317 j = pid + ((rhigh >> 32ull) % 4); 318 319 // count the tries 320 local_tries--; 321 322 #if !defined(__CFA_NO_STATISTICS__) 323 local = true; 324 __tls_stats()->ready.pick.pop.local++; 325 #endif 326 } 327 else { 328 // 1 out of BIAS chances 329 // Use all queues 330 i = rhigh; 331 j = rhigh >> 32ull; 332 } 333 #else 334 i = __tls_rand(); 335 j = __tls_rand(); 336 #endif 337 338 i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 339 j %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 344 // unsigned ri = __tls_rand(); 345 // unsigned rj = __tls_rand(); 346 unsigned ri = __tls_rand_bck(); 347 unsigned rj = __tls_rand_bck(); 348 349 unsigned i, j; 350 __attribute__((unused)) bool locali, localj; 351 [i, locali] = idx_from_r(ri, preferred); 352 [j, localj] = idx_from_r(rj, preferred); 353 354 #if !defined(__CFA_NO_STATISTICS__) 355 if(locali) { 356 __tls_stats()->ready.pick.pop.local++; 357 } 358 if(localj) { 359 __tls_stats()->ready.pick.pop.local++; 360 } 361 #endif 362 363 i %= count; 364 j %= count; 340 365 341 366 // try popping from the 2 picked lists … … 343 368 if(thrd) { 344 369 #if defined(BIAS) && !defined(__CFA_NO_STATISTICS__) 345 if( local ) __tls_stats()->ready.pick.pop.lsuccess++;370 if( locali || localj ) __tls_stats()->ready.pick.pop.lsuccess++; 346 371 #endif 347 372 return thrd; … … 352 377 return 0p; 353 378 } 379 380 __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) { 381 /* paranoid */ verify( lanes.count > 0 ); 382 unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 383 unsigned offset = __tls_rand(); 384 for(i; count) { 385 unsigned idx = (offset + i) % count; 386 struct $thread * thrd = try_pop(cltr, idx); 387 if(thrd) { 388 return thrd; 389 } 390 } 391 392 // All lanes where empty return 0p 393 return 0p; 394 } 395 354 396 355 397 //----------------------------------------------------------------------- … … 388 430 // Actually pop the list 389 431 struct $thread * thrd; 390 bool emptied; 391 [thrd, emptied] = pop(lane); 432 thrd = pop(lane); 392 433 393 434 /* paranoid */ verify(thrd); 394 435 /* paranoid */ verify(lane.lock); 395 436 396 // If this was the last element in the lane 397 if(emptied) { 398 depart( snzi, w ); 399 } 437 #ifdef USE_SNZI 438 // If this was the last element in the lane 439 if(emptied) { 440 depart( snzi, w ); 441 } 442 #endif 400 443 401 444 // Unlock and return … … 424 467 if(head(lane)->link.next == thrd) { 425 468 $thread * pthrd; 426 bool emptied; 427 [pthrd, emptied] = pop(lane); 469 pthrd = pop(lane); 428 470 429 471 /* paranoid */ verify( pthrd == thrd ); 430 472 431 473 removed = true; 432 if(emptied) { 433 depart( snzi, i ); 434 } 474 #ifdef USE_SNZI 475 if(emptied) { 476 depart( snzi, i ); 477 } 478 #endif 435 479 } 436 480 __atomic_unlock(&lane.lock); … … 494 538 // grow the ready queue 495 539 with( cltr->ready_queue ) { 496 ^(snzi){}; 540 #ifdef USE_SNZI 541 ^(snzi){}; 542 #endif 497 543 498 544 // Find new count … … 501 547 502 548 // Allocate new array (uses realloc and memcpies the data) 503 lanes.data = alloc( lanes.data, ncount);549 lanes.data = alloc( ncount, lanes.data`realloc ); 504 550 505 551 // Fix the moved data … … 516 562 lanes.count = ncount; 517 563 518 // Re-create the snzi 519 snzi{ log2( lanes.count / 8 ) }; 520 for( idx; (size_t)lanes.count ) { 521 if( !is_empty(lanes.data[idx]) ) { 522 arrive(snzi, idx); 523 } 524 } 564 #ifdef USE_SNZI 565 // Re-create the snzi 566 snzi{ log2( lanes.count / 8 ) }; 567 for( idx; (size_t)lanes.count ) { 568 if( !is_empty(lanes.data[idx]) ) { 569 arrive(snzi, idx); 570 } 571 } 572 #endif 525 573 } 526 574 … … 542 590 543 591 with( cltr->ready_queue ) { 544 ^(snzi){}; 592 #ifdef USE_SNZI 593 ^(snzi){}; 594 #endif 545 595 546 596 // Remember old count … … 567 617 while(!is_empty(lanes.data[idx])) { 568 618 struct $thread * thrd; 569 __attribute__((unused)) bool _; 570 [thrd, _] = pop(lanes.data[idx]); 619 thrd = pop(lanes.data[idx]); 571 620 572 621 push(cltr, thrd); … … 589 638 590 639 // Allocate new array (uses realloc and memcpies the data) 591 lanes.data = alloc( lanes.data, lanes.count);640 lanes.data = alloc( lanes.count, lanes.data`realloc ); 592 641 593 642 // Fix the moved data … … 596 645 } 597 646 598 // Re-create the snzi 599 snzi{ log2( lanes.count / 8 ) }; 600 for( idx; (size_t)lanes.count ) { 601 if( !is_empty(lanes.data[idx]) ) { 602 arrive(snzi, idx); 603 } 604 } 647 #ifdef USE_SNZI 648 // Re-create the snzi 649 snzi{ log2( lanes.count / 8 ) }; 650 for( idx; (size_t)lanes.count ) { 651 if( !is_empty(lanes.data[idx]) ) { 652 arrive(snzi, idx); 653 } 654 } 655 #endif 605 656 } 606 657
Note:
See TracChangeset
for help on using the changeset viewer.