Changeset 815943f for libcfa/src
- Timestamp:
- Oct 5, 2022, 10:10:59 PM (3 years ago)
- Branches:
- ADT, ast-experimental, master
- Children:
- 265e460
- Parents:
- ae151cf (diff), 0deeaad (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)
links above to see all the changes relative to each parent. - Location:
- libcfa/src
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/concurrency/io.cfa
rae151cf r815943f 243 243 /* paranoid */ verify( io.tscs[target].t.tv != ULLONG_MAX ); 244 244 HELP: if(target < ctxs_count) { 245 const unsigned long longcutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io, false);246 const unsigned long longage = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma, false);245 const __readyQ_avg_t cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io, false); 246 const __readyQ_avg_t age = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma, false); 247 247 __cfadbg_print_safe(io, "Kernel I/O: Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, ctx->cq.id, age, cutoff, age > cutoff ? "yes" : "no"); 248 248 if(age <= cutoff) break HELP; -
libcfa/src/concurrency/kernel.hfa
rae151cf r815943f 181 181 182 182 // Aligned timestamps which are used by the ready queue and io subsystem 183 union __attribute__((aligned(64))) __timestamp_t { 184 struct { 185 volatile unsigned long long tv; 186 volatile unsigned long long ma; 187 } t; 188 char __padding[192]; 189 }; 190 191 static inline void ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; } 192 static inline void ^?{}(__timestamp_t &) {} 183 union __attribute__((aligned(64))) __timestamp_t; 184 185 void ?{}(__timestamp_t & this); 186 void ^?{}(__timestamp_t &); 193 187 194 188 -
libcfa/src/concurrency/kernel/cluster.cfa
rae151cf r815943f 221 221 static const unsigned __readyq_single_shard = 2; 222 222 223 void ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; } 224 void ^?{}(__timestamp_t &) {} 225 223 226 //----------------------------------------------------------------------- 224 227 // Check that all the intrusive queues in the data structure are still consistent -
libcfa/src/concurrency/kernel/cluster.hfa
rae151cf r815943f 18 18 #include "device/cpu.hfa" 19 19 #include "kernel/private.hfa" 20 #include "math.hfa" 20 21 21 22 #include <limits.h> … … 23 24 #include "clock.hfa" 24 25 26 #if defined(READYQ_USE_LINEAR_AVG) 27 28 // no conversion needed in this case 29 static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return intsc; } 30 31 // warn normally all ints 32 #define warn_large_before warnf( !strict || old_avg < 33_000_000_000, "Suspiciously large previous average: %'llu (%llx), %'ldms \n", old_avg, old_avg, program()`ms ) 33 #define warn_large_after warnf( !strict || ret < 33_000_000_000, "Suspiciously large new average after %'ldms cputime: %'llu (%llx) from %'llu-%'llu (%'llu, %'llu) and %'llu\n", program()`ms, ret, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ) 34 35 // 8X linear factor is just 8 * x 36 #define AVG_FACTOR( x ) (8 * (x)) 37 38 #elif defined(READYQ_USE_LOGDBL_AVG) 39 40 // convert to log2 scale but using double 41 static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { if(unlikely(0 == intsc)) return 0.0; else return log2(intsc); } 42 43 #define warn_large_before warnf( !strict || old_avg < 35.0, "Suspiciously large previous average: %'lf, %'ldms \n", old_avg, program()`ms ) 44 #define warn_large_after warnf( !strict || ret < 35.3, "Suspiciously large new average after %'ldms cputime: %'lf from %'llu-%'llu (%'llu, %'llu) and %'lf\n", program()`ms, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ); \ 45 verify(ret >= 0) 46 47 // 8X factor in logscale is log2(8X) = log2(8) + log2(X) = 3 + log2(X) 48 #define AVG_FACTOR( x ) (3.0 + (x)) 49 50 // we need to overload the __atomic_load_n because they don't support double 51 static inline double __atomic_load_n(volatile double * ptr, int mem) { 52 volatile uint64_t * uptr = (volatile uint64_t *)ptr; 53 _Static_assert(sizeof(*uptr) == sizeof(*ptr)); 54 uint64_t ret = 0; 55 ret = __atomic_load_n(uptr, mem); 56 uint64_t *rp = &ret; 57 double ret = *(volatile double *)rp; 58 /* paranoid */ verify( ret == 0 || ret > 3e-100 ); 59 return ret; 60 } 61 62 // we need to overload the __atomic_store_n because they don't support double 63 static inline void __atomic_store_n(volatile double * ptr, double val, int mem) { 64 /* paranoid */ verify( val == 0 || val > 3e-100 ); 65 volatile uint64_t * uptr = (volatile uint64_t *)ptr; 66 _Static_assert(sizeof(*uptr) == sizeof(*ptr)); 67 uint64_t * valp = (uint64_t *)&val; 68 __atomic_store_n(uptr, *valp, mem); 69 } 70 71 #elif defined(READYQ_USE_LOGDBL_AVG) 72 73 //convert to log2 scale but with fix point u32.32 values 74 static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return ulog2_32_32(tsc); } 75 76 // 8X factor, +3 in logscale (see above) is + 0x3.00000000 77 #define AVG_FACTOR( x ) (0x3_00000000ull + (x)) 78 79 #else 80 #error must pick a scheme for averaging 81 #endif 82 25 83 //----------------------------------------------------------------------- 26 84 // Calc moving average based on existing average, before and current time. 27 static inline unsigned long long moving_average(unsigned long long currtsc, unsigned long long instsc, unsigned long longold_avg, bool strict) {85 static inline __readyQ_avg_t moving_average(unsigned long long currtsc, unsigned long long intsc, __readyQ_avg_t old_avg, bool strict) { 28 86 (void)strict; // disable the warning around the fact this is unused in release. 29 /* paranoid */ warn f( !strict || old_avg < 33_000_000_000, "Suspiciously large previous average: %'llu (%llx), %'ldms \n", old_avg, old_avg, program()`ms );87 /* paranoid */ warn_large_before; 30 88 31 const unsigned long long new_val = currtsc > in stsc ? currtsc - instsc : 0;32 const unsigned long longtotal_weight = 16;33 const unsigned long long new_weight = 4;34 const unsigned long longold_weight = total_weight - new_weight;35 const unsigned long long ret = ((new_weight * new_val) + (old_weight * old_avg)) / total_weight;89 const unsigned long long new_val = currtsc > intsc ? currtsc - intsc : 0; 90 const __readyQ_avg_t total_weight = 16; 91 const __readyQ_avg_t new_weight = 12; 92 const __readyQ_avg_t old_weight = total_weight - new_weight; 93 const __readyQ_avg_t ret = ((new_weight * __to_readyQ_avg(new_val)) + (old_weight * old_avg)) / total_weight; 36 94 37 /* paranoid */ warn f( !strict || ret < 33_000_000_000, "Suspiciously large new average after %'ldms cputime: %'llu (%llx) from %'llu-%'llu (%'llu, %'llu) and %'llu\n", program()`ms, ret, ret, currtsc, instsc, new_val, new_val / 1000000, old_avg );95 /* paranoid */ warn_large_after; 38 96 return ret; 39 97 } … … 42 100 if (ts_next == ULLONG_MAX) return; 43 101 unsigned long long now = rdtscl(); 44 unsigned long longpma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);102 __readyQ_avg_t pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED); 45 103 __atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED); 46 104 __atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma, strict), __ATOMIC_RELAXED); … … 50 108 // Calc age a timestamp should be before needing help. 51 109 forall(Data_t * | { unsigned long long ts(Data_t & this); }) 52 static inline unsigned long longcalc_cutoff(110 static inline __readyQ_avg_t calc_cutoff( 53 111 const unsigned long long ctsc, 54 112 unsigned procid, … … 60 118 ) { 61 119 unsigned start = procid; 62 unsigned long longmax = 0;120 __readyQ_avg_t max = 0; 63 121 for(i; shard_factor) { 64 122 unsigned long long ptsc = ts(data[start + i]); 65 123 if(ptsc != ULLONG_MAX) { 66 124 /* paranoid */ verify( start + i < count ); 67 unsigned long long tsc= moving_average(ctsc, ptsc, tscs[start + i].t.ma, strict);68 if( tsc > max) max = tsc;125 __readyQ_avg_t avg = moving_average(ctsc, ptsc, tscs[start + i].t.ma, strict); 126 if(avg > max) max = avg; 69 127 } 70 128 } 71 return 8 * max;129 return AVG_FACTOR( max ); 72 130 } 73 131 -
libcfa/src/concurrency/kernel/private.hfa
rae151cf r815943f 49 49 #endif 50 50 51 // #define READYQ_USE_LINEAR_AVG 52 #define READYQ_USE_LOGDBL_AVG 53 // #define READYQ_USE_LOGINT_AVG 54 55 #if defined(READYQ_USE_LINEAR_AVG) 56 typedef unsigned long long __readyQ_avg_t; 57 #elif defined(READYQ_USE_LOGDBL_AVG) 58 typedef double __readyQ_avg_t; 59 #elif defined(READYQ_USE_LOGDBL_AVG) 60 typedef unsigned long long __readyQ_avg_t; 61 #else 62 #error must pick a scheme for averaging 63 #endif 64 51 65 //----------------------------------------------------------------------------- 52 66 // Scheduler 67 union __attribute__((aligned(64))) __timestamp_t { 68 struct { 69 volatile unsigned long long tv; 70 volatile __readyQ_avg_t ma; 71 } t; 72 char __padding[192]; 73 }; 74 53 75 extern "C" { 54 76 void disable_interrupts() OPTIONAL_THREAD; -
libcfa/src/concurrency/ready_queue.cfa
rae151cf r815943f 139 139 /* paranoid */ verify( readyQ.tscs[target].t.tv != ULLONG_MAX ); 140 140 if(target < lanes_count) { 141 const unsigned long longcutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq, true);142 const unsigned long longage = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma, false);141 const __readyQ_avg_t cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq, true); 142 const __readyQ_avg_t age = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma, false); 143 143 __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no"); 144 144 if(age > cutoff) { -
libcfa/src/math.hfa
rae151cf r815943f 22 22 23 23 #include "common.hfa" 24 #include "bits/debug.hfa" 24 25 25 26 //---------------------- General ---------------------- … … 307 308 // forall( T | { T ?+?( T, T ); T ?-?( T, T ); T ?%?( T, T ); } ) 308 309 // T ceiling_div( T n, T align ) { verify( is_pow2( align ) );return (n + (align - 1)) / align; } 309 310 310 311 // gcc notices the div/mod pair and saves both so only one div. 311 312 signed char ceiling( signed char n, signed char align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); } … … 429 430 } // distribution 430 431 432 static inline unsigned long long log2_u32_32(unsigned long long val) { 433 enum { 434 TABLE_BITS = 6, 435 TABLE_SIZE = (1 << TABLE_BITS) + 2, 436 }; 437 // for(i; TABLE_SIZE) { 438 // table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32))); 439 // } 440 static const unsigned long long table[] = { 441 0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f, 442 0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4, 443 0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98, 444 0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a, 445 0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c, 446 0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4, 447 0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c, 448 0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1, 449 0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4, 450 0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0, 451 0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9, 452 0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695, 453 0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935, 454 0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5, 455 0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170, 456 0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b, 457 0x0100000000, 0x0102dfca16, 458 }; 459 _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate"); 460 // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1 461 // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f) 462 // 463 // getting i is easy to do using builtin_clz (count leading zero) 464 // 465 // we want to calculate log2(1+f) independently to have a many bits of precision as possible. 466 // val = (2 ** i)*(1 + f) = 2 ** i + f * 2 ** i 467 // isolating f we get 468 // val - 2 ** i = f * 2 ** i 469 // (val - 2 ** i) / 2 ** i = f 470 // 471 // we want to interpolate from the table to get the values 472 // and compromise by doing quadratic interpolation (rather than higher degree interpolation) 473 // 474 // for the interpolation we want to shift everything the fist sample point 475 // so our parabola becomes x = 0 476 // this further simplifies the equations 477 // 478 // the consequence is that we need f in 2 forms: 479 // - finding the index of x0 480 // - finding the distance between f and x0 481 // 482 // since sample points are equidistant we can significantly simplify the equations 483 484 // get i 485 const unsigned long long bits = sizeof(val) * __CHAR_BIT__; 486 const unsigned long long lz = __builtin_clzl(val); 487 const unsigned long long i = bits - 1 - lz; 488 489 // get the fractinal part as a u32.32 490 const unsigned long long frac = (val << (lz + 1)) >> 32; 491 492 // get high order bits for the index into the table 493 const unsigned long long idx0 = frac >> (32 - TABLE_BITS); 494 495 // get the x offset, i.e., the difference between the first sample point and the actual fractional part 496 const long long udx = frac - (idx0 << (32 - TABLE_BITS)); 497 /* paranoid */ verify((idx0 + 2) < TABLE_SIZE); 498 499 const long long y0 = table[idx0 + 0]; 500 const long long y1 = table[idx0 + 1]; 501 const long long y2 = table[idx0 + 2]; 502 503 // from there we can quadraticly interpolate to get the data, using the lagrange polynomial 504 // normally it would look like: 505 // double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2)); 506 // double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2)); 507 // double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1)); 508 // but since the spacing between sample points is fixed, we can simplify it and extract common expressions 509 const long long f1 = (y1 - y0); 510 const long long f2 = (y2 - y0); 511 const long long a = f2 - (f1 * 2l); 512 const long long b = (f1 * 2l) - a; 513 514 // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps) 515 long long sum = ((a*udx) >> (32 - TABLE_BITS)) + b; 516 sum = (sum*udx) >> (32 - TABLE_BITS + 1); 517 sum = y0 + sum; 518 519 return (i << 32) + (sum); 520 } 521 431 522 // Local Variables: // 432 523 // mode: c //
Note:
See TracChangeset
for help on using the changeset viewer.