| [708ae38] | 1 | //
 | 
|---|
 | 2 | // Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
 | 
|---|
 | 3 | //
 | 
|---|
 | 4 | // The contents of this file are covered under the licence agreement in the
 | 
|---|
 | 5 | // file "LICENCE" distributed with Cforall.
 | 
|---|
 | 6 | //
 | 
|---|
 | 7 | // cluster.hfa -- file that includes helpers for subsystem that need cluster wide support
 | 
|---|
 | 8 | //
 | 
|---|
 | 9 | // Author           : Thierry Delisle
 | 
|---|
 | 10 | // Created On       : Tue Mar 15 16:40:12 2022
 | 
|---|
 | 11 | // Last Modified By :
 | 
|---|
 | 12 | // Last Modified On :
 | 
|---|
 | 13 | // Update Count     :
 | 
|---|
 | 14 | //
 | 
|---|
 | 15 | 
 | 
|---|
| [8f01ad71] | 16 | #pragma once
 | 
|---|
 | 17 | 
 | 
|---|
| [78a580d] | 18 | #include "device/cpu.hfa"
 | 
|---|
| [708ae38] | 19 | #include "kernel/private.hfa"
 | 
|---|
| [31c967b] | 20 | #include "math.hfa"
 | 
|---|
| [708ae38] | 21 | 
 | 
|---|
| [b035046] | 22 | #include <limits.h>
 | 
|---|
| [a46f7b6] | 23 | #include <inttypes.h>
 | 
|---|
| [78a580d] | 24 | 
 | 
|---|
| [da77728] | 25 | #include "clock.hfa"
 | 
|---|
 | 26 | 
 | 
|---|
| [31c967b] | 27 | #if   defined(READYQ_USE_LINEAR_AVG)
 | 
|---|
 | 28 | 
 | 
|---|
 | 29 | // no conversion needed in this case
 | 
|---|
 | 30 | static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return intsc; }
 | 
|---|
 | 31 | 
 | 
|---|
 | 32 | // warn normally all ints
 | 
|---|
| [ba068c0] | 33 | #define warn_large_before warnf( !strict || old_avg < 33_000_000_000, "Suspiciously large previous average: %'llu (%llx), %'" PRId64 "ms \n", old_avg, old_avg, program_cpu()`ms )
 | 
|---|
 | 34 | #define warn_large_after warnf( !strict || ret < 33_000_000_000, "Suspiciously large new average after %'" PRId64 "ms cputime: %'llu (%llx) from %'llu-%'llu (%'llu, %'llu) and %'llu\n", program_cpu()`ms, ret, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg )
 | 
|---|
| [31c967b] | 35 | 
 | 
|---|
 | 36 | // 8X linear factor is just 8 * x
 | 
|---|
 | 37 | #define AVG_FACTOR( x ) (8 * (x))
 | 
|---|
 | 38 | 
 | 
|---|
 | 39 | #elif defined(READYQ_USE_LOGDBL_AVG)
 | 
|---|
 | 40 | 
 | 
|---|
 | 41 | // convert to log2 scale but using double
 | 
|---|
| [24d6572] | 42 | static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { if(unlikely(0 == intsc)) return 0.0; else return log2((__readyQ_avg_t)intsc); }
 | 
|---|
| [31c967b] | 43 | 
 | 
|---|
// Sanity checks for the log2/double scheme.
// Both macros expand inside moving_average and refer to its identifiers by
// name (strict, old_avg, ret, currtsc, intsc, new_val).
// The condition handed to warnf is always true when strict is false.
// warn_large_after expands to two statements: the warnf call followed by a
// verify that the new average is not negative.
#define warn_large_before \
	warnf( !strict || old_avg < 35.0, \
		"Suspiciously large previous average: %'lf, %'" PRId64 "ms \n", \
		old_avg, program_cpu()`ms )
#define warn_large_after \
	warnf( !strict || ret < 35.3, \
		"Suspiciously large new average after %'" PRId64 "ms cputime: %'lf from %'llu-%'llu (%'llu, %'llu) and %'lf\n", \
		program_cpu()`ms, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ); \
	verify(ret >= 0)

// An 8X factor in log scale: log2(8X) = log2(8) + log2(X) = 3 + log2(X)
#define AVG_FACTOR( x ) (3.0 + (x))
 | 
|---|
 | 50 | 
 | 
|---|
 | 51 | // we need to overload the __atomic_load_n because they don't support double
 | 
|---|
 | 52 | static inline double __atomic_load_n(volatile double * ptr, int mem) {
 | 
|---|
 | 53 |         volatile uint64_t * uptr = (volatile uint64_t *)ptr;
 | 
|---|
 | 54 |         _Static_assert(sizeof(*uptr) == sizeof(*ptr));
 | 
|---|
 | 55 |         uint64_t ret = 0;
 | 
|---|
 | 56 |         ret = __atomic_load_n(uptr, mem);
 | 
|---|
 | 57 |         uint64_t *rp = &ret;
 | 
|---|
 | 58 |         double ret = *(volatile double *)rp;
 | 
|---|
 | 59 |         /* paranoid */ verify( ret == 0 || ret > 3e-100 );
 | 
|---|
 | 60 |         return ret;
 | 
|---|
 | 61 | }
 | 
|---|
 | 62 | 
 | 
|---|
 | 63 | // we need to overload the __atomic_store_n because they don't support double
 | 
|---|
 | 64 | static inline void __atomic_store_n(volatile double * ptr, double val, int mem) {
 | 
|---|
 | 65 |         /* paranoid */ verify( val == 0 || val > 3e-100 );
 | 
|---|
 | 66 |         volatile uint64_t * uptr = (volatile uint64_t *)ptr;
 | 
|---|
 | 67 |         _Static_assert(sizeof(*uptr) == sizeof(*ptr));
 | 
|---|
 | 68 |         uint64_t * valp = (uint64_t *)&val;
 | 
|---|
 | 69 |         __atomic_store_n(uptr, *valp, mem);
 | 
|---|
 | 70 | }
 | 
|---|
 | 71 | 
 | 
|---|
 | 72 | #elif defined(READYQ_USE_LOGDBL_AVG)
 | 
|---|
 | 73 | 
 | 
|---|
 | 74 | //convert to log2 scale but with fix point u32.32 values
 | 
|---|
 | 75 | static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return ulog2_32_32(tsc); }
 | 
|---|
 | 76 | 
 | 
|---|
 | 77 | // 8X factor, +3 in logscale (see above) is + 0x3.00000000
 | 
|---|
 | 78 | #define AVG_FACTOR( x ) (0x3_00000000ull + (x))
 | 
|---|
 | 79 | 
 | 
|---|
 | 80 | #else
 | 
|---|
 | 81 | #error must pick a scheme for averaging
 | 
|---|
 | 82 | #endif
 | 
|---|
 | 83 | 
 | 
|---|
| [708ae38] | 84 | //-----------------------------------------------------------------------
 | 
|---|
 | 85 | // Calc moving average based on existing average, before and current time.
 | 
|---|
| [31c967b] | 86 | static inline __readyQ_avg_t moving_average(unsigned long long currtsc, unsigned long long intsc, __readyQ_avg_t old_avg, bool strict) {
 | 
|---|
| [5f9c42b] | 87 |         (void)strict; // disable the warning around the fact this is unused in release.
 | 
|---|
| [31c967b] | 88 |         /* paranoid */ warn_large_before;
 | 
|---|
| [708ae38] | 89 | 
 | 
|---|
| [31c967b] | 90 |         const unsigned long long new_val = currtsc > intsc ? currtsc - intsc : 0;
 | 
|---|
 | 91 |         const __readyQ_avg_t total_weight = 16;
 | 
|---|
| [d1cd4c4] | 92 |         const __readyQ_avg_t new_weight   = 12;
 | 
|---|
| [31c967b] | 93 |         const __readyQ_avg_t old_weight = total_weight - new_weight;
 | 
|---|
 | 94 |         const __readyQ_avg_t ret = ((new_weight * __to_readyQ_avg(new_val)) + (old_weight * old_avg)) / total_weight;
 | 
|---|
| [da77728] | 95 | 
 | 
|---|
| [31c967b] | 96 |         /* paranoid */ warn_large_after;
 | 
|---|
| [708ae38] | 97 |         return ret;
 | 
|---|
 | 98 | }
 | 
|---|
 | 99 | 
 | 
|---|
// Record a new timestamp for entry idx of tscs and refresh its moving average.
//   tscs    : array of timestamp entries
//   idx     : entry to update
//   ts_prev : timestamp the new sample is measured from
//   ts_next : timestamp to publish; ULLONG_MAX means there is nothing to do
//   strict  : forwarded to moving_average
// All accesses use relaxed atomics. The previous average is read first, then
// the timestamp is stored, then the new average is computed and stored.
static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next, bool strict) {
	if (ts_next != ULLONG_MAX) {
		const unsigned long long now = rdtscl();
		__timestamp_t * slot = &tscs[ idx ];

		__readyQ_avg_t pma = __atomic_load_n(&slot->t.ma, __ATOMIC_RELAXED);
		__atomic_store_n(&slot->t.tv, ts_next, __ATOMIC_RELAXED);

		const __readyQ_avg_t nma = moving_average(now, ts_prev, pma, strict);
		__atomic_store_n(&slot->t.ma, nma, __ATOMIC_RELAXED);
	}
}
 | 
|---|
 | 107 | 
 | 
|---|
| [708ae38] | 108 | //-----------------------------------------------------------------------
 | 
|---|
 | 109 | // Calc age a timestamp should be before needing help.
 | 
|---|
 | 110 | forall(Data_t * | { unsigned long long ts(Data_t & this); })
 | 
|---|
| [31c967b] | 111 | static inline __readyQ_avg_t calc_cutoff(
 | 
|---|
| [708ae38] | 112 |         const unsigned long long ctsc,
 | 
|---|
| [4479890] | 113 |         unsigned procid,
 | 
|---|
| [708ae38] | 114 |         size_t count,
 | 
|---|
 | 115 |         Data_t * data,
 | 
|---|
 | 116 |         __timestamp_t * tscs,
 | 
|---|
| [5f9c42b] | 117 |         const unsigned shard_factor,
 | 
|---|
 | 118 |         bool strict
 | 
|---|
| [708ae38] | 119 | ) {
 | 
|---|
| [4479890] | 120 |         unsigned start = procid;
 | 
|---|
| [31c967b] | 121 |         __readyQ_avg_t max = 0;
 | 
|---|
| [708ae38] | 122 |         for(i; shard_factor) {
 | 
|---|
 | 123 |                 unsigned long long ptsc = ts(data[start + i]);
 | 
|---|
| [b035046] | 124 |                 if(ptsc != ULLONG_MAX) {
 | 
|---|
| [708ae38] | 125 |                         /* paranoid */ verify( start + i < count );
 | 
|---|
| [31c967b] | 126 |                         __readyQ_avg_t avg = moving_average(ctsc, ptsc, tscs[start + i].t.ma, strict);
 | 
|---|
 | 127 |                         if(avg > max) max = avg;
 | 
|---|
| [708ae38] | 128 |                 }
 | 
|---|
 | 129 |         }
 | 
|---|
| [31c967b] | 130 |         return AVG_FACTOR( max );
 | 
|---|
| [708ae38] | 131 | }
 | 
|---|
 | 132 | 
 | 
|---|
 | 133 | static inline unsigned cache_id(struct cluster * cltr, unsigned idx) with (cltr->sched) {
 | 
|---|
 | 134 |         // Figure out the current cpu and make sure it is valid
 | 
|---|
 | 135 |         const int cpu = __kernel_getcpu();
 | 
|---|
 | 136 |         /* paranoid */ verify(cpu >= 0);
 | 
|---|
 | 137 |         /* paranoid */ verify(cpu < cpu_info.hthrd_count);
 | 
|---|
 | 138 |         unsigned this_cache = cpu_info.llc_map[cpu].cache;
 | 
|---|
 | 139 | 
 | 
|---|
 | 140 |         // Super important: don't write the same value over and over again
 | 
|---|
 | 141 |         // We want to maximise our chances that his particular values stays in cache
 | 
|---|
 | 142 |         if(caches[idx].id != this_cache)
 | 
|---|
 | 143 |                 __atomic_store_n(&caches[idx].id, this_cache, __ATOMIC_RELAXED);
 | 
|---|
 | 144 | 
 | 
|---|
 | 145 |         return this_cache;
 | 
|---|
 | 146 | }
 | 
|---|
 | 147 | 
 | 
|---|
// Constant sharding factors: one value for the ready-queue (2) and one for
// io (1).
static const struct {
	unsigned readyq;	// factor used for the ready-queue
	unsigned io;		// factor used for io
} __shard_factor = { 2, 1 };
 | 
|---|
| [708ae38] | 152 | 
 | 
|---|
 | 153 | // Local Variables: //
 | 
|---|
 | 154 | // mode: c //
 | 
|---|
 | 155 | // tab-width: 4 //
 | 
|---|
| [aca0d2f] | 156 | // End: //
 | 
|---|