| 1 | //
 | 
|---|
| 2 | // Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
 | 
|---|
| 3 | //
 | 
|---|
| 4 | // The contents of this file are covered under the licence agreement in the
 | 
|---|
| 5 | // file "LICENCE" distributed with Cforall.
 | 
|---|
| 6 | //
 | 
|---|
// cluster.hfa -- file that includes helpers for subsystems that need cluster-wide support
 | 
|---|
| 8 | //
 | 
|---|
| 9 | // Author           : Thierry Delisle
 | 
|---|
| 10 | // Created On       : Tue Mar 15 16:40:12 2022
 | 
|---|
| 11 | // Last Modified By :
 | 
|---|
| 12 | // Last Modified On :
 | 
|---|
| 13 | // Update Count     :
 | 
|---|
| 14 | //
 | 
|---|
| 15 | 
 | 
|---|
| 16 | #pragma once
 | 
|---|
| 17 | 
 | 
|---|
| 18 | #include "device/cpu.hfa"
 | 
|---|
| 19 | #include "kernel/private.hfa"
 | 
|---|
| 20 | #include "math.hfa"
 | 
|---|
| 21 | 
 | 
|---|
| 22 | #include <limits.h>
 | 
|---|
| 23 | #include <inttypes.h>
 | 
|---|
| 24 | 
 | 
|---|
| 25 | #include "clock.hfa"
 | 
|---|
| 26 | 
 | 
|---|
| 27 | #if   defined(READYQ_USE_LINEAR_AVG)
 | 
|---|
| 28 | 
 | 
|---|
| 29 | // no conversion needed in this case
 | 
|---|
| 30 | static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return intsc; }
 | 
|---|
| 31 | 
 | 
|---|
// Sanity-check warnings for moving_average below; in this branch every value
// involved is a plain integer tick count.
// NOTE(review): these macros expand inside moving_average and refer to its
// locals/parameters by name (old_avg, ret, currtsc, intsc, new_val, strict) --
// do not rename those without updating the macros.
// The 33_000_000_000 threshold is in raw tsc ticks -- presumably on the order
// of ten seconds at a few GHz; TODO confirm the intended bound.
#define warn_large_before warnf( !strict || old_avg < 33_000_000_000, "Suspiciously large previous average: %'llu (%llx), %'" PRId64 "ms \n", old_avg, old_avg, program_cpu()`ms )
#define warn_large_after warnf( !strict || ret < 33_000_000_000, "Suspiciously large new average after %'" PRId64 "ms cputime: %'llu (%llx) from %'llu-%'llu (%'llu, %'llu) and %'llu\n", program_cpu()`ms, ret, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg )

// 8X linear factor is just 8 * x
#define AVG_FACTOR( x ) (8 * (x))
 | 
|---|
| 38 | 
 | 
|---|
| 39 | #elif defined(READYQ_USE_LOGDBL_AVG)
 | 
|---|
| 40 | 
 | 
|---|
| 41 | // convert to log2 scale but using double
 | 
|---|
| 42 | static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { if(unlikely(0 == intsc)) return 0.0; else return log2((__readyQ_avg_t)intsc); }
 | 
|---|
| 43 | 
 | 
|---|
// Sanity-check warnings for moving_average below; averages here are doubles in
// log2 scale, so the 35.x thresholds correspond to roughly 2^35 raw ticks.
// NOTE(review): these macros expand inside moving_average and refer to its
// locals/parameters by name (old_avg, ret, currtsc, intsc, new_val, strict) --
// do not rename those without updating the macros.  warn_large_after also
// verifies the new average is non-negative.
#define warn_large_before warnf( !strict || old_avg < 35.0, "Suspiciously large previous average: %'lf, %'" PRId64 "ms \n", old_avg, program_cpu()`ms )
#define warn_large_after warnf( !strict || ret < 35.3, "Suspiciously large new average after %'" PRId64 "ms cputime: %'lf from %'llu-%'llu (%'llu, %'llu) and %'lf\n", program_cpu()`ms, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ); \
verify(ret >= 0)

// 8X factor in logscale is log2(8X) = log2(8) + log2(X) = 3 + log2(X)
#define AVG_FACTOR( x ) (3.0 + (x))
 | 
|---|
| 50 | 
 | 
|---|
// We need to overload __atomic_load_n because the builtin doesn't support double.
// The bits are loaded atomically as a uint64_t and reinterpreted as a double
// through a union: the previous version punned through a pointer cast (a
// strict-aliasing violation) and declared two locals both named 'ret'.
static inline double __atomic_load_n(volatile double * ptr, int mem) {
	volatile uint64_t * uptr = (volatile uint64_t *)ptr;
	_Static_assert(sizeof(*uptr) == sizeof(*ptr));
	union { uint64_t u; double d; } pun;
	pun.u = __atomic_load_n(uptr, mem);
	/* paranoid */ verify( pun.d == 0 || pun.d > 3e-100 );
	return pun.d;
}
 | 
|---|
| 62 | 
 | 
|---|
// We need to overload __atomic_store_n because the builtin doesn't support double.
// The double is reinterpreted as a uint64_t through a union before the atomic
// store: the previous version punned through a pointer cast, which violates
// strict aliasing.
static inline void __atomic_store_n(volatile double * ptr, double val, int mem) {
	/* paranoid */ verify( val == 0 || val > 3e-100 );
	volatile uint64_t * uptr = (volatile uint64_t *)ptr;
	_Static_assert(sizeof(*uptr) == sizeof(*ptr));
	union { double d; uint64_t u; } pun = { .d = val };
	__atomic_store_n(uptr, pun.u, mem);
}
 | 
|---|
| 71 | 
 | 
|---|
| 72 | #elif defined(READYQ_USE_LOGDBL_AVG)
 | 
|---|
| 73 | 
 | 
|---|
| 74 | //convert to log2 scale but with fix point u32.32 values
 | 
|---|
| 75 | static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return ulog2_32_32(tsc); }
 | 
|---|
| 76 | 
 | 
|---|
| 77 | // 8X factor, +3 in logscale (see above) is + 0x3.00000000
 | 
|---|
| 78 | #define AVG_FACTOR( x ) (0x3_00000000ull + (x))
 | 
|---|
| 79 | 
 | 
|---|
| 80 | #else
 | 
|---|
| 81 | #error must pick a scheme for averaging
 | 
|---|
| 82 | #endif
 | 
|---|
| 83 | 
 | 
|---|
//-----------------------------------------------------------------------
// Calc moving average based on existing average, before and current time.
// Blends the elapsed time between 'intsc' (earlier timestamp, raw tsc ticks)
// and 'currtsc' (current timestamp) into 'old_avg' with a 12/16 weight on the
// new sample and 4/16 on the previous average.  'strict' only tightens the
// sanity warnings below.
// NOTE(review): warn_large_before/warn_large_after expand here and refer to
// old_avg, ret, currtsc, intsc, new_val and strict by name -- do not rename.
static inline __readyQ_avg_t moving_average(unsigned long long currtsc, unsigned long long intsc, __readyQ_avg_t old_avg, bool strict) {
	(void)strict; // disable the warning around the fact this is unused in release.
	/* paranoid */ warn_large_before;

	// Clamp to 0 when the timestamps are out of order rather than underflowing.
	const unsigned long long new_val = currtsc > intsc ? currtsc - intsc : 0;
	const __readyQ_avg_t total_weight = 16;
	const __readyQ_avg_t new_weight   = 12;
	const __readyQ_avg_t old_weight = total_weight - new_weight;
	// __to_readyQ_avg converts the raw tick delta to the configured scale
	// (linear, log2-double, or fixed-point log2) before blending.
	const __readyQ_avg_t ret = ((new_weight * __to_readyQ_avg(new_val)) + (old_weight * old_avg)) / total_weight;

	/* paranoid */ warn_large_after;
	return ret;
}
 | 
|---|
| 99 | 
 | 
|---|
// Update the timestamp/average pair for shard 'idx': publish 'ts_next' as the
// new raw timestamp and fold the time elapsed since 'ts_prev' into the shard's
// moving average.  ULLONG_MAX is the "no timestamp" sentinel (calc_cutoff
// skips such shards), so nothing is stored in that case.
static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next, bool strict) {
	if (ts_next == ULLONG_MAX) return;
	unsigned long long now = rdtscl();
	// All accesses are relaxed: the two fields are updated independently and no
	// ordering between tv and ma is established here.
	__readyQ_avg_t pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);
	__atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED);
	__atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma, strict), __ATOMIC_RELAXED);
}
 | 
|---|
| 107 | 
 | 
|---|
| 108 | //-----------------------------------------------------------------------
 | 
|---|
| 109 | // Calc age a timestamp should be before needing help.
 | 
|---|
| 110 | forall(Data_t * | { unsigned long long ts(Data_t & this); })
 | 
|---|
| 111 | static inline __readyQ_avg_t calc_cutoff(
 | 
|---|
| 112 |         const unsigned long long ctsc,
 | 
|---|
| 113 |         unsigned procid,
 | 
|---|
| 114 |         size_t count,
 | 
|---|
| 115 |         Data_t * data,
 | 
|---|
| 116 |         __timestamp_t * tscs,
 | 
|---|
| 117 |         const unsigned shard_factor,
 | 
|---|
| 118 |         bool strict
 | 
|---|
| 119 | ) {
 | 
|---|
| 120 |         unsigned start = procid;
 | 
|---|
| 121 |         __readyQ_avg_t max = 0;
 | 
|---|
| 122 |         for(i; shard_factor) {
 | 
|---|
| 123 |                 unsigned long long ptsc = ts(data[start + i]);
 | 
|---|
| 124 |                 if(ptsc != ULLONG_MAX) {
 | 
|---|
| 125 |                         /* paranoid */ verify( start + i < count );
 | 
|---|
| 126 |                         __readyQ_avg_t avg = moving_average(ctsc, ptsc, tscs[start + i].t.ma, strict);
 | 
|---|
| 127 |                         if(avg > max) max = avg;
 | 
|---|
| 128 |                 }
 | 
|---|
| 129 |         }
 | 
|---|
| 130 |         return AVG_FACTOR( max );
 | 
|---|
| 131 | }
 | 
|---|
| 132 | 
 | 
|---|
| 133 | static inline unsigned cache_id(struct cluster * cltr, unsigned idx) with (cltr->sched) {
 | 
|---|
| 134 |         // Figure out the current cpu and make sure it is valid
 | 
|---|
| 135 |         const int cpu = __kernel_getcpu();
 | 
|---|
| 136 |         /* paranoid */ verify(cpu >= 0);
 | 
|---|
| 137 |         /* paranoid */ verify(cpu < cpu_info.hthrd_count);
 | 
|---|
| 138 |         unsigned this_cache = cpu_info.llc_map[cpu].cache;
 | 
|---|
| 139 | 
 | 
|---|
| 140 |         // Super important: don't write the same value over and over again
 | 
|---|
| 141 |         // We want to maximise our chances that his particular values stays in cache
 | 
|---|
| 142 |         if(caches[idx].id != this_cache)
 | 
|---|
| 143 |                 __atomic_store_n(&caches[idx].id, this_cache, __ATOMIC_RELAXED);
 | 
|---|
| 144 | 
 | 
|---|
| 145 |         return this_cache;
 | 
|---|
| 146 | }
 | 
|---|
| 147 | 
 | 
|---|
// Shard-factor constants for the ready-queue and io subsystems.
static const struct {
	unsigned readyq; // shards per processor for the ready queue
	unsigned io;     // shards per processor for io
} __shard_factor = { .readyq = 2, .io = 1 };
 | 
|---|
| 152 | 
 | 
|---|
| 153 | // Local Variables: //
 | 
|---|
| 154 | // mode: c //
 | 
|---|
| 155 | // tab-width: 4 //
 | 
|---|
| 156 | // End: //
 | 
|---|