//
// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// cluster.hfa -- file that includes helpers for subsystems that need cluster-wide support
//
// Author           : Thierry Delisle
// Created On       : Tue Mar 15 16:40:12 2022
// Last Modified By :
// Last Modified On :
// Update Count     :
//

#pragma once

#include "device/cpu.hfa"
#include "kernel/private.hfa"
#include "math.hfa"

#include <limits.h>
#include <inttypes.h>

#include "clock.hfa"

#if   defined(READYQ_USE_LINEAR_AVG)

// no conversion needed in this case
static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return intsc; }

// warning macros: all values are plain integers in this scheme
#define warn_large_before warnf( !strict || old_avg < 33_000_000_000, "Suspiciously large previous average: %'llu (%llx), %'" PRId64 "ms \n", old_avg, old_avg, program_cpu()`ms )
#define warn_large_after warnf( !strict || ret < 33_000_000_000, "Suspiciously large new average after %'" PRId64 "ms cputime: %'llu (%llx) from %'llu-%'llu (%'llu, %'llu) and %'llu\n", program_cpu()`ms, ret, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg )

// an 8X factor on a linear scale is just 8 * x
#define AVG_FACTOR( x ) (8 * (x))

#elif defined(READYQ_USE_LOGDBL_AVG)

// convert to log2 scale, stored as a double
static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { if(unlikely(0 == intsc)) return 0.0; else return log2((__readyQ_avg_t)intsc); }

#define warn_large_before warnf( !strict || old_avg < 35.0, "Suspiciously large previous average: %'lf, %'" PRId64 "ms \n", old_avg, program_cpu()`ms )
#define warn_large_after warnf( !strict || ret < 35.3, "Suspiciously large new average after %'" PRId64 "ms cputime: %'lf from %'llu-%'llu (%'llu, %'llu) and %'lf\n", program_cpu()`ms, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ); \
	verify(ret >= 0)

// an 8X factor in logscale is log2(8X) = log2(8) + log2(X) = 3 + log2(X)
#define AVG_FACTOR( x ) (3.0 + (x))

// we need to overload __atomic_load_n because it does not support double
static inline double __atomic_load_n(volatile double * ptr, int mem) {
	volatile uint64_t * uptr = (volatile uint64_t *)ptr;
	_Static_assert(sizeof(*uptr) == sizeof(*ptr), "double must be 64-bit for the atomic overloads");
	// load the bits atomically, then type-pun them back to a double
	uint64_t bits = __atomic_load_n(uptr, mem);
	uint64_t * rp = &bits;
	double ret = *(double *)rp;
	/* paranoid */ verify( ret == 0 || ret > 3e-100 );
	return ret;
}

// we need to overload __atomic_store_n because it does not support double
static inline void __atomic_store_n(volatile double * ptr, double val, int mem) {
	/* paranoid */ verify( val == 0 || val > 3e-100 );
	volatile uint64_t * uptr = (volatile uint64_t *)ptr;
	_Static_assert(sizeof(*uptr) == sizeof(*ptr), "double must be 64-bit for the atomic overloads");
	// type-pun the double to its bit pattern and store that atomically
	uint64_t * valp = (uint64_t *)&val;
	__atomic_store_n(uptr, *valp, mem);
}

#elif defined(READYQ_USE_LOGINT_AVG)

// convert to log2 scale, stored as fixed-point u32.32 values
static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return ulog2_32_32(intsc); }

// an 8X factor, +3 in logscale (see above), is + 0x3.00000000 in u32.32
#define AVG_FACTOR( x ) (0x3_00000000ull + (x))

#else
#error must pick a scheme for averaging
#endif
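// Illustrative sketch, not part of the original header: the AVG_FACTOR
// definitions above are meant to commute with __to_readyQ_avg, i.e. an "8x"
// comparison on raw intervals matches a "*8" (linear) or "+3" (log2)
// comparison on the averaged scale.  The guard READYQ_EXAMPLE_SKETCH and the
// function name are hypothetical and never defined by the build.
#if defined(READYQ_EXAMPLE_SKETCH)
static inline void __example_avg_factor(void) {
	// a made-up interval of 2^10 cycles; powers of two keep the linear and
	// log2-double schemes exact (and should be exact for the fixed-point
	// log as well, assuming ulog2_32_32 is exact at powers of two)
	unsigned long long interval = 1024;
	__readyQ_avg_t scaled = __to_readyQ_avg(interval);
	// 8 * 2^10 = 2^13, i.e. +3 in log space, 8x on the linear scheme
	/* paranoid */ verify( AVG_FACTOR( scaled ) == __to_readyQ_avg(8 * interval) );
}
#endif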
//-----------------------------------------------------------------------
// Calculate the moving average from the existing average, the previous timestamp and the current time.
static inline __readyQ_avg_t moving_average(unsigned long long currtsc, unsigned long long intsc, __readyQ_avg_t old_avg, bool strict) {
	(void)strict; // disable the unused-parameter warning in release builds.
	/* paranoid */ warn_large_before;

	// elapsed time since the timestamp was taken, 0 if the clock went backwards
	const unsigned long long new_val = currtsc > intsc ? currtsc - intsc : 0;
	// exponential moving average: weight the new sample 12/16 and the old average 4/16
	const __readyQ_avg_t total_weight = 16;
	const __readyQ_avg_t new_weight   = 12;
	const __readyQ_avg_t old_weight   = total_weight - new_weight;
	const __readyQ_avg_t ret = ((new_weight * __to_readyQ_avg(new_val)) + (old_weight * old_avg)) / total_weight;

	/* paranoid */ warn_large_after;
	return ret;
}

// Update the timestamp and moving average for sub-queue idx.
static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next, bool strict) {
	if (ts_next == ULLONG_MAX) return;
	unsigned long long now = rdtscl();
	__readyQ_avg_t pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);
	__atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED);
	__atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma, strict), __ATOMIC_RELAXED);
}

//-----------------------------------------------------------------------
// Calculate how old a timestamp should be before the shard needs help.
forall(Data_t * | { unsigned long long ts(Data_t & this); })
static inline __readyQ_avg_t calc_cutoff(
	const unsigned long long ctsc,
	unsigned procid,
	size_t count,
	Data_t * data,
	__timestamp_t * tscs,
	const unsigned shard_factor,
	bool strict
) {
	unsigned start = procid;
	__readyQ_avg_t max = 0;
	for(i; shard_factor) {
		unsigned long long ptsc = ts(data[start + i]);
		if(ptsc != ULLONG_MAX) {
			/* paranoid */ verify( start + i < count );
			__readyQ_avg_t avg = moving_average(ctsc, ptsc, tscs[start + i].t.ma, strict);
			if(avg > max) max = avg;
		}
	}
	return AVG_FACTOR( max );
}

static inline unsigned cache_id(struct cluster * cltr, unsigned idx) with (cltr->sched) {
	// Figure out the current cpu and make sure it is valid
	const int cpu = __kernel_getcpu();
	/* paranoid */ verify(cpu >= 0);
	/* paranoid */ verify(cpu < cpu_info.hthrd_count);
	unsigned this_cache = cpu_info.llc_map[cpu].cache;

	// Super important: don't write the same value over and over again
	// We want to maximise our chances that this particular value stays in cache
	if(caches[idx].id != this_cache) __atomic_store_n(&caches[idx].id, this_cache, __ATOMIC_RELAXED);

	return this_cache;
}

const static struct {
	unsigned readyq;
	unsigned io;
} __shard_factor = { 2, 1 };

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //
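// Illustrative sketch, not part of the original header: feeding
// moving_average() a constant 1600-cycle interval shows how quickly the
// 12/16 weighting tracks recent samples.  On the linear scheme the exact
// values are:
//   step 1: (12*1600 + 4*0)    / 16 = 1200
//   step 2: (12*1600 + 4*1200) / 16 = 1500
// (the log-scale schemes apply the same weights to log2 of the interval).
// The guard READYQ_EXAMPLE_SKETCH and the function name are hypothetical.
#if defined(READYQ_EXAMPLE_SKETCH)
static inline __readyQ_avg_t __example_moving_average(void) {
	__readyQ_avg_t avg = __to_readyQ_avg(0);      // start from an empty average
	avg = moving_average(1600,    0, avg, false); // 1200 on the linear scheme
	avg = moving_average(3200, 1600, avg, false); // 1500 on the linear scheme
	return avg;
}
#endif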