//
// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// cluster.hfa -- file that includes helpers for subsystems that need cluster-wide support
//
// Author           : Thierry Delisle
// Created On       : Tue Mar 15 16:40:12 2022
// Last Modified By :
// Last Modified On :
// Update Count     :
//

#pragma once

#include "device/cpu.hfa"
#include "kernel/private.hfa"
#include "math.hfa"

#include <limits.h>
#include <inttypes.h>

#include "clock.hfa"

#if   defined(READYQ_USE_LINEAR_AVG)

// no conversion needed in this case
static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return intsc; }

// warning macros: all values are plain integers in this scheme
#define warn_large_before warnf( !strict || old_avg < 33_000_000_000, "Suspiciously large previous average: %'llu (%llx), %'" PRId64 "ms \n", old_avg, old_avg, program_cpu()`ms )
#define warn_large_after warnf( !strict || ret < 33_000_000_000, "Suspiciously large new average after %'" PRId64 "ms cputime: %'llu (%llx) from %'llu-%'llu (%'llu, %'llu) and %'llu\n", program_cpu()`ms, ret, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg )

// an 8X factor on a linear scale is just 8 * x
#define AVG_FACTOR( x ) (8 * (x))

#elif defined(READYQ_USE_LOGDBL_AVG)

// convert to log2 scale, stored as a double
static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { if(unlikely(0 == intsc)) return 0.0; else return log2((__readyQ_avg_t)intsc); }

#define warn_large_before warnf( !strict || old_avg < 35.0, "Suspiciously large previous average: %'lf, %'" PRId64 "ms \n", old_avg, program_cpu()`ms )
#define warn_large_after warnf( !strict || ret < 35.3, "Suspiciously large new average after %'" PRId64 "ms cputime: %'lf from %'llu-%'llu (%'llu, %'llu) and %'lf\n", program_cpu()`ms, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ); \
	verify(ret >= 0)

// an 8X factor in logscale is log2(8X) = log2(8) + log2(X) = 3 + log2(X)
#define AVG_FACTOR( x ) (3.0 + (x))

// we need to overload __atomic_load_n because it does not support double
static inline double __atomic_load_n(volatile double * ptr, int mem) {
	volatile uint64_t * uptr = (volatile uint64_t *)ptr;
	_Static_assert(sizeof(*uptr) == sizeof(*ptr), "double must be 64-bit for the atomic overloads");
	// load the bits atomically, then type-pun them back to a double
	uint64_t bits = __atomic_load_n(uptr, mem);
	uint64_t * rp = &bits;
	double ret = *(double *)rp;
	/* paranoid */ verify( ret == 0 || ret > 3e-100 );
	return ret;
}

// we need to overload __atomic_store_n because it does not support double
static inline void __atomic_store_n(volatile double * ptr, double val, int mem) {
	/* paranoid */ verify( val == 0 || val > 3e-100 );
	volatile uint64_t * uptr = (volatile uint64_t *)ptr;
	_Static_assert(sizeof(*uptr) == sizeof(*ptr), "double must be 64-bit for the atomic overloads");
	// type-pun the double to its bit pattern and store that atomically
	uint64_t * valp = (uint64_t *)&val;
	__atomic_store_n(uptr, *valp, mem);
}

#elif defined(READYQ_USE_LOGINT_AVG)

// convert to log2 scale, stored as fixed-point u32.32 values
static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return ulog2_32_32(intsc); }

// an 8X factor, +3 in logscale (see above), is + 0x3.00000000 in u32.32
#define AVG_FACTOR( x ) (0x3_00000000ull + (x))

#else
#error must pick a scheme for averaging
#endif
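// Illustrative sketch, not part of the original header: the AVG_FACTOR
// definitions above are meant to commute with __to_readyQ_avg, i.e. an "8x"
// comparison on raw intervals matches a "*8" (linear) or "+3" (log2)
// comparison on the averaged scale.  The guard READYQ_EXAMPLE_SKETCH and the
// function name are hypothetical and never defined by the build.
#if defined(READYQ_EXAMPLE_SKETCH)
static inline void __example_avg_factor(void) {
	// a made-up interval of 2^10 cycles; powers of two keep the linear and
	// log2-double schemes exact (and should be exact for the fixed-point
	// log as well, assuming ulog2_32_32 is exact at powers of two)
	unsigned long long interval = 1024;
	__readyQ_avg_t scaled = __to_readyQ_avg(interval);
	// 8 * 2^10 = 2^13, i.e. +3 in log space, 8x on the linear scheme
	/* paranoid */ verify( AVG_FACTOR( scaled ) == __to_readyQ_avg(8 * interval) );
}
#endif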
//-----------------------------------------------------------------------
// Calculate the moving average from the existing average, the previous timestamp and the current time.
static inline __readyQ_avg_t moving_average(unsigned long long currtsc, unsigned long long intsc, __readyQ_avg_t old_avg, bool strict) {
	(void)strict; // disable the unused-parameter warning in release builds.
	/* paranoid */ warn_large_before;

	// elapsed time since the timestamp was taken, 0 if the clock went backwards
	const unsigned long long new_val = currtsc > intsc ? currtsc - intsc : 0;
	// exponential moving average: weight the new sample 12/16 and the old average 4/16
	const __readyQ_avg_t total_weight = 16;
	const __readyQ_avg_t new_weight   = 12;
	const __readyQ_avg_t old_weight   = total_weight - new_weight;
	const __readyQ_avg_t ret = ((new_weight * __to_readyQ_avg(new_val)) + (old_weight * old_avg)) / total_weight;

	/* paranoid */ warn_large_after;
	return ret;
}

// Update the timestamp and moving average for sub-queue idx.
static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next, bool strict) {
	if (ts_next == ULLONG_MAX) return;
	unsigned long long now = rdtscl();
	__readyQ_avg_t pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);
	__atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED);
	__atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma, strict), __ATOMIC_RELAXED);
}

//-----------------------------------------------------------------------
// Calculate how old a timestamp should be before the shard needs help.
forall(Data_t * | { unsigned long long ts(Data_t & this); })
static inline __readyQ_avg_t calc_cutoff(
	const unsigned long long ctsc,
	unsigned procid,
	size_t count,
	Data_t * data,
	__timestamp_t * tscs,
	const unsigned shard_factor,
	bool strict
) {
	unsigned start = procid;
	__readyQ_avg_t max = 0;
	for(i; shard_factor) {
		unsigned long long ptsc = ts(data[start + i]);
		if(ptsc != ULLONG_MAX) {
			/* paranoid */ verify( start + i < count );
			__readyQ_avg_t avg = moving_average(ctsc, ptsc, tscs[start + i].t.ma, strict);
			if(avg > max) max = avg;
		}
	}
	return AVG_FACTOR( max );
}

static inline unsigned cache_id(struct cluster * cltr, unsigned idx) with (cltr->sched) {
	// Figure out the current cpu and make sure it is valid
	const int cpu = __kernel_getcpu();
	/* paranoid */ verify(cpu >= 0);
	/* paranoid */ verify(cpu < cpu_info.hthrd_count);
	unsigned this_cache = cpu_info.llc_map[cpu].cache;

	// Super important: don't write the same value over and over again
	// We want to maximise our chances that this particular value stays in cache
	if(caches[idx].id != this_cache) __atomic_store_n(&caches[idx].id, this_cache, __ATOMIC_RELAXED);

	return this_cache;
}

const static struct {
	unsigned readyq;
	unsigned io;
} __shard_factor = { 2, 1 };

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //
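// Illustrative sketch, not part of the original header: feeding
// moving_average() a constant 1600-cycle interval shows how quickly the
// 12/16 weighting tracks recent samples.  On the linear scheme the exact
// values are:
//   step 1: (12*1600 + 4*0)    / 16 = 1200
//   step 2: (12*1600 + 4*1200) / 16 = 1500
// (the log-scale schemes apply the same weights to log2 of the interval).
// The guard READYQ_EXAMPLE_SKETCH and the function name are hypothetical.
#if defined(READYQ_EXAMPLE_SKETCH)
static inline __readyQ_avg_t __example_moving_average(void) {
	__readyQ_avg_t avg = __to_readyQ_avg(0);      // start from an empty average
	avg = moving_average(1600,    0, avg, false); // 1200 on the linear scheme
	avg = moving_average(3200, 1600, avg, false); // 1500 on the linear scheme
	return avg;
}
#endif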